|
@@ -182,7 +182,7 @@ class TestCli:
|
|
|
def test_pdf_dev_cli_local_jsonl_txt(self):
    """Invoke ``magic-pdf-dev`` on ``line1.jsonl`` with method ``txt``.

    The command line is built from ``pdf_dev_path``, logged, and then run
    through the shell.
    """
    parse_method = 'txt'
    jsonl_file = os.path.join(pdf_dev_path, 'line1.jsonl')
    command = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_file, parse_method)
    logging.info(command)
    # NOTE(review): the exit status returned by os.system is discarded, so a
    # failing CLI run does not fail this test -- confirm that is intended.
    os.system(command)
|
|
|
|
|
|
|
@@ -191,7 +191,7 @@ class TestCli:
|
|
|
def test_pdf_dev_cli_local_jsonl_ocr(self):
    """Invoke ``magic-pdf-dev`` on ``line1.jsonl`` with method ``ocr``.

    The command line is built from ``pdf_dev_path``, logged, and then run
    through the shell.
    """
    parse_method = 'ocr'
    jsonl_file = os.path.join(pdf_dev_path, 'line1.jsonl')
    command = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_file, parse_method)
    logging.info(command)
    # NOTE(review): the exit status returned by os.system is discarded, so a
    # failing CLI run does not fail this test -- confirm that is intended.
    os.system(command)
|
|
|
|
|
|
|
@@ -199,7 +199,7 @@ class TestCli:
|
|
|
def test_pdf_dev_cli_local_jsonl_auto(self):
    """Invoke ``magic-pdf-dev`` on ``line1.jsonl`` with method ``auto``.

    The command line is built from ``pdf_dev_path``, logged, and then run
    through the shell.
    """
    parse_method = 'auto'
    jsonl_file = os.path.join(pdf_dev_path, 'line1.jsonl')
    command = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_file, parse_method)
    logging.info(command)
    # NOTE(review): the exit status returned by os.system is discarded, so a
    # failing CLI run does not fail this test -- confirm that is intended.
    os.system(command)
|
|
|
|
|
|
|
@@ -207,7 +207,7 @@ class TestCli:
|
|
|
def test_pdf_dev_cli_s3_jsonl_txt(self):
    """Invoke ``magic-pdf-dev`` for the "s3" jsonl case with method ``txt``.

    The command line is built from ``pdf_dev_path``, logged, and then run
    through the shell.
    """
    parse_method = 'txt'
    # NOTE(review): the jsonl path is built from pdf_dev_path exactly as in
    # the "local" variant; presumably the jsonl content is what points at
    # S3 -- verify against the fixture.
    jsonl_file = os.path.join(pdf_dev_path, 'line1.jsonl')
    command = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_file, parse_method)
    logging.info(command)
    # The return value of os.system (the exit status) is not checked here.
    os.system(command)
|
|
|
|
|
|
|
@@ -216,7 +216,7 @@ class TestCli:
|
|
|
def test_pdf_dev_cli_s3_jsonl_ocr(self):
    """Invoke ``magic-pdf-dev`` for the "s3" jsonl case with method ``ocr``.

    The command line is built from ``pdf_dev_path``, logged, and then run
    through the shell.
    """
    parse_method = 'ocr'
    # NOTE(review): the jsonl path is built from pdf_dev_path exactly as in
    # the "local" variant; presumably the jsonl content is what points at
    # S3 -- verify against the fixture.
    jsonl_file = os.path.join(pdf_dev_path, 'line1.jsonl')
    command = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_file, parse_method)
    logging.info(command)
    # The return value of os.system (the exit status) is not checked here.
    os.system(command)
|
|
|
|
|
|
|
@@ -229,5 +229,26 @@ class TestCli:
|
|
|
os.system(cmd)
|
|
os.system(cmd)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+ @pytest.mark.P1
|
|
|
|
|
def test_pdf_dev_cli_pdf_json_auto(self):
    """Invoke ``magic-pdf-dev`` with a PDF plus a JSON file, method ``auto``.

    Both input paths are resolved under ``pdf_dev_path``; the command line
    is logged and then run through the shell.
    """
    fixture_pdf = os.path.join(
        pdf_dev_path, 'pdf',
        'research_report_1f978cd81fb7260c8f7644039ec2c054.pdf')
    model_json = os.path.join(pdf_dev_path, 'test_model.json')
    command = 'magic-pdf-dev --pdf %s --json %s --method %s' % (
        fixture_pdf, model_json, 'auto')
    logging.info(command)
    # The return value of os.system (the exit status) is not checked here.
    os.system(command)
|
|
|
|
|
+
|
|
|
|
|
+ @pytest.mark.P1
|
|
|
|
|
def test_pdf_dev_cli_pdf_json_ocr(self):
    """Invoke ``magic-pdf-dev`` with a PDF plus a JSON file, method ``ocr``.

    Both input paths are resolved under ``pdf_dev_path``; the command line
    is logged and then run through the shell.
    """
    json_path = os.path.join(pdf_dev_path, 'test_model.json')
    pdf_path = os.path.join(
        pdf_dev_path, 'pdf',
        'research_report_1f978cd81fb7260c8f7644039ec2c054.pdf')
    # Pass 'ocr' so this test matches its name and docstring; it previously
    # passed 'auto', which only duplicated test_pdf_dev_cli_pdf_json_auto.
    cmd = 'magic-pdf-dev --pdf %s --json %s --method %s' % (
        pdf_path, json_path, 'ocr')
    logging.info(cmd)
    # NOTE(review): the exit status returned by os.system is discarded, so a
    # failing CLI run does not fail this test -- confirm that is intended.
    os.system(cmd)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Allow running this test module directly as a script.
    pytest.main()
|