Răsfoiți Sursa

feat: add magic-pdf-dev case

quyuan 1 an în urmă
părinte
comite
01420850e1

+ 1 - 1
.github/workflows/cli.yml

@@ -43,7 +43,7 @@ jobs:
         cd $GITHUB_WORKSPACE && python tests/get_coverage.py
     - name: cli test
       run: |
-        cd $GITHUB_WORKSPACE &&  pytest -s -v tests/test_cli/test_cli_sdk.py
+        source ~/.bashrc && cd $GITHUB_WORKSPACE &&  pytest -s -v tests/test_cli/test_cli.py
 
   notify_to_feishu:
     if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}

+ 0 - 0
tests/clean_covrage.py → tests/clean_coverage.py


+ 2 - 1
tests/test_cli/conf/conf.py

@@ -3,5 +3,6 @@ conf = {
 "code_path": os.environ.get('GITHUB_WORKSPACE'),
 "pdf_dev_path" : os.environ.get('GITHUB_WORKSPACE') + "/tests/test_cli/pdf_dev",
 "pdf_res_path": "/tmp/magic-pdf",
-"jsonl_path": "s3://llm-qatest-pnorm/mineru/test/line1.jsonl"
+"jsonl_path": "s3://llm-qatest-pnorm/mineru/test/line1.jsonl",
+"s3_pdf_path": "s3://llm-qatest-pnorm/mineru/test/test.pdf"
 }

+ 19 - 1
tests/test_cli/test_cli_sdk.py

@@ -9,7 +9,7 @@ from lib import common
 import magic_pdf.model as model_config
 from magic_pdf.pipe.UNIPipe import UNIPipe
 from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
-
+from magic_pdf.rw.S3ReaderWriter import S3ReaderWriter
 model_config.__use_inside_model__ = True
 pdf_res_path = conf.conf['pdf_res_path']
 code_path = conf.conf['code_path']
@@ -248,6 +248,24 @@ class TestCli:
         os.system(cmd)
 
 
+    @pytest.mark.P1
+    def test_s3_sdk_suto(self):  # SDK test: read a PDF from S3, run it through UNIPipe, expect non-empty markdown
+        pdf_ak = os.environ.get('pdf_ak', "")  # S3 settings are read from env vars; each falls back to ""
+        pdf_sk = os.environ.get('pdf_sk', "")
+        pdf_bucket = os.environ.get('bucket', "")  # NOTE(review): env var is 'bucket', without the 'pdf_' prefix the others use
+        pdf_endpoint = os.environ.get('pdf_endpoint', "")
+        s3_pdf_path = conf.conf["s3_pdf_path"]  # input PDF location, taken from the test config
+        image_dir = "s3://" + pdf_bucket + "/mineru/test/test.md"  # NOTE(review): ends in 'test.md' yet is used as image dir and parent_path; confirm intended
+        s3pdf_cli = S3ReaderWriter(pdf_ak, pdf_sk, pdf_endpoint)  # client used only to read the source PDF
+        s3image_cli = S3ReaderWriter(pdf_ak, pdf_sk, pdf_endpoint, parent_path=image_dir)  # client handed to UNIPipe, created with parent_path=image_dir
+        pdf_bytes = s3pdf_cli.read(s3_pdf_path, mode=s3pdf_cli.MODE_BIN)
+        jso_useful_key = {"_pdf_type": "", "model_list": []}  # starts with empty type and model list; presumably filled by the pipe steps (TODO confirm)
+        pipe = UNIPipe(pdf_bytes, jso_useful_key, s3image_cli)
+        pipe.pipe_classify()
+        pipe.pipe_analyze()
+        pipe.pipe_parse()
+        md_content = pipe.pipe_mk_markdown(image_dir, drop_mode="none")
+        assert len(md_content) > 0  # only checks that some output was produced, not its content
 
 
 if __name__ == '__main__':