quyuan 1 år sedan
förälder
incheckning
2e48d62c00

+ 0 - 39
.github/workflows/benchmark.yml

@@ -1,39 +0,0 @@
-# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
-
-name: mineru
-on:
-  push:
-    branches:
-      - "master"
-    paths-ignore:
-      - "cmds/**"
-      - "**.md"
-  pull_request:
-    branches:
-      - "master"
-    paths-ignore:
-      - "cmds/**"
-      - "**.md"
-  workflow_dispatch:
-jobs:
-  pdf-test:
-    runs-on: ubuntu-latest
-    timeout-minutes: 180
-    strategy:
-      fail-fast: true
-
-    steps:
-    - name: PDF benchmark
-      uses: actions/checkout@v3
-      with:
-        fetch-depth: 2
-    - name: check-requirements
-      run: |
-        pip install -r requirements-qa.txt
-        pip install magic-pdf
-    - name: get-benchmark-result
-      run: |
-        echo "start test"
-        cd $GITHUB_WORKSPACE &&  pytest -s -v tests/test_cli/test_bench.py
-  

+ 4 - 0
.github/workflows/cli.yml

@@ -33,6 +33,7 @@ jobs:
       run: |
         pip install -r requirements.txt
         pip install -r requirements-qa.txt
+        pip install magic-pdf
     - name: test_cli
       run: |
         cp magic-pdf.template.json ~/magic-pdf.json
@@ -40,3 +41,6 @@ jobs:
         cd $GITHUB_WORKSPACE && export PYTHONPATH=. && pytest -s -v tests/test_unit.py
         cd $GITHUB_WORKSPACE &&  pytest -s -v tests/test_cli/test_cli.py
                                                                                                                             
+    - name: benchmark
+      run: |
+        cd $GITHUB_WORKSPACE &&  pytest -s -v tests/test_cli/test_bench.py

+ 3 - 1
tests/test_cli/lib/common.py

@@ -5,7 +5,9 @@ def check_shell(cmd):
     assert res == 0
 
 def count_folders_and_check_contents(file_path):
-    # 获取目录下的所有文件和文件夹
+    """
+    获取文件夹大小
+    """
     if os.path.exists(file_path):
         folder_count = os.path.getsize(file_path)
         assert folder_count > 0

+ 1 - 22
tests/test_cli/test_bench.py

@@ -30,9 +30,7 @@ class TestBench():
             last_editdistance = last_score["average_edit_distance"]
             last_bleu = last_score["average_bleu_score"]
         except IOError:
-            print ("result.json not exist")
-        test_cli()
-    
+            print ("result.json not exist")    
         os.system(f"python lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}")
         now_score = get_score()
         print ("now_score:", now_score)
@@ -54,23 +52,4 @@ def get_score():
     res = score.summary_scores()
     return res
 
-def test_cli():
-    """
-    test pdf-command cli
-    """
-    rm_cmd = f"rm -rf {pdf_res_path}"
-    os.system(rm_cmd)
-    os.makedirs(pdf_res_path)
-    cmd = f'magic-pdf pdf-command --pdf {os.path.join(pdf_dev_path, "mineru")}'
-    os.system(cmd)
-    for root, dirs, files in os.walk(pdf_res_path):
-         for magic_file in files:
-            target_dir = os.path.join(pdf_dev_path, "mineru")
-            if magic_file.endswith(".md"):
-                source_file = os.path.join(root, magic_file)
-                target_file = os.path.join(pdf_dev_path, "mineru", magic_file)
-                if not os.path.exists(target_dir):
-                    os.makedirs(target_dir) 
-                shutil.copy(source_file, target_file)
-