Browse Source

feat: add test case (#645)

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

---------

Co-authored-by: quyuan <quyuan@pjlab.org>
yyy 1 year ago
parent
commit
0aa4577874

+ 9 - 12
.github/workflows/cli.yml

@@ -10,7 +10,6 @@ on:
     paths-ignore:
       - "cmds/**"
       - "**.md"
-      - "**.yml"
   pull_request:
     branches:
       - "master"
@@ -18,12 +17,11 @@ on:
     paths-ignore:
       - "cmds/**"
       - "**.md"
-      - "**.yml"
   workflow_dispatch:
 jobs:
   cli-test:
     runs-on: pdf
-    timeout-minutes: 120
+    timeout-minutes: 240
     strategy:
       fail-fast: true
 
@@ -33,17 +31,16 @@ jobs:
       with:
         fetch-depth: 2
 
-    - name: install
+    - name: install&test
       run: |
-        echo $GITHUB_WORKSPACE && sh tests/retry_env.sh
-    - name: unit test
-      run: | 
-        cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
-        cd $GITHUB_WORKSPACE && export PYTHONPATH=. && coverage run -m  pytest  tests/unittest --cov=magic_pdf/ --cov-report term-missing --cov-report html
+        source activate mineru
+        conda env list
+        pip show coverage
+        # cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
+        cd $GITHUB_WORKSPACE && python tests/clean_coverage.py      
+        cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/  --cov-report html --cov-report term-missing
         cd $GITHUB_WORKSPACE && python tests/get_coverage.py
-    - name: cli test
-      run: |
-        source ~/.bashrc && cd $GITHUB_WORKSPACE &&  pytest -s -v tests/test_cli/test_cli.py
+        cd $GITHUB_WORKSPACE && pytest -m P0 -s -v tests/test_cli/test_cli_sdk.py
 
   notify_to_feishu:
     if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}

+ 55 - 0
.github/workflows/daily.yml

@@ -0,0 +1,55 @@
+# Scheduled (daily) workflow: on the runner labeled "pdf", activate the "mineru" conda env, run the unit tests under coverage, then run tests/test_cli/test_cli_sdk.py
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: mineru
+on:
+  schedule:
+    - cron: '0 22 * * *'  # run every day at 22:00 UTC (GitHub Actions cron schedules use UTC)
+jobs:
+  cli-test:
+    runs-on: pdf
+    timeout-minutes: 240
+    strategy:
+      fail-fast: true
+
+    steps:
+    - name: PDF cli
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 2
+
+    - name: install&test
+      run: |
+        source activate mineru
+        conda env list
+        pip show coverage
+        # cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
+        cd $GITHUB_WORKSPACE && python tests/clean_coverage.py      
+        cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/  --cov-report html --cov-report term-missing
+        cd $GITHUB_WORKSPACE && python tests/get_coverage.py
+        cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
+
+  notify_to_feishu:
+    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
+    needs: cli-test
+    runs-on: pdf
+    steps:
+    - name: get_actor
+      run: |
+          metion_list="dt-yy"
+          echo $GITHUB_ACTOR
+          if [[ $GITHUB_ACTOR == "drunkpig" ]]; then
+            metion_list="xuchao"
+          elif [[ $GITHUB_ACTOR == "myhloli" ]]; then
+            metion_list="zhaoxiaomeng"
+          elif [[ $GITHUB_ACTOR == "icecraft" ]]; then
+            metion_list="xurui1"
+          fi
+          echo $metion_list
+          echo "METIONS=$metion_list" >> "$GITHUB_ENV"
+          echo ${{ env.METIONS }}
+
+    - name: notify
+      run: |
+        echo ${{ secrets.USER_ID }}
+        curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}'  ${{ secrets.WEBHOOK_URL }}

+ 61 - 0
.github/workflows/huigui.yml

@@ -0,0 +1,61 @@
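+# Runs on pushes to master or dev (ignoring cmds/** and *.md changes) and on manual dispatch: on the runner labeled "pdf", activate the "mineru" conda env, run the unit tests under coverage, then run tests/test_cli/test_cli_sdk.py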
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: mineru
+on:
+  push:
+    branches:
+      - "master"
+      - "dev"
+    paths-ignore:
+      - "cmds/**"
+      - "**.md"
+  workflow_dispatch:
+jobs:
+  cli-test:
+    runs-on: pdf
+    timeout-minutes: 240
+    strategy:
+      fail-fast: true
+
+    steps:
+    - name: PDF cli
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 2
+
+    - name: install&test
+      run: |
+        source activate mineru
+        conda env list
+        pip show coverage
+        # cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
+        cd $GITHUB_WORKSPACE && python tests/clean_coverage.py      
+        cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/  --cov-report html --cov-report term-missing
+        cd $GITHUB_WORKSPACE && python tests/get_coverage.py
+        cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
+
+  notify_to_feishu:
+    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
+    needs: cli-test
+    runs-on: pdf
+    steps:
+    - name: get_actor
+      run: |
+          metion_list="dt-yy"
+          echo $GITHUB_ACTOR
+          if [[ $GITHUB_ACTOR == "drunkpig" ]]; then
+            metion_list="xuchao"
+          elif [[ $GITHUB_ACTOR == "myhloli" ]]; then
+            metion_list="zhaoxiaomeng"
+          elif [[ $GITHUB_ACTOR == "icecraft" ]]; then
+            metion_list="xurui1"
+          fi
+          echo $metion_list
+          echo "METIONS=$metion_list" >> "$GITHUB_ENV"
+          echo ${{ env.METIONS }}
+
+    - name: notify
+      run: |
+        echo ${{ secrets.USER_ID }}
+        curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}'  ${{ secrets.WEBHOOK_URL }}

+ 0 - 22
.github/workflows/update_base.yml

@@ -1,22 +0,0 @@
-# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
-
-name: update-base
-on:
-  push:
-    tags:
-      - '*released'
-  workflow_dispatch:
-jobs:
-  pdf-test:
-    runs-on: pdf
-    timeout-minutes: 40
-
-
-    steps:
-    - name: update-base
-      uses: actions/checkout@v3
-    - name: start-update
-      run: |
-        echo "start test"
-  

+ 2 - 1
.gitignore

@@ -1,5 +1,6 @@
 *.tar
 *.tar.gz
+*.zip
 venv*/
 envs/
 slurm_logs/
@@ -31,7 +32,7 @@ tmp
 .vscode
 .vscode/
 ocr_demo
-
+.coveragerc
 /app/common/__init__.py
 /magic_pdf/config/__init__.py
 source.dev.env

+ 2 - 1
requirements-qa.txt

@@ -16,4 +16,5 @@ pypandoc
 pyopenssl==24.0.0
 struct-eqtable==0.1.0
 pytest-cov
-beautifulsoup4
+beautifulsoup4
+coverage

+ 2 - 1
tests/clean_coverage.py

@@ -21,4 +21,5 @@ def delete_file(path):
             print(f"Error deleting directory '{path}': {e}")
 
 if __name__ == "__main__":
-    delete_file("htmlcov")
+    delete_file("htmlcov/")
+    #delete_file(".coverage")

+ 4 - 7
tests/retry_env.sh

@@ -1,16 +1,13 @@
 #!/bin/bash
 
-# 定义最大重试次数
 max_retries=5
 retry_count=0
 
 while true; do
     # prepare env
-    source activate MinerU
-    pip install -r requirements-qa.txt
-    pip uninstall magic-pdf
-    pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com
-    pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
+    #python -m pip install -r requirements-qa.txt
+    python -m pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple
+    python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
     exit_code=$?
     if [ $exit_code -eq 0 ]; then
         echo "test.sh 成功执行!"
@@ -22,6 +19,6 @@ while true; do
             exit 1
         fi
         echo "test.sh 执行失败 (退出码: $exit_code)。尝试第 $retry_count 次重试..."
-        sleep 5  # 等待 5 秒后重试
+        sleep 5
     fi
 done

+ 1 - 1
tests/test_cli/conf/conf.py

@@ -4,5 +4,5 @@ conf = {
 "pdf_dev_path" : os.environ.get('GITHUB_WORKSPACE') + "/tests/test_cli/pdf_dev",
 "pdf_res_path": "/tmp/magic-pdf",
 "jsonl_path": "s3://llm-qatest-pnorm/mineru/test/line1.jsonl",
-"s3_pdf_path": "s3://llm-qatest-pnorm/mineru/test/test.pdf"
+"s3_pdf_path": "s3://llm-qatest-pnorm/mineru/test/test_rearch_report.pdf"
 }

+ 17 - 0
tests/test_cli/conftest.py

@@ -0,0 +1,17 @@
+import pytest
+import torch
+
+def clear_gpu_memory():
+    '''
+    release unoccupied cached GPU memory held by PyTorch's caching allocator
+    '''
+    torch.cuda.empty_cache()
+    print("GPU memory cleared.")
+
+@pytest.hookimpl(tryfirst=True, hookwrapper=True)
+def pytest_runtest_teardown(item, nextitem):
+    '''
+    hook wrapper: let the regular teardown run first (yield), then clear the GPU cache after each test
+    '''
+    yield
+    clear_gpu_memory()

+ 39 - 3
tests/test_cli/lib/common.py

@@ -1,13 +1,20 @@
 """common definitions."""
 import os
 import shutil
-
-
+import re
+import json
 def check_shell(cmd):
     """shell successful."""
     res = os.system(cmd)
     assert res == 0
 
+def update_config_file(file_path, key, value):
+    """update config file."""
+    with open(file_path, 'r', encoding="utf-8") as f:
+        config  = json.loads(f.read())
+    config[key] = value
+    with open(file_path, 'w', encoding="utf-8") as f:
+        f.write(json.dumps(config))
 
 def cli_count_folders_and_check_contents(file_path):
     """count cli files."""
@@ -40,4 +47,33 @@ def delete_file(path):
             shutil.rmtree(path)
             print(f"Directory '{path}' and its contents deleted.")
         except TypeError as e:
-            print(f"Error deleting directory '{path}': {e}")
+            print(f"Error deleting directory '{path}': {e}")
+
+def check_latex_table_exists(file_path):
+    """check latex table exists."""
+    pattern = r'\\begin\{tabular\}.*?\\end\{tabular\}'
+    with open(file_path, 'r', encoding='utf-8') as file:
+        content = file.read()
+    matches = re.findall(pattern, content, re.DOTALL)
+    return len(matches) > 0
+
+def check_html_table_exists(file_path):
+    """check html table exists."""
+    pattern = r'<table.*?>.*?</table>'
+    with open(file_path, 'r', encoding='utf-8') as file:
+        content = file.read()
+    matches = re.findall(pattern, content, re.DOTALL)
+    return len(matches) > 0
+
+def check_close_tables(file_path):
+    """return True when the file contains neither a LaTeX tabular nor an HTML table."""
+    latex_pattern = r'\\begin\{tabular\}.*?\\end\{tabular\}'
+    html_pattern = r'<table.*?>.*?</table>'
+    with open(file_path, 'r', encoding='utf-8') as file:
+        content = file.read()
+    latex_matches = re.findall(latex_pattern, content, re.DOTALL)
+    html_matches = re.findall(html_pattern, content, re.DOTALL)
+    if len(latex_matches) == 0 and len(html_matches) == 0:
+        return True
+    else:
+        return False

+ 0 - 0
tests/test_cli/pdf_dev/pdf/research_report_1f978cd81fb7260c8f7644039ec2c054.pdf → tests/test_cli/pdf_dev/pdf/test_rearch_report.pdf


+ 95 - 21
tests/test_cli/test_cli_sdk.py

@@ -1,11 +1,10 @@
 """test cli and sdk."""
 import logging
 import os
-
 import pytest
 from conf import conf
 from lib import common
-
+import time
 import magic_pdf.model as model_config
 from magic_pdf.pipe.UNIPipe import UNIPipe
 from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
@@ -57,6 +56,7 @@ class TestCli:
     @pytest.mark.P0
     def test_pdf_ocr_sdk(self):
         """pdf sdk ocr test."""
+        time.sleep(2)
         demo_names = list()
         pdf_path = os.path.join(pdf_dev_path, 'pdf')
         for pdf_file in os.listdir(pdf_path):
@@ -88,10 +88,11 @@ class TestCli:
             with open(res_path, 'w+', encoding='utf-8') as f:
                 f.write(md_content)
             common.sdk_count_folders_and_check_contents(res_path)
-
+    
     @pytest.mark.P0
     def test_pdf_txt_sdk(self):
         """pdf sdk txt test."""
+        time.sleep(2)
         demo_names = list()
         pdf_path = os.path.join(pdf_dev_path, 'pdf')
         for pdf_file in os.listdir(pdf_path):
@@ -99,7 +100,6 @@ class TestCli:
                 demo_names.append(pdf_file.split('.')[0])
         for demo_name in demo_names:
             pdf_path = os.path.join(pdf_dev_path, 'pdf', f'{demo_name}.pdf')
-            print(pdf_path)
             pdf_bytes = open(pdf_path, 'rb').read()
             local_image_dir = os.path.join(pdf_dev_path, 'pdf', 'images')
             image_dir = str(os.path.basename(local_image_dir))
@@ -123,10 +123,11 @@ class TestCli:
             with open(res_path, 'w+', encoding='utf-8') as f:
                 f.write(md_content)
             common.sdk_count_folders_and_check_contents(res_path)
-
+    
     @pytest.mark.P0
     def test_pdf_cli_auto(self):
         """magic_pdf cli test auto."""
+        time.sleep(2)
         demo_names = []
         pdf_path = os.path.join(pdf_dev_path, 'pdf')
         for pdf_file in os.listdir(pdf_path):
@@ -141,10 +142,11 @@ class TestCli:
             os.system(cmd)
             common.cli_count_folders_and_check_contents(
                 os.path.join(res_path, demo_name, 'auto'))
-
+   
     @pytest.mark.P0
-    def test_pdf_clit_txt(self):
+    def test_pdf_cli_txt(self):
         """magic_pdf cli test txt."""
+        time.sleep(2)
         demo_names = []
         pdf_path = os.path.join(pdf_dev_path, 'pdf')
         for pdf_file in os.listdir(pdf_path):
@@ -159,10 +161,11 @@ class TestCli:
             os.system(cmd)
             common.cli_count_folders_and_check_contents(
                 os.path.join(res_path, demo_name, 'txt'))
-
+   
     @pytest.mark.P0
-    def test_pdf_clit_ocr(self):
+    def test_pdf_cli_ocr(self):
         """magic_pdf cli test ocr."""
+        time.sleep(2)
         demo_names = []
         pdf_path = os.path.join(pdf_dev_path, 'pdf')
         for pdf_file in os.listdir(pdf_path):
@@ -177,85 +180,102 @@ class TestCli:
             os.system(cmd)
             common.cli_count_folders_and_check_contents(
                 os.path.join(res_path, demo_name, 'ocr'))
-
+    
+    @pytest.mark.skip(reason='out-of-date api')
     @pytest.mark.P1
     def test_pdf_dev_cli_local_jsonl_txt(self):
         """magic_pdf_dev cli local txt."""
+        time.sleep(2)
         jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
         cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, "txt")
         logging.info(cmd)
         os.system(cmd)
 
-
+    @pytest.mark.skip(reason='out-of-date api')
     @pytest.mark.P1
     def test_pdf_dev_cli_local_jsonl_ocr(self):
         """magic_pdf_dev cli local ocr."""
+        time.sleep(2)
         jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
         cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'ocr')
         logging.info(cmd)
         os.system(cmd)
 
+    @pytest.mark.skip(reason='out-of-date api')
     @pytest.mark.P1
     def test_pdf_dev_cli_local_jsonl_auto(self):
         """magic_pdf_dev cli local auto."""
+        time.sleep(2)
         jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
         cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'auto')
         logging.info(cmd)
         os.system(cmd)
-
+    
+    @pytest.mark.skip(reason='out-of-date api')
     @pytest.mark.P1
     def test_pdf_dev_cli_s3_jsonl_txt(self):
         """magic_pdf_dev cli s3 txt."""
+        time.sleep(2)
         jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
         cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, "txt")
         logging.info(cmd)
         os.system(cmd)
 
-
+    @pytest.mark.skip(reason='out-of-date api')
     @pytest.mark.P1
     def test_pdf_dev_cli_s3_jsonl_ocr(self):
         """magic_pdf_dev cli s3 ocr."""
+        time.sleep(2)
         jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
         cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'ocr')
         logging.info(cmd)
         os.system(cmd)
 
+    @pytest.mark.skip(reason='out-of-date api')
     @pytest.mark.P1
     def test_pdf_dev_cli_s3_jsonl_auto(self):
         """magic_pdf_dev cli s3 auto."""
+        time.sleep(2)
         jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
         cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'auto')
         logging.info(cmd)
         os.system(cmd)
 
-
     @pytest.mark.P1
     def test_pdf_dev_cli_pdf_json_auto(self):
         """magic_pdf_dev cli pdf+json auto."""
+        time.sleep(2)
         json_path = os.path.join(pdf_dev_path, 'test_model.json')
-        pdf_path = os.path.join(pdf_dev_path, 'pdf', 'research_report_1f978cd81fb7260c8f7644039ec2c054.pdf')
+        pdf_path = os.path.join(pdf_dev_path, 'pdf', 'test_rearch_report.pdf')
         cmd = 'magic-pdf-dev --pdf %s --json %s --method %s' % (pdf_path, json_path, 'auto')
         logging.info(cmd)
         os.system(cmd)
-    
+   
+    @pytest.mark.skip(reason='out-of-date api')
     @pytest.mark.P1
     def test_pdf_dev_cli_pdf_json_ocr(self):
         """magic_pdf_dev cli pdf+json ocr."""
+        time.sleep(2)
         json_path = os.path.join(pdf_dev_path, 'test_model.json')
-        pdf_path = os.path.join(pdf_dev_path, 'pdf', 'research_report_1f978cd81fb7260c8f7644039ec2c054.pdf')
+        pdf_path = os.path.join(pdf_dev_path, 'pdf', 'test_rearch_report.pdf')
         cmd = 'magic-pdf-dev --pdf %s --json %s --method %s' % (pdf_path, json_path, 'auto')
         logging.info(cmd)
         os.system(cmd)
-
-
+    
     @pytest.mark.P1
     def test_s3_sdk_suto(self):
-        pdf_ak = os.environ.get('pdf_ak', "")
+        """
+        test s3 sdk auto.
+        """
+        time.sleep(2)
+        pdf_ak = os.getenv('pdf_ak')
+        print (pdf_ak)
         pdf_sk = os.environ.get('pdf_sk', "")
         pdf_bucket = os.environ.get('bucket', "")
         pdf_endpoint = os.environ.get('pdf_endpoint', "")
         s3_pdf_path = conf.conf["s3_pdf_path"]
-        image_dir = "s3://" + pdf_bucket + "/mineru/test/test.md"
+        image_dir = "s3://" + pdf_bucket + "/mineru/test/output"
+        print (image_dir)
         s3pdf_cli = S3ReaderWriter(pdf_ak, pdf_sk, pdf_endpoint)
         s3image_cli = S3ReaderWriter(pdf_ak, pdf_sk, pdf_endpoint, parent_path=image_dir)
         pdf_bytes = s3pdf_cli.read(s3_pdf_path, mode=s3pdf_cli.MODE_BIN)
@@ -267,6 +287,60 @@ class TestCli:
         md_content = pipe.pipe_mk_markdown(image_dir, drop_mode="none")
         assert len(md_content) > 0
 
+    @pytest.mark.P1
+    def test_local_magic_pdf_open_st_table(self):
+        """magic pdf cli open st table."""
+        time.sleep(2)
+        pre_cmd = "cp ~/magic_pdf_st.json ~/magic-pdf.json"
+        print (pre_cmd)
+        os.system(pre_cmd)
+        pdf_path = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
+        common.delete_file(pdf_res_path)
+        cli_cmd = "magic-pdf -p %s -o %s" % (pdf_path, pdf_res_path)
+        os.system(cli_cmd)
+        res = common.check_latex_table_exists(os.path.join(pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md"))
+        assert res is True
+  
+    @pytest.mark.P1
+    def test_local_magic_pdf_open_html_table(self):
+        """magic pdf cli open html table."""
+        time.sleep(2)
+        pre_cmd = "cp ~/magic_pdf_html.json ~/magic-pdf.json"
+        os.system(pre_cmd)
+        pdf_path = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
+        common.delete_file(pdf_res_path)
+        cli_cmd = "magic-pdf -p %s -o %s" % (pdf_path, pdf_res_path)
+        os.system(cli_cmd)
+        res = common.check_html_table_exists(os.path.join(pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md"))
+        assert res is True
+    
+    @pytest.mark.P1
+    def test_magic_pdf_close_html_table_cpu(self):
+        """magic pdf cli html table in cpu mode: the output markdown must contain an HTML table."""
+        time.sleep(2)
+        pre_cmd = "cp ~/magic_pdf_html_table_cpu.json ~/magic-pdf.json"
+        os.system(pre_cmd)
+        pdf_path = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
+        common.delete_file(pdf_res_path)
+        cli_cmd = "magic-pdf -p %s -o %s" % (pdf_path, pdf_res_path)
+        os.system(cli_cmd)
+        res = common.check_html_table_exists(os.path.join(pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md"))
+        assert res is  True
+
+    @pytest.mark.P1
+    def test_local_magic_pdf_close_html_table(self):
+        """magic pdf cli close table."""
+        time.sleep(2)
+        pre_cmd = "cp ~/magic_pdf_close_table.json ~/magic-pdf.json"
+        os.system(pre_cmd)
+        pdf_path = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
+        common.delete_file(pdf_res_path)
+        cli_cmd = "magic-pdf -p %s -o %s" % (pdf_path, pdf_res_path)
+        os.system(cli_cmd)
+        res = common.check_close_tables(os.path.join(pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md"))
+        assert res is True
+    
 
+ 
 if __name__ == '__main__':
     pytest.main()

+ 0 - 0
tests/test_cli/test_magic-pdf-dev_cli.py


+ 0 - 36
tests/test_cli/test_performence.py

@@ -1,36 +0,0 @@
-"""
-test performance
-"""
-import os
-import shutil
-import json
-from lib import calculate_score
-import pytest
-from conf import conf
-
-code_path = os.environ.get('GITHUB_WORKSPACE')
-pdf_dev_path = conf.conf["pdf_dev_path"]
-pdf_res_path = conf.conf["pdf_res_path"]
-
-class TestTable():
-    """
-    test table
-    """
-    def test_perf_close_table(self):
-        """
-        test perf when close table
-        """
-
-
-
-
-def get_score():
-    """
-    get score
-    """
-    score = calculate_score.Scoring(os.path.join(pdf_dev_path, "result.json"))
-    score.calculate_similarity_total("mineru", pdf_dev_path)
-    res = score.summary_scores()
-    return res
-
-

+ 0 - 54
tests/test_cli/test_table.py

@@ -1,54 +0,0 @@
-"""
-test table case
-"""
-import os
-import shutil
-import json
-from lib import calculate_score
-import pytest
-from conf import conf
-
-code_path = os.environ.get('GITHUB_WORKSPACE')
-pdf_dev_path = conf.conf["pdf_dev_path"]
-pdf_res_path = conf.conf["pdf_res_path"]
-
-class TestTable():
-    """
-    test table
-    """
-    def test_paddle_table_master_cuda(self):
-        """
-        select table: paddle table master,mode is cuda
-        """
-    def test_paddle_table_master_cpu(self):
-        """
-        select table: paddle table master, mode is cpu
-        """
-    def test_st_table_cuda(self):
-        """
-        select table: ST, mode is cuda 
-        """
-
-    def test_st_table_cpu(self):
-        """
-        select table: ST, mode is cpu
-        """
-
-    def test_close_table_cuda(self):
-        """
-        close table, mode is cuda
-        """
-    
-
-
-
-def get_score():
-    """
-    get score
-    """
-    score = calculate_score.Scoring(os.path.join(pdf_dev_path, "result.json"))
-    score.calculate_similarity_total("mineru", pdf_dev_path)
-    res = score.summary_scores()
-    return res
-
-

+ 1 - 1
tests/unittest/test_table/test_tablemaster.py

@@ -7,7 +7,7 @@ class TestppTableModel:
         img = Image.open("tests/unittest/test_table/assets/table.jpg")
         # change the table model path
         config = {"device": "cuda",
-                  "model_dir": "/home/quyuan/PDF-Extract-Kit/models/TabRec/TableMaster"}
+                  "model_dir": "/home/quyuan/.cache/modelscope/hub/opendatalab/PDF-Extract-Kit/models/TabRec/TableMaster"}
         table_model = ppTableModel(config)
         res = table_model.img2html(img)
         true_value = """<td><table  border="1"><thead><tr><td><b>Methods</b></td><td><b>R</b></td><td><b>P</b></td><td><b>F</b></td><td><b>FPS</b></td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN[3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>-</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td>-</td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88.</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></td>\n"""