소스 검색

add tool link

quyuan 1 년 전
부모
커밋
229dc3c7aa
2개의 파일 변경, 12줄 추가, 11줄 삭제
  1. 1 2
      .github/workflows/benchmark.yml
  2. 11 9
      tools/benchmark.py

+ 1 - 2
.github/workflows/benchmark.yml

@@ -47,8 +47,7 @@ jobs:
     - name: get-benchmark-result
       run: |
         echo "start test"
-        cd tools && python text_badcase.py pdf_json_label_0306.json pdf_json_label_0229.json json_files.zip text_overall base_data_text.json --badcase_path  text_badcase --s3_bucket_name llm-process-pperf --s3_file_directory qa-validate/pdf-datasets/badcase --AWS_ACCESS_KEY 7X9CWNHIVOHH3LXRD5WK  --AWS_SECRET_KEY IHLyTsv7h4ArzReLWUGZNKvwqB7CMrRi6e7ZyUt0 --END_POINT_URL http://p-ceph-norm-inside.pjlab.org.cn:80
-        python ocr_badcase.py pdf_json_label_0306.json ocr_dataset.json json_files.zip ocr_overall base_data_ocr.json --badcase_path ocr_badcase --s3_bucket_name llm-process-pperf --s3_file_directory qa-validate/pdf-datasets/badcase --AWS_ACCESS_KEY 7X9CWNHIVOHH3LXRD5WK  --AWS_SECRET_KEY IHLyTsv7h4ArzReLWUGZNKvwqB7CMrRi6e7ZyUt0 --END_POINT_URL http://p-ceph-norm-inside.pjlab.org.cn:80
+        cd tools && python benchmark.py
   
   notify_to_feishu:
     if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}

+ 11 - 9
tools/benchmark.py

@@ -7,22 +7,24 @@ pdf_res_path = "/home/quyuan/code/Magic-PDF/Magic-PDF/Magic-PDF/ci/magic-pdf"
 def test_cli():
     cmd = 'cd %s && export PYTHONPATH=. && find %s -type f -name "*.pdf" | xargs -I{} python magic_pdf/cli/magicpdf.py  pdf-command  --pdf {}' % (code_path, pdf_dev_path)
     os.system(cmd)
-    for annotaion_name in os.listdir(os.join(pdf_dev_path, "output")):
+    if not os.path.exists(os.path.join(pdf_dev_path, "output")):
+        os.makedirs(os.path.join(pdf_dev_path, "output"))
+    for annotaion_name in os.listdir(os.path.join(pdf_dev_path, "output")):
         if annotaion_name.endswith('.pdf'):
             for pdf_res_path  in os.listdir(pdf_res_path):
-                if ".md" in os.join(pdf_res_path, annotaion_name, "auto"):
+                if ".md" in os.path.join(pdf_res_path, annotaion_name, "auto"):
                     prefix = annotaion_name.split('_')[-2]
                     if not os.path.exists(os.join(pdf_dev_path, prefix)):
-                        os.makedirs(os.join(pdf_dev_path, prefix))
-                        shutil.copy(os.join(pdf_res_path, annotaion_name, "auto", annotaion_name + ".md"), os.join(pdf_dev_path, prefix, annotaion_name + ".md"))
+                        os.makedirs(os.path.join(pdf_dev_path, prefix))
+                        shutil.copy(os.path.join(pdf_res_path, annotaion_name, "auto", annotaion_name + ".md"), os.join(pdf_dev_path, prefix, annotaion_name + ".md"))
                    
 
 def calculate_score():
-    cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name annotations --download_dir %s" % pdf_dev_path
+    cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name annotations --download_dir %s" % (code_path, pdf_dev_path)
     os.system(cmd)
-    cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name magicpdf --download_dir %s" % (pdf_dev_path)
+    cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name magicpdf --download_dir %s" % (code_path, pdf_dev_path)
     os.system(cmd)
-    cmd = "cd %s && export PYTHONPATH=. && python tools/markdown_calculate.py --tool_name pdf-command --download_dir %s --results %s" % (pdf_dev_path, os.join(pdf_dev_path, "result.json"))
+    cmd = "cd %s && export PYTHONPATH=. && python tools/markdown_calculate.py --tool_name pdf-command --download_dir %s --results %s" % (code_path, pdf_dev_path, os.path.join(pdf_dev_path, "result.json"))
     os.system(cmd)
 
 
@@ -36,6 +38,6 @@ def extrat_zip(zip_file_path, extract_to_path):
 
 
 if __name__ == "__main__":
-    extrat_zip(os.join(pdf_dev_path, 'output.zip'), os.join(pdf_dev_path,'datasets'))
+    extrat_zip(os.path.join(pdf_dev_path, 'output.zip'), os.path.join(pdf_dev_path,'datasets'))
     test_cli()
-    calculate_score()
+    calculate_score()