Browse Source

update ci

quyuan 1 năm trước
mục cha
commit
275feb5c84
1 tập tin đã thay đổi với 21 bổ sung và 16 xóa
  1. 21 16
      tools/benchmark.py

+ 21 - 16
tools/benchmark.py

@@ -2,31 +2,36 @@ import zipfile
 import os
 import os
 import shutil
 import shutil
 code_path = os.environ.get('GITHUB_WORKSPACE')
 code_path = os.environ.get('GITHUB_WORKSPACE')
+#code_path = "/home/quyuan/actions-runner/_work/Magic-PDF/Magic-PDF.bk"
+#评测集存放路径
 pdf_dev_path = "/home/quyuan/data"
 pdf_dev_path = "/home/quyuan/data"
+#magicpdf跑测结果
 pdf_res_path = "/home/quyuan/code/Magic-PDF/Magic-PDF/Magic-PDF/ci/magic-pdf"
 pdf_res_path = "/home/quyuan/code/Magic-PDF/Magic-PDF/Magic-PDF/ci/magic-pdf"
+file_types = ["academic_literature", "atlas", "courseware", "colorful_textbook", "historical_documents", "notes", "ordinary_books", "ordinary_exam_paper", "ordinary_textbook", "research_report", "special_exam_paper"]
+#file_types = ["academic_literature"]
+
 def test_cli():
 def test_cli():
     magicpdf_path = os.path.join(pdf_dev_path, "output")
     magicpdf_path = os.path.join(pdf_dev_path, "output")
-    if not os.path.exists(magicpdf_path):
-        os.makedirs(magicpdf_path)
     cmd = 'cd %s && export PYTHONPATH=. && find %s -type f -name "*.pdf" | xargs -I{} python magic_pdf/cli/magicpdf.py  pdf-command  --pdf {}' % (code_path, magicpdf_path)
     cmd = 'cd %s && export PYTHONPATH=. && find %s -type f -name "*.pdf" | xargs -I{} python magic_pdf/cli/magicpdf.py  pdf-command  --pdf {}' % (code_path, magicpdf_path)
     os.system(cmd)
     os.system(cmd)
-   
-    for annotaion_name in os.walk(os.path.join(pdf_dev_path, "ci")):
-        if annotaion_name.endswith('.md'):
-            for pdf_res_path  in os.listdir(pdf_res_path):
-                if annotaion_name in os.path.join(pdf_res_path, annotaion_name, "auto"):
-                    prefix = annotaion_name.split('_')[-2]
-                    if not os.path.exists(os.join(pdf_dev_path, prefix)):
-                        #os.makedirs(os.path.join(pdf_dev_path, prefix))
-                        shutil.copy(os.path.join(pdf_res_path, annotaion_name.strip(".md"), "auto", annotaion_name), os.join(pdf_dev_path, "ci", prefix, annotaion_name))
-                   
+    for root, dirs, files in os.walk(pdf_res_path):
+         for magic_file in files:
+            for file_type in file_types:
+                target_dir = os.path.join(pdf_dev_path, "ci", file_type, "magicpdf")
+                if magic_file.endswith(".md") and magic_file.startswith(file_type):
+                    source_file = os.path.join(root, magic_file)
+                    target_file = os.path.join(pdf_dev_path, "ci", file_type, "magicpdf", magic_file)
+                    if not os.path.exists(target_dir):
+                         os.makedirs(target_dir) 
+                    shutil.copy(source_file, target_file)   
 
 
 def calculate_score():
 def calculate_score():
-    cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name annotations --download_dir %s" % (code_path, pdf_dev_path)
+    data_path = os.path.join(pdf_dev_path, "ci")
+    cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name annotations --download_dir %s" % (code_path, data_path)
     os.system(cmd)
     os.system(cmd)
-    cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name magicpdf --download_dir %s" % (code_path, pdf_dev_path)
+    cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name magicpdf --download_dir %s" % (code_path, data_path)
     os.system(cmd)
     os.system(cmd)
-    cmd = "cd %s && export PYTHONPATH=. && python tools/markdown_calculate.py --tool_name pdf-command --download_dir %s --results %s" % (code_path, pdf_dev_path, os.path.join(pdf_dev_path, "result.json"))
+    cmd = "cd %s && export PYTHONPATH=. && python tools/markdown_calculate.py --tool_name magicpdf --download_dir %s --results %s" % (code_path, data_path, os.path.join(data_path, "result.json"))
     os.system(cmd)
     os.system(cmd)
 
 
 
 
@@ -40,6 +45,6 @@ def extrat_zip(zip_file_path, extract_to_path):
 
 
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
-    extrat_zip(os.path.join(pdf_dev_path, 'output.zip'), os.path.join(pdf_dev_path,'datasets'))
+    extrat_zip(os.path.join(pdf_dev_path, 'output.zip'), os.path.join(pdf_dev_path))
     test_cli()
     test_cli()
     calculate_score()
     calculate_score()