# benchmark.py (2.6 KB)
  1. import zipfile
  2. import os
  3. import shutil
  4. code_path = os.environ.get('GITHUB_WORKSPACE')
  5. #code_path = "/home/quyuan/actions-runner/_work/Magic-PDF/Magic-PDF.bk"
  6. #评测集存放路径
  7. pdf_dev_path = "/home/quyuan/data"
  8. #magicpdf跑测结果
  9. pdf_res_path = "/home/quyuan/code/Magic-PDF/Magic-PDF/Magic-PDF/ci/magic-pdf"
  10. file_types = ["academic_literature", "atlas", "courseware", "colorful_textbook", "historical_documents", "notes", "ordinary_books", "ordinary_exam_paper", "ordinary_textbook", "research_report", "special_exam_paper"]
  11. #file_types = ["academic_literature"]
  12. def test_cli():
  13. magicpdf_path = os.path.join(pdf_dev_path, "output")
  14. cmd = 'cd %s && export PYTHONPATH=. && find %s -type f -name "*.pdf" | xargs -I{} python magic_pdf/cli/magicpdf.py pdf-command --pdf {}' % (code_path, magicpdf_path)
  15. os.system(cmd)
  16. rm_cmd = "rm -rf %s" % (pdf_res_path)
  17. os.system(rm_cmd)
  18. os.makedirs(pdf_res_path)
  19. for root, dirs, files in os.walk(pdf_res_path):
  20. for magic_file in files:
  21. for file_type in file_types:
  22. target_dir = os.path.join(pdf_dev_path, "ci", file_type, "magicpdf")
  23. if magic_file.endswith(".md") and magic_file.startswith(file_type):
  24. source_file = os.path.join(root, magic_file)
  25. target_file = os.path.join(pdf_dev_path, "ci", file_type, "magicpdf", magic_file)
  26. if not os.path.exists(target_dir):
  27. os.makedirs(target_dir)
  28. shutil.copy(source_file, target_file)
  29. def calculate_score():
  30. data_path = os.path.join(pdf_dev_path, "ci")
  31. cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name annotations --download_dir %s" % (code_path, data_path)
  32. os.system(cmd)
  33. cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name magicpdf --download_dir %s" % (code_path, data_path)
  34. os.system(cmd)
  35. cmd = "cd %s && export PYTHONPATH=. && python tools/markdown_calculate.py --tool_name magicpdf --download_dir %s --results %s" % (code_path, data_path, os.path.join(data_path, "result.json"))
  36. os.system(cmd)
  37. def extrat_zip(zip_file_path, extract_to_path):
  38. if zipfile.is_zipfile(zip_file_path):
  39. with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
  40. zip_ref.extractall(extract_to_path)
  41. print(f'Files extracted to {extract_to_path}')
  42. else:
  43. print(f'{zip_file_path} is not a zip file')
  44. if __name__ == "__main__":
  45. extrat_zip(os.path.join(pdf_dev_path, 'output.zip'), os.path.join(pdf_dev_path))
  46. test_cli()
  47. calculate_score()