test_bench_gpu.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. import pytest
  2. import os
  3. from conf import conf
  4. import os
  5. import json
  6. from magic_pdf.pipe.UNIPipe import UNIPipe
  7. from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
  8. from lib import calculate_score
  9. import shutil
  10. pdf_res_path = conf.conf["pdf_res_path"]
  11. code_path = conf.conf["code_path"]
  12. pdf_dev_path = conf.conf["pdf_dev_path"]
  13. class TestCliCuda:
  14. """
  15. test cli cuda
  16. """
  17. def test_pdf_sdk_cuda(self):
  18. """
  19. pdf sdk cuda
  20. """
  21. clean_magicpdf(pdf_res_path)
  22. pdf_to_markdown()
  23. fr = open(os.path.join(pdf_dev_path, "result.json"), "r", encoding="utf-8")
  24. lines = fr.readlines()
  25. last_line = lines[-1].strip()
  26. last_score = json.loads(last_line)
  27. last_simscore = last_score["average_sim_score"]
  28. last_editdistance = last_score["average_edit_distance"]
  29. last_bleu = last_score["average_bleu_score"]
  30. os.system(f"python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}")
  31. now_score = get_score()
  32. print ("now_score:", now_score)
  33. if not os.path.exists(os.path.join(pdf_dev_path, "ci")):
  34. os.makedirs(os.path.join(pdf_dev_path, "ci"), exist_ok=True)
  35. fw = open(os.path.join(pdf_dev_path, "ci", "result.json"), "w+", encoding="utf-8")
  36. fw.write(json.dumps(now_score) + "\n")
  37. now_simscore = now_score["average_sim_score"]
  38. now_editdistance = now_score["average_edit_distance"]
  39. now_bleu = now_score["average_bleu_score"]
  40. assert last_simscore <= now_simscore
  41. assert last_editdistance <= now_editdistance
  42. assert last_bleu <= now_bleu
  43. def pdf_to_markdown():
  44. """
  45. pdf to md
  46. """
  47. demo_names = list()
  48. pdf_path = os.path.join(pdf_dev_path, "pdf")
  49. for pdf_file in os.listdir(pdf_path):
  50. if pdf_file.endswith('.pdf'):
  51. demo_names.append(pdf_file.split('.')[0])
  52. for demo_name in demo_names:
  53. pdf_path = os.path.join(pdf_dev_path, "pdf", f"{demo_name}.pdf")
  54. cmd = "magic-pdf pdf-command --pdf %s --inside_model true" % (pdf_path)
  55. os.system(cmd)
  56. dir_path = os.path.join(pdf_dev_path, "mineru")
  57. if not os.path.exists(dir_path):
  58. os.makedirs(dir_path, exist_ok=True)
  59. res_path = os.path.join(dir_path, f"{demo_name}.md")
  60. src_path = os.path.join(pdf_res_path, demo_name, "auto", f"{demo_name}.md")
  61. shutil.copy(src_path, res_path)
  62. def get_score():
  63. """
  64. get score
  65. """
  66. score = calculate_score.Scoring(os.path.join(pdf_dev_path, "result.json"))
  67. score.calculate_similarity_total("mineru", pdf_dev_path)
  68. res = score.summary_scores()
  69. return res
  70. def clean_magicpdf(pdf_res_path):
  71. """
  72. clean magicpdf
  73. """
  74. cmd = "rm -rf %s" % (pdf_res_path)
  75. os.system(cmd)