test_bench_gpu.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. import os
  2. from conf import conf
  3. import os
  4. import json
  5. from lib import calculate_score
  6. import shutil
# Paths looked up in the project configuration mapping (conf.conf).
# pdf_res_path: removed before each run; the converted Markdown is later read
# from <pdf_res_path>/<name>/auto/<name>.md.
pdf_res_path = conf.conf["pdf_res_path"]
# code_path: not referenced by the code in this section.
code_path = conf.conf["code_path"]
# pdf_dev_path: holds the input "pdf" directory, the baseline "result.json",
# the collected "mineru" Markdown directory and the "ci" output directory.
pdf_dev_path = conf.conf["pdf_dev_path"]
  10. class TestCliCuda:
  11. """
  12. test cli cuda
  13. """
  14. def test_pdf_sdk_cuda(self):
  15. """
  16. pdf sdk cuda
  17. """
  18. clean_magicpdf(pdf_res_path)
  19. pdf_to_markdown()
  20. fr = open(os.path.join(pdf_dev_path, "result.json"), "r", encoding="utf-8")
  21. lines = fr.readlines()
  22. last_line = lines[-1].strip()
  23. last_score = json.loads(last_line)
  24. last_simscore = last_score["average_sim_score"]
  25. last_editdistance = last_score["average_edit_distance"]
  26. last_bleu = last_score["average_bleu_score"]
  27. os.system(f"python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}")
  28. now_score = get_score()
  29. print ("now_score:", now_score)
  30. if not os.path.exists(os.path.join(pdf_dev_path, "ci")):
  31. os.makedirs(os.path.join(pdf_dev_path, "ci"), exist_ok=True)
  32. fw = open(os.path.join(pdf_dev_path, "ci", "result.json"), "w+", encoding="utf-8")
  33. fw.write(json.dumps(now_score) + "\n")
  34. now_simscore = now_score["average_sim_score"]
  35. now_editdistance = now_score["average_edit_distance"]
  36. now_bleu = now_score["average_bleu_score"]
  37. assert last_simscore <= now_simscore
  38. assert last_editdistance <= now_editdistance
  39. assert last_bleu <= now_bleu
  40. def pdf_to_markdown():
  41. """
  42. pdf to md
  43. """
  44. demo_names = list()
  45. pdf_path = os.path.join(pdf_dev_path, "pdf")
  46. for pdf_file in os.listdir(pdf_path):
  47. if pdf_file.endswith('.pdf'):
  48. demo_names.append(pdf_file.split('.')[0])
  49. for demo_name in demo_names:
  50. pdf_path = os.path.join(pdf_dev_path, "pdf", f"{demo_name}.pdf")
  51. cmd = "magic-pdf pdf-command --pdf %s --inside_model true" % (pdf_path)
  52. os.system(cmd)
  53. dir_path = os.path.join(pdf_dev_path, "mineru")
  54. if not os.path.exists(dir_path):
  55. os.makedirs(dir_path, exist_ok=True)
  56. res_path = os.path.join(dir_path, f"{demo_name}.md")
  57. src_path = os.path.join(pdf_res_path, demo_name, "auto", f"{demo_name}.md")
  58. shutil.copy(src_path, res_path)
  59. def get_score():
  60. """
  61. get score
  62. """
  63. score = calculate_score.Scoring(os.path.join(pdf_dev_path, "result.json"))
  64. score.calculate_similarity_total("mineru", pdf_dev_path)
  65. res = score.summary_scores()
  66. return res
  67. def clean_magicpdf(pdf_res_path):
  68. """
  69. clean magicpdf
  70. """
  71. cmd = "rm -rf %s" % (pdf_res_path)
  72. os.system(cmd)