test_bench_gpu.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. import json
  2. import os
  3. import shutil
  4. from conf import conf
  5. from lib import calculate_score
  6. pdf_res_path = conf.conf['pdf_res_path']
  7. code_path = conf.conf['code_path']
  8. pdf_dev_path = conf.conf['pdf_dev_path']
  9. class TestCliCuda:
  10. """test cli cuda."""
  11. def test_pdf_sdk_cuda(self):
  12. """pdf sdk cuda."""
  13. clean_magicpdf(pdf_res_path)
  14. pdf_to_markdown()
  15. fr = open(os.path.join(pdf_dev_path, 'result.json'), 'r', encoding='utf-8')
  16. lines = fr.readlines()
  17. last_line = lines[-1].strip()
  18. last_score = json.loads(last_line)
  19. last_simscore = last_score['average_sim_score']
  20. last_editdistance = last_score['average_edit_distance']
  21. last_bleu = last_score['average_bleu_score']
  22. os.system(f'python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}')
  23. now_score = get_score()
  24. print ('now_score:', now_score)
  25. if not os.path.exists(os.path.join(pdf_dev_path, 'ci')):
  26. os.makedirs(os.path.join(pdf_dev_path, 'ci'), exist_ok=True)
  27. fw = open(os.path.join(pdf_dev_path, 'ci', 'result.json'), 'w+', encoding='utf-8')
  28. fw.write(json.dumps(now_score) + '\n')
  29. now_simscore = now_score['average_sim_score']
  30. now_editdistance = now_score['average_edit_distance']
  31. now_bleu = now_score['average_bleu_score']
  32. assert last_simscore <= now_simscore
  33. assert last_editdistance <= now_editdistance
  34. assert last_bleu <= now_bleu
  35. def pdf_to_markdown():
  36. """pdf to md."""
  37. demo_names = list()
  38. pdf_path = os.path.join(pdf_dev_path, 'pdf')
  39. for pdf_file in os.listdir(pdf_path):
  40. if pdf_file.endswith('.pdf'):
  41. demo_names.append(pdf_file.split('.')[0])
  42. for demo_name in demo_names:
  43. pdf_path = os.path.join(pdf_dev_path, 'pdf', f'{demo_name}.pdf')
  44. cmd = 'magic-pdf pdf-command --pdf %s --inside_model true' % (pdf_path)
  45. os.system(cmd)
  46. dir_path = os.path.join(pdf_dev_path, 'mineru')
  47. if not os.path.exists(dir_path):
  48. os.makedirs(dir_path, exist_ok=True)
  49. res_path = os.path.join(dir_path, f'{demo_name}.md')
  50. src_path = os.path.join(pdf_res_path, demo_name, 'auto', f'{demo_name}.md')
  51. shutil.copy(src_path, res_path)
  52. def get_score():
  53. """get score."""
  54. score = calculate_score.Scoring(os.path.join(pdf_dev_path, 'result.json'))
  55. score.calculate_similarity_total('mineru', pdf_dev_path)
  56. res = score.summary_scores()
  57. return res
  58. def clean_magicpdf(pdf_res_path):
  59. """clean magicpdf."""
  60. cmd = 'rm -rf %s' % (pdf_res_path)
  61. os.system(cmd)