| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- import pytest
- import os
- from conf import conf
- import os
- import json
- from magic_pdf.pipe.UNIPipe import UNIPipe
- from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
- from lib import calculate_score
- import shutil
# Paths taken from the shared test configuration:
#   pdf_res_path - directory the converted markdown is read from and that is
#                  wiped at the start of the test
#   code_path    - read from the configuration; not referenced by the code below
#   pdf_dev_path - directory holding the "pdf" inputs and the "result.json" baseline
pdf_res_path, code_path, pdf_dev_path = (
    conf.conf["pdf_res_path"],
    conf.conf["code_path"],
    conf.conf["pdf_dev_path"],
)
class TestCliCuda:
    """
    test cli cuda
    """

    def test_pdf_sdk_cuda(self):
        """
        pdf sdk cuda

        Wipes the previous conversion output, converts every PDF again,
        scores the fresh markdown and compares the new scores with the
        baseline stored on the last line of ``result.json``. The new
        scores are also written to ``ci/result.json``.
        """
        clean_magicpdf(pdf_res_path)
        pdf_to_markdown()

        # The baseline is the last JSON line of the existing result file.
        # It is read before get_score() runs and inside a context manager
        # so the handle is closed even if parsing fails.
        baseline_path = os.path.join(pdf_dev_path, "result.json")
        with open(baseline_path, "r", encoding="utf-8") as fr:
            lines = fr.readlines()
        assert lines, f"baseline file {baseline_path} is empty"
        last_score = json.loads(lines[-1].strip())
        last_simscore = last_score["average_sim_score"]
        last_editdistance = last_score["average_edit_distance"]
        last_bleu = last_score["average_bleu_score"]

        os.system(f"python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}")
        now_score = get_score()
        print("now_score:", now_score)

        # Persist the fresh scores; the "with" block guarantees the line is
        # flushed to disk before the assertions below can abort the test.
        ci_dir = os.path.join(pdf_dev_path, "ci")
        os.makedirs(ci_dir, exist_ok=True)
        with open(os.path.join(ci_dir, "result.json"), "w+", encoding="utf-8") as fw:
            fw.write(json.dumps(now_score) + "\n")

        now_simscore = now_score["average_sim_score"]
        now_editdistance = now_score["average_edit_distance"]
        now_bleu = now_score["average_bleu_score"]

        # None of the three metrics may drop below its baseline value.
        assert last_simscore <= now_simscore
        # NOTE(review): this requires the edit distance not to decrease;
        # confirm that "higher is better" is really intended for this metric.
        assert last_editdistance <= now_editdistance
        assert last_bleu <= now_bleu
def pdf_to_markdown():
    """
    pdf to md

    Runs ``magic-pdf pdf-command`` on every ``*.pdf`` file found in
    ``<pdf_dev_path>/pdf`` and copies the markdown produced under
    ``<pdf_res_path>/<name>/auto/<name>.md`` to
    ``<pdf_dev_path>/mineru/<name>.md``.
    """
    suffix = '.pdf'
    pdf_dir = os.path.join(pdf_dev_path, "pdf")

    # Strip only the trailing ".pdf". Splitting on the first dot would turn
    # "report.v2.pdf" into "report" and point at a file that does not exist.
    demo_names = [
        pdf_file[:-len(suffix)]
        for pdf_file in os.listdir(pdf_dir)
        if pdf_file.endswith(suffix)
    ]

    # The destination directory is the same for every document, so create
    # it once up front instead of re-checking on each iteration.
    dir_path = os.path.join(pdf_dev_path, "mineru")
    os.makedirs(dir_path, exist_ok=True)

    for demo_name in demo_names:
        pdf_path = os.path.join(pdf_dir, f"{demo_name}.pdf")
        cmd = "magic-pdf pdf-command --pdf %s --inside_model true" % (pdf_path)
        os.system(cmd)
        res_path = os.path.join(dir_path, f"{demo_name}.md")
        src_path = os.path.join(pdf_res_path, demo_name, "auto", f"{demo_name}.md")
        shutil.copy(src_path, res_path)
def get_score():
    """
    get score

    Builds a ``calculate_score.Scoring`` object for
    ``<pdf_dev_path>/result.json``, runs its similarity calculation for the
    "mineru" tool over ``pdf_dev_path`` and returns the summary it reports.
    """
    result_file = os.path.join(pdf_dev_path, "result.json")
    scorer = calculate_score.Scoring(result_file)
    scorer.calculate_similarity_total("mineru", pdf_dev_path)
    return scorer.summary_scores()
def clean_magicpdf(pdf_res_path):
    """
    clean magicpdf

    Remove ``pdf_res_path`` and everything below it. A missing path is not
    an error and removal failures are ignored, matching the best-effort
    behaviour of ``rm -rf``. The path is taken literally, so names that
    contain spaces or shell metacharacters are handled correctly and no
    external ``rm`` binary is required.

    Args:
        pdf_res_path: directory (or single file / symlink) to delete.
    """
    if os.path.islink(pdf_res_path) or os.path.isfile(pdf_res_path):
        # rmtree refuses symlinks and plain files; unlink them directly.
        try:
            os.remove(pdf_res_path)
        except OSError:
            # Deliberately best-effort, like "rm -f".
            pass
    else:
        shutil.rmtree(pdf_res_path, ignore_errors=True)
|