@@ -0,0 +1,82 @@
+import json
+import os
+
+import pytest
+
+from magic_pdf.pipe.UNIPipe import UNIPipe
+from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
+
+from conf import conf
+from lib import calculate_score
+
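+# `conf.conf` is expected to be a dict providing the three paths read below;
+# the concrete values are environment-specific (the example is illustrative only):
+#   conf = {"pdf_res_path": "/tmp/magic-pdf",
+#           "code_path": "/home/ci/MinerU",
+#           "pdf_dev_path": "/home/ci/pdf_dev"}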
+pdf_res_path = conf.conf["pdf_res_path"]
+code_path = conf.conf["code_path"]
+pdf_dev_path = conf.conf["pdf_dev_path"]
+
+
+class TestCliCuda:
+    """
+    CLI tests that run the magic-pdf pipeline on CUDA.
+    """
+
+    def test_pdf_sdk_cuda(self):
+        """
+        Convert the sample PDFs on CUDA, score the markdown output, and make
+        sure the scores do not regress against the previous run.
+        """
+        clean_magicpdf(pdf_res_path)
+        pdf_to_markdown()
+        # Baseline: metrics recorded by the previous run (last line of result.json).
+        with open(os.path.join(pdf_dev_path, "result.json"), "r", encoding="utf-8") as fr:
+            last_score = json.loads(fr.readlines()[-1].strip())
+        last_simscore = last_score["average_sim_score"]
+        last_editdistance = last_score["average_edit_distance"]
+        last_bleu = last_score["average_bleu_score"]
+        # Pre-clean the generated output, then compute the new scores.
+        os.system(f"python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}")
+        now_score = get_score()
+        print("now_score:", now_score)
+        # Persist the new metrics for this CI run.
+        os.makedirs(os.path.join(pdf_dev_path, "ci"), exist_ok=True)
+        with open(os.path.join(pdf_dev_path, "ci", "result.json"), "w+", encoding="utf-8") as fw:
+            fw.write(json.dumps(now_score) + "\n")
+        now_simscore = now_score["average_sim_score"]
+        now_editdistance = now_score["average_edit_distance"]
+        now_bleu = now_score["average_bleu_score"]
+        # None of the metrics may regress against the previous run.
+        assert last_simscore <= now_simscore
+        assert last_editdistance <= now_editdistance
+        assert last_bleu <= now_bleu
+
+
+def pdf_to_markdown():
+    """
+    Convert every PDF under <pdf_dev_path>/pdf to markdown with the magic-pdf CLI.
+    """
+    demo_names = []
+    pdf_dir = os.path.join(pdf_dev_path, "pdf")
+    for pdf_file in os.listdir(pdf_dir):
+        if pdf_file.endswith(".pdf"):
+            demo_names.append(os.path.splitext(pdf_file)[0])
+    for demo_name in demo_names:
+        pdf_path = os.path.join(pdf_dir, f"{demo_name}.pdf")
+        cmd = f"magic-pdf pdf-command --pdf {pdf_path} --inside_model true"
+        os.system(cmd)
+        # Collect the converted markdown under <pdf_dev_path>/mineru for scoring.
+        dir_path = os.path.join(pdf_dev_path, "mineru")
+        os.makedirs(dir_path, exist_ok=True)
+        res_path = os.path.join(dir_path, f"{demo_name}.md")
+        # src_path = os.path.join(pdf_res_path, "pdf", f"{demo_name}.pdf")
+        # shutil.copy(src_path, res_path)
+
+
+def get_score():
+    """
+    Score the generated markdown and return the summary metrics.
+    """
+    score = calculate_score.Scoring(os.path.join(pdf_dev_path, "result.json"))
+    score.calculate_similarity_total("mineru", pdf_dev_path)
+    res = score.summary_scores()
+    return res
+
+
+def clean_magicpdf(pdf_res_path):
+    """
+    Remove any output left over from a previous magic-pdf run.
+    """
+    cmd = f"rm -rf {pdf_res_path}"
+    os.system(cmd)
+
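
For reference, the regression gate above compares the fresh metrics against the last line of `result.json`. A minimal standalone sketch of that check, assuming `result.json` contains one JSON object per line with the three keys the test reads (the path and numbers below are illustrative only):

```python
import json
import os

pdf_dev_path = "/path/to/pdf_dev"  # illustrative; the test takes this from conf.conf

# Baseline: the metrics written by the previous run (last JSON line wins).
with open(os.path.join(pdf_dev_path, "result.json"), encoding="utf-8") as fr:
    baseline = json.loads(fr.readlines()[-1])

# A fresh scoring run produces a dict shaped like this (numbers made up).
current = {
    "average_sim_score": 0.91,
    "average_edit_distance": 0.83,
    "average_bleu_score": 0.88,
}

# The CI gate: none of the three metrics may drop below the baseline.
for key in ("average_sim_score", "average_edit_distance", "average_bleu_score"):
    assert baseline[key] <= current[key], f"{key} regressed"
```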