"""Demo: run the magic_pdf ``UNIPipe`` over ``demo1.pdf`` and save Markdown.

The inputs ``demo1.pdf`` and ``demo1.json`` are read from the directory that
contains this script.  The string returned by ``pipe_mk_markdown`` is written
to ``demo1.md`` in the current working directory.  Any exception raised along
the way is logged (with traceback) through loguru instead of propagating.
"""
import os
import json

from loguru import logger

from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter

try:
    # Resolve the inputs relative to this file so the demo does not depend on
    # the directory it is launched from.
    current_script_dir = os.path.dirname(os.path.abspath(__file__))
    demo_name = "demo1"
    pdf_path = os.path.join(current_script_dir, f"{demo_name}.pdf")
    model_path = os.path.join(current_script_dir, f"{demo_name}.json")

    # Context managers close both input files promptly, even when reading or
    # JSON decoding fails (previously the handles were left open).
    with open(pdf_path, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()
    with open(model_path, "r", encoding="utf-8") as model_file:
        model_json = json.load(model_file)

    # NOTE(review): "_pdf_type" is passed in empty; presumably it is filled in
    # by pipe_classify() -- confirm against UNIPipe.
    jso_useful_key = {"_pdf_type": "", "model_list": model_json}

    # The writer gets the full images path; pipe_mk_markdown only receives the
    # directory's base name ("images").
    local_image_dir = os.path.join(current_script_dir, 'images')
    image_dir = str(os.path.basename(local_image_dir))
    image_writer = DiskReaderWriter(local_image_dir)

    pipe = UNIPipe(pdf_bytes, jso_useful_key, image_writer)
    pipe.pipe_classify()
    pipe.pipe_parse()
    md_content = pipe.pipe_mk_markdown(image_dir, drop_mode="none")

    # Output path is relative, so the file lands in the current working
    # directory rather than next to the script.
    with open(f"{demo_name}.md", "w", encoding="utf-8") as f:
        f.write(md_content)
except Exception as e:
    # Top-level boundary of the demo: record the full traceback and exit
    # normally rather than crashing.
    logger.exception(e)