demo.py 1.0 KB

123456789101112131415161718192021222324252627
  1. import os
  2. import json
  3. from loguru import logger
  4. from magic_pdf.pipe.UNIPipe import UNIPipe
  5. from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
  # Demo script: feed "<demo_name>.pdf" and its companion "<demo_name>.json"
  # (both expected next to this script) through UNIPipe and write the
  # resulting Markdown to "<demo_name>.md".
  try:
      # Resolve inputs relative to this script, not the working directory.
      current_script_dir = os.path.dirname(os.path.abspath(__file__))
      demo_name = "demo1"
      pdf_path = os.path.join(current_script_dir, f"{demo_name}.pdf")
      model_path = os.path.join(current_script_dir, f"{demo_name}.json")

      # Context managers close both handles promptly, even when reading or
      # JSON decoding raises.
      with open(pdf_path, "rb") as pdf_file:
          pdf_bytes = pdf_file.read()
      with open(model_path, "r", encoding="utf-8") as model_file:
          model_json = json.load(model_file)

      # "_pdf_type" is passed empty; presumably pipe_classify() determines
      # it -- TODO confirm against the UNIPipe documentation.
      jso_useful_key = {"_pdf_type": "", "model_list": model_json}

      # The writer gets the full path of the "images" directory, while
      # pipe_mk_markdown() only receives its base name ("images").
      local_image_dir = os.path.join(current_script_dir, "images")
      image_dir = os.path.basename(local_image_dir)
      image_writer = DiskReaderWriter(local_image_dir)

      pipe = UNIPipe(pdf_bytes, jso_useful_key, image_writer)
      pipe.pipe_classify()
      pipe.pipe_parse()
      md_content = pipe.pipe_mk_markdown(image_dir, drop_mode="none")

      # NOTE: this is a relative path, so the Markdown file is written to
      # the current working directory, not next to the script.
      with open(f"{demo_name}.md", "w", encoding="utf-8") as f:
          f.write(md_content)
  except Exception as e:
      # Top-level boundary of the demo: log the failure with its traceback
      # instead of letting the script crash.
      logger.exception(e)