Jelajahi Sumber

Update demo.py

Xiaomeng Zhao 10 bulan lalu
induk
commit
3e8d8a3a3b
1 berkas diubah dengan 14 penambahan dan 2 penghapusan
  1. 14 2
      demo/demo.py

+ 14 - 2
demo/demo.py

@@ -5,6 +5,7 @@ from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedData
 from magic_pdf.data.dataset import PymuDocDataset
 from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
 from magic_pdf.config.enums import SupportedPdfParseMethod
+from magic_pdf.config.make_content_config import DropMode, MakeMode
 
 # args
 pdf_file_name = "demo1.pdf"  # replace with the real pdf path
@@ -19,7 +20,6 @@ os.makedirs(local_image_dir, exist_ok=True)
 image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
     local_md_dir
 )
-image_dir = str(os.path.basename(local_image_dir))
 
 # read bytes
 reader1 = FileBasedDataReader("")
@@ -45,6 +45,9 @@ else:
 ### draw model result on each page
 infer_result.draw_model(os.path.join(local_md_dir, f"{name_without_suff}_model.pdf"))
 
+### get model inference result
+model_inference_result = infer_result.get_infer_res()
+
 ### draw layout result on each page
 pipe_result.draw_layout(os.path.join(local_md_dir, f"{name_without_suff}_layout.pdf"))
 
@@ -55,4 +58,13 @@ pipe_result.draw_span(os.path.join(local_md_dir, f"{name_without_suff}_spans.pdf
 pipe_result.dump_md(md_writer, f"{name_without_suff}.md", image_dir)
 
 ### dump content list
-pipe_result.dump_content_list(md_writer, f"{name_without_suff}_content_list.json", image_dir)
+pipe_result.dump_content_list(md_writer, f"{name_without_suff}_content_list.json", image_dir)
+
+### get markdown content
+md_content = pipe_result.get_markdown(image_dir, drop_mode=DropMode.WHOLE_PDF, md_make_mode=MakeMode.MM_MD)
+
+### get content list content
+content_list_content = pipe_result.get_content_list(image_dir, drop_mode=DropMode.NONE, md_make_mode=MakeMode.STANDARD_FORMAT)
+
+### get middle json
+middle_json_content = pipe_result.get_middle_json()