Răsfoiți Sursa

update cli output files

赵小蒙 1 an în urmă
părinte
comite
3b7342b894
1 fișier modificat, cu 19 adăugiri și 3 ștergeri
  1. 19 3
      magic_pdf/cli/magicpdf.py

+ 19 - 3
magic_pdf/cli/magicpdf.py

@@ -100,18 +100,34 @@ def _do_parse(pdf_file_name, pdf_bytes, model_list, parse_method, image_writer,
     #              [pdf_file_name, pipe.pdf_mid_data['not_common_character_rate'], pipe.pdf_mid_data['not_printable_rate']])
     #              [pdf_file_name, pipe.pdf_mid_data['not_common_character_rate'], pipe.pdf_mid_data['not_printable_rate']])
 
 
     md_content = pipe.pipe_mk_markdown(image_dir, drop_mode=DropMode.NONE)
     md_content = pipe.pipe_mk_markdown(image_dir, drop_mode=DropMode.NONE)
+    '''写markdown'''
     md_writer.write(
     md_writer.write(
         content=md_content, path=f"{pdf_file_name}.md", mode=AbsReaderWriter.MODE_TXT
         content=md_content, path=f"{pdf_file_name}.md", mode=AbsReaderWriter.MODE_TXT
     )
     )
+    '''写middle_json'''
     md_writer.write(
     md_writer.write(
         content=json_parse.dumps(pipe.pdf_mid_data, ensure_ascii=False, indent=4),
         content=json_parse.dumps(pipe.pdf_mid_data, ensure_ascii=False, indent=4),
-        path=f"{pdf_file_name}.json",
+        path=f"{pdf_file_name}_middle.json",
         mode=AbsReaderWriter.MODE_TXT,
         mode=AbsReaderWriter.MODE_TXT,
     )
     )
-
+    '''写model_json'''
+    md_writer.write(
+        content=json_parse.dumps(pipe.model_list, ensure_ascii=False, indent=4),
+        path=f"{pdf_file_name}_model.json",
+        mode=AbsReaderWriter.MODE_TXT,
+    )
+    '''写源pdf'''
+    md_writer.write(
+        content=pdf_bytes,
+        path=f"{pdf_file_name}_origin.json",
+        mode=AbsReaderWriter.MODE_BIN,
+    )
     content_list = pipe.pipe_mk_uni_format(image_dir, drop_mode=DropMode.NONE)
     content_list = pipe.pipe_mk_uni_format(image_dir, drop_mode=DropMode.NONE)
+    '''写content_list'''
     md_writer.write(
     md_writer.write(
-        str(content_list), f"{pdf_file_name}.txt", AbsReaderWriter.MODE_TXT
+        content=json_parse.dumps(content_list, ensure_ascii=False, indent=4),
+        path=f"{pdf_file_name}_content_list.json",
+        mode=AbsReaderWriter.MODE_TXT
     )
     )