赵小蒙 1 жил өмнө
parent
commit
7b937d58b8

+ 2 - 2
magic_pdf/libs/pdf_image_tools.py

@@ -1,4 +1,4 @@
-
+from magic_pdf.io.AbsReaderWriter import AbsReaderWriter
 from magic_pdf.libs.commons import fitz
 from loguru import logger
 from magic_pdf.libs.commons import join_path
@@ -28,7 +28,7 @@ def cut_image(bbox: tuple, page_num: int, page: fitz.Page, return_path, imageWri
 
     byte_data = pix.tobytes(output='jpeg', jpg_quality=95)
 
-    imageWriter.write(content=byte_data, path=img_hash256_path, mode="binary")
+    imageWriter.write(byte_data, img_hash256_path, AbsReaderWriter.MODE_BIN)
 
     return img_hash256_path
 

+ 4 - 4
magic_pdf/pipe/UNIPipe.py

@@ -105,8 +105,8 @@ if __name__ == '__main__':
 
     pdf_file_path = r"linshixuqiu\25536-00.pdf"
     model_file_path = r"linshixuqiu\25536-00.json"
-    pdf_bytes = drw.read(path=pdf_file_path, mode=AbsReaderWriter.MODE_BIN)
-    model_json_txt = drw.read(path=model_file_path, mode=AbsReaderWriter.MODE_TXT)
+    pdf_bytes = drw.read(pdf_file_path, AbsReaderWriter.MODE_BIN)
+    model_json_txt = drw.read(model_file_path, AbsReaderWriter.MODE_TXT)
 
     pdf_type = UNIPipe.classify(pdf_bytes)
     logger.info(f"pdf_type is {pdf_type}")
@@ -122,5 +122,5 @@ if __name__ == '__main__':
 
     md_content = pipe.mk_markdown(pdf_mid_data, "imgs")
     md_writer = DiskReaderWriter(write_path)
-    md_writer.write(content=md_content, path="25536-00.md", mode=AbsReaderWriter.MODE_TXT)
-    md_writer.write(content=json.dumps(JsonCompressor.decompress_json(pdf_mid_data), ensure_ascii=False, indent=4), path="25536-00.json", mode=AbsReaderWriter.MODE_TXT)
+    md_writer.write(md_content, "25536-00.md", AbsReaderWriter.MODE_TXT)
+    md_writer.write(json.dumps(JsonCompressor.decompress_json(pdf_mid_data), ensure_ascii=False, indent=4), "25536-00.json", AbsReaderWriter.MODE_TXT)