Przeglądaj źródła

统一使用ocr组装markdown

赵小蒙 1 rok temu
rodzic
commit
1340a97ae2
1 zmieniony plik: 3 dodania i 2 usunięcia
  1. +3 -2
      magic_pdf/pipe/AbsPipe.py

+ 3 - 2
magic_pdf/pipe/AbsPipe.py

@@ -106,8 +106,9 @@ class AbsPipe(ABC):
         parse_type = pdf_mid_data["_parse_type"]
         pdf_info_list = pdf_mid_data["pdf_info"]
         if parse_type == AbsPipe.PIP_TXT:
-            content_list = mk_universal_format(pdf_info_list, img_buket_path)
-            md_content = mk_mm_markdown(content_list)
+            # content_list = mk_universal_format(pdf_info_list, img_buket_path)
+            # md_content = mk_mm_markdown(content_list)
+            md_content = ocr_mk_mm_markdown_with_para(pdf_info_list, img_buket_path)
         elif parse_type == AbsPipe.PIP_OCR:
             md_content = ocr_mk_mm_markdown_with_para(pdf_info_list, img_buket_path)
         return md_content