|
|
@@ -7,7 +7,7 @@ from magic_pdf.dict2md.ocr_mkcontent import (
|
|
|
ocr_mk_nlp_markdown,
|
|
|
ocr_mk_mm_markdown,
|
|
|
ocr_mk_mm_standard_format,
|
|
|
- ocr_mk_mm_markdown_with_para, ocr_mk_mm_markdown_with_para_and_pagination,
|
|
|
+ ocr_mk_mm_markdown_with_para, ocr_mk_mm_markdown_with_para_and_pagination, ocr_mk_nlp_markdown_with_para,
|
|
|
)
|
|
|
from magic_pdf.libs.commons import (
|
|
|
read_file,
|
|
|
@@ -510,7 +510,8 @@ def ocr_pdf_intermediate_dict_to_markdown_with_para(jso: dict, debug_mode=False)
|
|
|
pdf_intermediate_dict = jso["pdf_intermediate_dict"]
|
|
|
# 将 pdf_intermediate_dict 解压
|
|
|
pdf_intermediate_dict = JsonCompressor.decompress_json(pdf_intermediate_dict)
|
|
|
- markdown_content = ocr_mk_mm_markdown_with_para(pdf_intermediate_dict)
|
|
|
+ # markdown_content = ocr_mk_mm_markdown_with_para(pdf_intermediate_dict)
|
|
|
+ markdown_content = ocr_mk_nlp_markdown_with_para(pdf_intermediate_dict)
|
|
|
jso["content"] = markdown_content
|
|
|
logger.info(
|
|
|
f"book_name is:{get_data_source(jso)}/{jso['file_id']},markdown content length is {len(markdown_content)}",
|