Преглед изворног кода

fix: prevent processing of empty content lists in pipeline middle JSON handling

myhloli пре 2 недеља
родитељ
комит
b70f49522e

+ 2 - 0
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py

@@ -287,6 +287,8 @@ def union_make(pdf_info_dict: list,
             output_content.extend(page_markdown)
             output_content.extend(page_markdown)
         elif make_mode == MakeMode.CONTENT_LIST:
         elif make_mode == MakeMode.CONTENT_LIST:
             para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
             para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
+            if not para_blocks:
+                continue
             for para_block in para_blocks:
             for para_block in para_blocks:
                 para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx, page_size)
                 para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx, page_size)
                 if para_content:
                 if para_content:

+ 2 - 0
mineru/backend/vlm/vlm_middle_json_mkcontent.py

@@ -255,6 +255,8 @@ def union_make(pdf_info_dict: list,
             output_content.extend(page_markdown)
             output_content.extend(page_markdown)
         elif make_mode == MakeMode.CONTENT_LIST:
         elif make_mode == MakeMode.CONTENT_LIST:
             para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
             para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
+            if not para_blocks:
+                continue
             for para_block in para_blocks:
             for para_block in para_blocks:
                 para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx, page_size)
                 para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx, page_size)
                 output_content.append(para_content)
                 output_content.append(para_content)