فهرست منبع

refactor: rename MakeMode.STANDARD_FORMAT to MakeMode.CONTENT_LIST (value 'standard_format' -> 'content_list') and update all callers

myhloli 5 ماه پیش
والد
کامیت
4eaa85fd31

+ 2 - 2
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py

@@ -260,14 +260,14 @@ def union_make(pdf_info_dict: list,
         if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
             page_markdown = make_blocks_to_markdown(paras_of_layout, make_mode, img_buket_path)
             output_content.extend(page_markdown)
-        elif make_mode == MakeMode.STANDARD_FORMAT:
+        elif make_mode == MakeMode.CONTENT_LIST:
             for para_block in paras_of_layout:
                 para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx)
                 output_content.append(para_content)
 
     if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
         return '\n\n'.join(output_content)
-    elif make_mode == MakeMode.STANDARD_FORMAT:
+    elif make_mode == MakeMode.CONTENT_LIST:
         return output_content
     else:
         logger.error(f"Unsupported make mode: {make_mode}")

+ 2 - 2
mineru/backend/vlm/vlm_middle_json_mkcontent.py

@@ -186,14 +186,14 @@ def union_make(pdf_info_dict: list,
         if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
             page_markdown = mk_blocks_to_markdown(paras_of_layout, make_mode, img_buket_path)
             output_content.extend(page_markdown)
-        elif make_mode == MakeMode.STANDARD_FORMAT:
+        elif make_mode == MakeMode.CONTENT_LIST:
             for para_block in paras_of_layout:
                 para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx)
                 output_content.append(para_content)
 
     if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
         return '\n\n'.join(output_content)
-    elif make_mode == MakeMode.STANDARD_FORMAT:
+    elif make_mode == MakeMode.CONTENT_LIST:
         return output_content
     return None
 

+ 2 - 2
mineru/cli/common.py

@@ -143,7 +143,7 @@ def do_parse(
 
             if f_dump_content_list:
                 image_dir = str(os.path.basename(local_image_dir))
-                content_list = pipeline_union_make(pdf_info, MakeMode.STANDARD_FORMAT, image_dir)
+                content_list = pipeline_union_make(pdf_info, MakeMode.CONTENT_LIST, image_dir)
                 md_writer.write_string(
                     f"{pdf_file_name}_content_list.json",
                     json.dumps(content_list, ensure_ascii=False, indent=4),
@@ -200,7 +200,7 @@ def do_parse(
 
             if f_dump_content_list:
                 image_dir = str(os.path.basename(local_image_dir))
-                content_list = vlm_union_make(pdf_info, MakeMode.STANDARD_FORMAT, image_dir)
+                content_list = vlm_union_make(pdf_info, MakeMode.CONTENT_LIST, image_dir)
                 md_writer.write_string(
                     f"{pdf_file_name}_content_list.json",
                     json.dumps(content_list, ensure_ascii=False, indent=4),

+ 1 - 1
mineru/utils/enum_class.py

@@ -42,7 +42,7 @@ class CategoryId:
 class MakeMode:
     MM_MD = 'mm_markdown'
     NLP_MD = 'nlp_markdown'
-    STANDARD_FORMAT = 'standard_format'
+    CONTENT_LIST = 'content_list'
 
 
 class ModelPath: