Parcourir la source

feat: 修改合并函数参数名称,将output_format更改为output_type,统一输出格式参数

zhch158_admin il y a 3 semaines
Parent
commit
4383d618f7
1 fichier modifié avec 12 ajouts et 12 suppressions
  1. 12 12
      merger/merge_mineru_paddle_ocr.py

+ 12 - 12
merger/merge_mineru_paddle_ocr.py

@@ -13,7 +13,7 @@ except ImportError:
 
 
 def merge_single_file(mineru_file: Path, paddle_file: Path, output_dir: Path, 
-                     output_format: str, merger: MinerUPaddleOCRMerger) -> bool:
+                     output_type: str, merger: MinerUPaddleOCRMerger) -> bool:
     """
     合并单个文件
     
@@ -40,20 +40,20 @@ def merge_single_file(mineru_file: Path, paddle_file: Path, output_dir: Path,
         )
         
         # 生成 Markdown
-        if output_format in ['markdown', 'both']:
+        if output_type in ['markdown', 'both']:
             merger.generate_enhanced_markdown(merged_data, str(merged_md_path), mineru_file)
         
         # 保存 JSON
-        if output_format in ['json', 'both']:
+        if output_type in ['json', 'both']:
             with open(merged_json_path, 'w', encoding='utf-8') as f:
                 json.dump(merged_data, f, ensure_ascii=False, indent=2)
 
         print(f"  ✅ 合并完成")
         print(f"  📊 共处理了 {len(merged_data)} 个对象")
         print(f"  💾 输出文件:")
-        if output_format in ['markdown', 'both']:
+        if output_type in ['markdown', 'both']:
             print(f"    - {merged_md_path.name}")
-        if output_format in ['json', 'both']:
+        if output_type in ['json', 'both']:
             print(f"    - {merged_json_path.name}")
 
         return True
@@ -66,7 +66,7 @@ def merge_single_file(mineru_file: Path, paddle_file: Path, output_dir: Path,
 
 
 def merge_mineru_paddle_batch(mineru_dir: str, paddle_dir: str, output_dir: str,
-                              output_format: str = 'both',
+                              output_type: str = 'both',
                               look_ahead_window: int = 10, 
                               similarity_threshold: int = 80):
     """
@@ -109,7 +109,7 @@ def merge_mineru_paddle_batch(mineru_dir: str, paddle_dir: str, output_dir: str,
             failed_count += 1
             continue
 
-        if merge_single_file(mineru_file, paddle_file, output_path, output_format, merger):
+        if merge_single_file(mineru_file, paddle_file, output_path, output_type, merger):
             success_count += 1
         else:
             failed_count += 1
@@ -190,7 +190,7 @@ def main():
         help='输出目录(必需)'
     )
     output_group.add_argument(
-        '-f', '--format', 
+        '-f', '--output-type', 
         choices=['json', 'markdown', 'both'], 
         default='both', help='输出格式'
     )
@@ -211,7 +211,7 @@ def main():
     )
     
     args = parser.parse_args()
-    output_format = args.format.lower()
+    output_type = args.output_type.lower()
     
     # 验证参数
     if args.mineru_file and args.paddle_file:
@@ -242,7 +242,7 @@ def main():
             similarity_threshold=args.threshold
         )
         
-        success = merge_single_file(mineru_file, paddle_file, output_dir, output_format, merger)
+        success = merge_single_file(mineru_file, paddle_file, output_dir, output_type, merger)
         
         if success:
             print("\n✅ 处理完成!")
@@ -265,7 +265,7 @@ def main():
             args.mineru_dir,
             args.paddle_dir,
             args.output_dir,
-            output_format=output_format,
+            output_type=output_type,
             look_ahead_window=args.window,
             similarity_threshold=args.threshold
         )
@@ -293,7 +293,7 @@ if __name__ == "__main__":
             # "mineru-dir": "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/mineru-vlm-2.5.3_Results",
             # "paddle-dir": "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/data_PPStructureV3_Results",
             # "output-dir": "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/merged_results",
-            "format": "both",
+            "output-type": "both",
             "window": "15",
             "threshold": "85"
         }