3 miesięcy temu · cc7098b12b
--- a/ocr_tools/universal_doc_parser/core/layout_model_router.py
+++ b/ocr_tools/universal_doc_parser/core/layout_model_router.py
@@ -39,8 +39,8 @@ class SmartLayoutRouter(BaseLayoutDetector):
 
				         self.scene_name = config.get('scene_name', None)
			
 
				         self.scene_strategy = config.get('scene_strategy', {})
			
 
				         self.default_model = config.get('default_model', None)
			
 
				-        # 调试模式支持
			
 
				-        self.debug_mode = config.get('debug_mode', False)
			
 
				+        # 调试模式支持（兼容 debug_mode 和 debug_options.enabled 两种配置方式）
			
 
				+        self.debug_mode = config.get('debug_mode', config.get('debug_options', {}).get('enabled', False))
			
 
				         self.output_dir = config.get('output_dir', None)
			
 
				         self.page_name = None  # 将在 detect 方法中设置
			
 
				         # 分数差距阈值：当模型间分数差距小于此值时，优先选择 docling
			
--- a/ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py
+++ b/ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py
@@ -99,14 +99,19 @@ class EnhancedDocPipeline:
 
				     
			
 
				     # ==================== 初始化 ====================
			
 
				     
			
 
				-    def __init__(self, config_path: str):
			
 
				+    def __init__(self, config_path_or_dict, config_is_dict=False):
			
 
				         """
			
 
				         初始化流水线
			
 
				         
			
 
				         Args:
			
 
				-            config_path: 配置文件路径
			
 
				+            config_path_or_dict: 配置文件路径或配置字典
			
 
				+            config_is_dict: 是否为配置字典（True 时 config_path_or_dict 是字典）
			
 
				         """
			
 
				-        self.config = ConfigManager.load_config(config_path)
			
 
				+        if config_is_dict:
			
 
				+            self.config = config_path_or_dict
			
 
				+        else:
			
 
				+            self.config = ConfigManager.load_config(config_path_or_dict)
			
 
				+            
			
 
				         self.scene_name = self.config.get('scene_name', 'unknown')
			
 
				         self.debug_mode = self.config.get('output', {}).get('debug_mode', False)
			
 
				         
			
@@ -429,11 +434,8 @@ class EnhancedDocPipeline:
 
				                     except Exception as e:
			
 
				                         logger.warning(f"⚠️ Pre-OCR text box detection for layout evaluation failed: {e}")
			
 
				             
			
 
				-            # 启用调试模式时，设置调试信息（参考 layout_model_router.py 的实现）
			
 
				-            if self.debug_mode:
			
 
				-                # 对于 SmartLayoutRouter 或普通 layout detector，都设置这些属性
			
 
				-                if hasattr(self.layout_detector, 'debug_mode'):
			
 
				-                    self.layout_detector.debug_mode = self.debug_mode  # type: ignore
			
 
				+            # 注入每页运行时信息（output_dir/page_name 仅在 layout detector 自身 debug 开启时才有意义）
			
 
				+            if hasattr(self.layout_detector, 'debug_mode') and self.layout_detector.debug_mode:  # type: ignore
			
 
				                 if output_dir and hasattr(self.layout_detector, 'output_dir'):
			
 
				                     self.layout_detector.output_dir = output_dir  # type: ignore
			
 
				                 if page_name and hasattr(self.layout_detector, 'page_name'):
			
--- a/ocr_tools/universal_doc_parser/core/pipeline_manager_v2_streaming.py
+++ b/ocr_tools/universal_doc_parser/core/pipeline_manager_v2_streaming.py
@@ -56,15 +56,16 @@ class StreamingDocPipeline(EnhancedDocPipeline):
 
				     - 最后统一生成完整Markdown（跨页表格合并）
			
 
				     """
			
 
				     
			
 
				-    def __init__(self, config_path: str, output_dir: str):
			
 
				+    def __init__(self, config_path_or_dict, output_dir: str, config_is_dict=False):
			
 
				         """
			
 
				         初始化流式处理流水线
			
 
				         
			
 
				         Args:
			
 
				-            config_path: 配置文件路径
			
 
				+            config_path_or_dict: 配置文件路径或配置字典
			
 
				             output_dir: 输出目录（用于立即保存每页结果）
			
 
				+            config_is_dict: 是否为配置字典（True 时 config_path_or_dict 是字典）
			
 
				         """
			
 
				-        super().__init__(config_path)
			
 
				+        super().__init__(config_path_or_dict, config_is_dict=config_is_dict)
			
 
				         self.output_dir = Path(output_dir)
			
 
				         self.output_dir.mkdir(parents=True, exist_ok=True)
			
 
				         
			
--- a/ocr_tools/universal_doc_parser/main_v2.py
+++ b/ocr_tools/universal_doc_parser/main_v2.py
@@ -90,14 +90,45 @@ def _handle_dry_run(args: argparse.Namespace) -> bool:
 
				     return False
			
 
				 
			
 
				 
			
 
				-def _create_pipeline(streaming: bool, config_path: str, output_dir: str):
			
 
				-    """创建并初始化处理流水线"""
			
 
				+def _create_pipeline(
			
 
				+    streaming: bool,
			
 
				+    config_path: str,
			
 
				+    output_dir: str,
			
 
				+    debug: bool = False,
			
 
				+    debug_layout: bool = False,
			
 
				+    debug_table: bool = False,
			
 
				+    debug_ocr: bool = False
			
 
				+):
			
 
				+    """
			
 
				+    Create and initialize the processing pipeline (with debug overrides applied).
			
 
				+    
			
 
				+    Args:
			
 
				+        streaming: Whether to use streaming processing mode.
			
 
				+        config_path: Path to the configuration file.
			
 
				+        output_dir: Output directory.
			
 
				+        debug: Global debug switch.
			
 
				+        debug_layout: Layout-detection debug switch.
			
 
				+        debug_table: Table-recognition debug switch.
			
 
				+        debug_ocr: OCR-recognition debug switch.
			
 
				+        
			
 
				+    Returns:
			
 
				+        The initialized pipeline instance.
			
 
				+    """
			
 
				+    # 1. Load the configuration first
			
 
				+    from core.config_manager import ConfigManager
			
 
				+    config = ConfigManager.load_config(config_path)
			
 
				+    
			
 
				+    # 2. Apply debug overrides (before the pipeline is created)
			
 
				+    if debug or debug_layout or debug_table or debug_ocr:
			
 
				+        _apply_debug_overrides_to_config(config, debug, debug_layout, debug_table, debug_ocr)
			
 
				+    
			
 
				+    # 3. Create the pipeline (the adapters will read the already-modified config)
			
 
				     if streaming:
			
 
				         logger.info("🔄 Using streaming processing mode (memory-efficient)")
			
 
				-        pipeline = StreamingDocPipeline(config_path, output_dir)
			
 
				+        pipeline = StreamingDocPipeline(config, output_dir, config_is_dict=True)
			
 
				     else:
			
 
				         logger.info("🔄 Using batch processing mode (all pages in memory)")
			
 
				-        pipeline = EnhancedDocPipeline(config_path)
			
 
				+        pipeline = EnhancedDocPipeline(config, config_is_dict=True)
			
 
				     
			
 
				     return pipeline
			
 
				 
			
@@ -122,6 +153,88 @@ def _get_default_output_config(debug: bool) -> dict:
 
				     }
			
 
				 
			
 
				 
			
 
				+def _apply_debug_overrides_to_config(
			
 
				+    config: dict,
			
 
				+    debug: bool,
			
 
				+    debug_layout: bool,
			
 
				+    debug_table: bool,
			
 
				+    debug_ocr: bool
			
 
				+):
			
 
				+    """
			
 
				+    Apply command-line debug flags to the loaded config before the pipeline is created.
			
 
				+    
			
 
				+    Precedence:
			
 
				+    1. --debug: enables debug output for every module.
			
 
				+    2. --debug-layout/--debug-table/--debug-ocr: enable individual modules.
			
 
				+    3. debug_options in the config file only provide defaults (flags never switch them off).
			
 
				+    
			
 
				+    Args:
			
 
				+        config: Configuration dict (modified in place).
			
 
				+        debug: Global debug switch.
			
 
				+        debug_layout: Layout-detection debug switch.
			
 
				+        debug_table: Table debug switch (classification and wired recognition).
			
 
				+        debug_ocr: OCR-recognition debug switch.
			
 
				+    """
			
 
				+    # Work out which modules need debug enabled
			
 
				+    enable_layout_debug = debug or debug_layout
			
 
				+    enable_table_debug = debug or debug_table
			
 
				+    enable_ocr_debug = debug or debug_ocr
			
 
				+    
			
 
				+    # 1. Layout detection debug (only when the section exists in the config)
			
 
				+    if enable_layout_debug:
			
 
				+        if 'layout_detection' in config:
			
 
				+            if 'debug_options' not in config['layout_detection']:
			
 
				+                config['layout_detection']['debug_options'] = {}
			
 
				+            config['layout_detection']['debug_options']['enabled'] = True
			
 
				+            logger.info("✅ 启用布局检测 debug 输出")
			
 
				+    
			
 
				+    # 2. Table classification debug
			
 
				+    if enable_table_debug:
			
 
				+        if 'table_classification' in config:
			
 
				+            if 'debug_options' not in config['table_classification']:
			
 
				+                config['table_classification']['debug_options'] = {}
			
 
				+            config['table_classification']['debug_options']['enabled'] = True
			
 
				+            logger.info("✅ 启用表格分类 debug 输出")
			
 
				+    
			
 
				+    # 3. Wired-table recognition debug
			
 
				+    if enable_table_debug:
			
 
				+        if 'table_recognition_wired' in config:
			
 
				+            if 'debug_options' not in config['table_recognition_wired']:
			
 
				+                config['table_recognition_wired']['debug_options'] = {}
			
 
				+            config['table_recognition_wired']['debug_options']['enabled'] = True
			
 
				+            logger.info("✅ 启用有线表格识别 debug 输出")
			
 
				+    
			
 
				+    # 4. OCR recognition debug (if the section has debug_options)
			
 
				+    if enable_ocr_debug:
			
 
				+        if 'ocr_recognition' in config:
			
 
				+            if 'debug_options' not in config['ocr_recognition']:
			
 
				+                config['ocr_recognition']['debug_options'] = {}
			
 
				+            config['ocr_recognition']['debug_options']['enabled'] = True
			
 
				+            logger.info("✅ 启用 OCR 识别 debug 输出")
			
 
				+    
			
 
				+    # 5. Output section: written back to config (seeded from defaults when missing or empty)
			
 
				+    if enable_layout_debug or enable_ocr_debug or enable_table_debug:
			
 
				+        output_config = config['output'] = config.get('output') or _get_default_output_config(debug)
			
 
				+        output_config['debug_mode'] = True
			
 
				+        if enable_layout_debug or enable_ocr_debug:
			
 
				+            output_config.setdefault('save_layout_image', True)
			
 
				+            output_config.setdefault('save_ocr_image', True)
			
 
				+    
			
 
				+    # Report the resulting debug state
			
 
				+    if debug:
			
 
				+        logger.info("🐛 全局 Debug 模式已启用（所有模块）")
			
 
				+    else:
			
 
				+        debug_modules = []
			
 
				+        if debug_layout:
			
 
				+            debug_modules.append("布局检测")
			
 
				+        if debug_table:
			
 
				+            debug_modules.append("表格识别")
			
 
				+        if debug_ocr:
			
 
				+            debug_modules.append("OCR识别")
			
 
				+        if debug_modules:
			
 
				+            logger.info(f"🐛 Debug 模式已启用: {', '.join(debug_modules)}")
			
 
				+
			
 
				+
			
 
				 def setup_logging(log_level: str = "INFO", log_file: Optional[str] = None):
			
 
				     """设置日志"""
			
 
				     logger.remove()
			
@@ -148,6 +261,9 @@ def process_single_input(
 
				     config_path: Path,
			
 
				     output_dir: Path,
			
 
				     debug: bool = False,
			
 
				+    debug_layout: bool = False,
			
 
				+    debug_table: bool = False,
			
 
				+    debug_ocr: bool = False,
			
 
				     scene: Optional[str] = None,
			
 
				     page_range: Optional[str] = None,
			
 
				     streaming: bool = False
			
@@ -159,7 +275,10 @@ def process_single_input(
 
				         input_path: 输入路径
			
 
				         config_path: 配置文件路径
			
 
				         output_dir: 输出目录
			
 
				-        debug: 是否开启debug模式
			
 
				+        debug: 全局debug开关（启用所有模块debug）
			
 
				+        debug_layout: 仅启用布局检测debug
			
 
				+        debug_table: 仅启用表格识别debug
			
 
				+        debug_ocr: 仅启用OCR识别debug
			
 
				         scene: 场景类型覆盖
			
 
				         page_range: 页面范围（如 "1-5,7,9-12"）
			
 
				         streaming: 是否使用流式处理模式（按页处理，立即保存，节省内存）
			
@@ -168,16 +287,17 @@ def process_single_input(
 
				         处理结果和输出路径
			
 
				     """
			
 
				     try:
			
 
				-        # 创建流水线
			
 
				-        pipeline = _create_pipeline(streaming, str(config_path), str(output_dir))
			
 
				+        # 创建流水线（debug 覆盖已在 _create_pipeline 中应用）
			
 
				+        pipeline = _create_pipeline(
			
 
				+            streaming, 
			
 
				+            str(config_path), 
			
 
				+            str(output_dir),
			
 
				+            debug=debug,
			
 
				+            debug_layout=debug_layout,
			
 
				+            debug_table=debug_table,
			
 
				+            debug_ocr=debug_ocr
			
 
				+        )
			
 
				         output_config = pipeline.config.get('output', {}) or _get_default_output_config(debug)
			
 
				-
			
 
				-        # 命令行 --debug 优先级最高：覆盖 yaml 中的所有 debug 设置
			
 
				-        if debug:
			
 
				-            pipeline.debug_mode = True
			
 
				-            output_config['debug_mode'] = True
			
 
				-            output_config.setdefault('save_layout_image', True)
			
 
				-            output_config.setdefault('save_ocr_image', True)
			
 
				         
			
 
				         use_context = not streaming and hasattr(pipeline, '__enter__')
			
 
				         if use_context:
			
@@ -324,9 +444,14 @@ def main():
 
				   # 处理图片目录
			
 
				   python main_v2.py -i ./images/ -c config/bank_statement_paddle_vl.yaml
			
 
				   
			
 
				-  # 开启debug模式（输出可视化图片）
			
 
				+  # 开启全局debug模式（所有模块输出可视化图片）
			
 
				   python main_v2.py -i doc.pdf -c config.yaml --debug
			
 
				   
			
 
				+  # 开启特定模块的debug（精细控制）
			
 
				+  python main_v2.py -i doc.pdf -c config.yaml --debug-layout        # 仅布局debug
			
 
				+  python main_v2.py -i doc.pdf -c config.yaml --debug-table         # 仅表格debug
			
 
				+  python main_v2.py -i doc.pdf -c config.yaml --debug-layout --debug-table  # 组合
			
 
				+  
			
 
				   # 指定输出目录
			
 
				   python main_v2.py -i doc.pdf -c config.yaml -o ./my_output/
			
 
				   
			
@@ -365,7 +490,22 @@ def main():
 
				     parser.add_argument(
			
 
				         "--debug",
			
 
				         action="store_true",
			
 
				-        help="开启debug模式（输出layout和OCR可视化图片）"
			
 
				+        help="开启全局debug模式（启用所有模块的调试输出）"
			
 
				+    )
			
 
				+    parser.add_argument(
			
 
				+        "--debug-layout",
			
 
				+        action="store_true",
			
 
				+        help="仅开启布局检测的debug输出"
			
 
				+    )
			
 
				+    parser.add_argument(
			
 
				+        "--debug-table",
			
 
				+        action="store_true",
			
 
				+        help="仅开启表格识别的debug输出"
			
 
				+    )
			
 
				+    parser.add_argument(
			
 
				+        "--debug-ocr",
			
 
				+        action="store_true",
			
 
				+        help="仅开启OCR识别的debug输出"
			
 
				     )
			
 
				     parser.add_argument(
			
 
				         "--log_level",
			
@@ -407,6 +547,9 @@ def main():
 
				         config_path=Path(args.config),
			
 
				         output_dir=Path(args.output_dir),
			
 
				         debug=args.debug,
			
 
				+        debug_layout=args.debug_layout,
			
 
				+        debug_table=args.debug_table,
			
 
				+        debug_ocr=args.debug_ocr,
			
 
				         scene=args.scene,
			
 
				         page_range=args.pages,
			
 
				         streaming=args.streaming