1 сар өмнө · 20b05456ab
--- a/ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py
+++ b/ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py
@@ -609,6 +609,26 @@ class EnhancedDocPipeline:
 
				         page_result['discarded_blocks'] = sorted_discarded
			
 
				         return page_result
			
 
				 
			
 
    def _build_table_module_debug_override(
        self,
        module_key: str,
        *,
        output_dir: Optional[str],
        prefix: Optional[str] = None,
        enabled: bool = False,
    ) -> Dict[str, Any]:
        """Build the debug-options dict handed to a table_* module for one call.

        Starts from a shallow copy of ``self.config[module_key]['debug_options']``
        and layers the per-call values on top, so the configured dict itself is
        never mutated.

        Args:
            module_key: Top-level config section to read, e.g.
                ``'table_classification'`` or ``'table_recognition_wired'``.
            output_dir: Written to ``'output_dir'`` when truthy; ``None`` or
                ``''`` leaves any configured value untouched.
            prefix: Written to ``'prefix'`` when not ``None`` (an empty string
                is still applied).
            enabled: Runtime debug switch, OR-ed with the configured
                ``'enabled'`` flag.

        Returns:
            A new dict that always contains a boolean ``'enabled'`` key plus
            every configured option and any per-call override.

        NOTE(review): the original docstring states that output lands under
        ``debug/{subdir}/``; that path is resolved by the consuming module,
        not by this method -- confirm against the consumer.
        """
        section = self.config.get(module_key)
        # A config key with an empty value loads as None (and a misconfigured
        # one may be a scalar); treat every non-dict section as "no options"
        # instead of failing with AttributeError on `.get`.
        cfg_opts = section.get('debug_options') if isinstance(section, dict) else None
        if not isinstance(cfg_opts, dict):
            cfg_opts = {}

        override: Dict[str, Any] = dict(cfg_opts)
        # Debugging is on when either the caller or the config asks for it.
        override['enabled'] = bool(enabled or cfg_opts.get('enabled', False))
        if output_dir:
            override['output_dir'] = output_dir
        if prefix is not None:
            override['prefix'] = prefix
        return override
			
 
				+
			
 
				     def _is_page_ocr_debug_enabled(self) -> bool:
			
 
				         opts = self.config.get('ocr_recognition', {}).get('debug_options', {})
			
 
				         return isinstance(opts, dict) and bool(opts.get('enabled', False))
			
@@ -752,9 +772,10 @@ class EnhancedDocPipeline:
 
				                         cv2.rectangle(vis_image, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
			
 
				             
			
 
				             # 保存对比图像
			
 
				-            debug_dir = Path(output_dir) / "debug_comparison"
			
 
				-            debug_dir.mkdir(parents=True, exist_ok=True)
			
 
				-            output_path = debug_dir / f"{page_name}_ocr_comparison.jpg"
			
 
				+            from ocr_utils.module_debug_viz import resolve_module_debug_dir
			
 
				+
			
 
				+            debug_dir = resolve_module_debug_dir(output_dir, "ocr_comparison")
			
 
				+            output_path = debug_dir / f"{page_name}_ocr_comparison.png"
			
 
				             cv2.imwrite(str(output_path), vis_image)
			
 
				             
			
 
				             # 保存对比 JSON
			
@@ -992,16 +1013,16 @@ class EnhancedDocPipeline:
 
				                     bbox = item.get('bbox', [])
			
 
				                     table_img = CoordinateUtils.crop_region(detection_image, bbox)
			
 
				                     
			
 
				-                    # 构造调试选项
			
 
				-                    cls_debug_opts = {'enabled': debug_mode}
			
 
				-                    if output_dir:
			
 
				-                        cls_debug_opts['output_dir'] = output_dir
			
 
				-                    if basename:
			
 
				-                        cls_debug_opts['prefix'] = f"{basename}_{idx}"
			
 
				-                    
			
 
				+                    cls_debug_opts = self._build_table_module_debug_override(
			
 
				+                        'table_classification',
			
 
				+                        output_dir=output_dir,
			
 
				+                        prefix=f"{basename}_{idx}" if basename else None,
			
 
				+                        enabled=debug_mode,
			
 
				+                    )
			
 
				+
			
 
				                     cls_result = self.table_classifier.classify(
			
 
				-                        table_img, 
			
 
				-                        debug_options=cls_debug_opts
			
 
				+                        table_img,
			
 
				+                        debug_options=cls_debug_opts,
			
 
				                     )
			
 
				                     table_type = cls_result.get('table_type', 'wireless')
			
 
				                     confidence = cls_result.get('confidence', 0.0)
			
@@ -1019,11 +1040,18 @@ class EnhancedDocPipeline:
 
				                 if should_use_wired:
			
 
				                     # 有线表格路径：UNet 识别
			
 
				                     logger.info(f"🔷 Table {idx}: Using wired UNet recognition")
			
 
				+                    wired_debug_opts = self._build_table_module_debug_override(
			
 
				+                        'table_recognition_wired',
			
 
				+                        output_dir=output_dir,
			
 
				+                        prefix=f"{basename}_{idx}" if basename else None,
			
 
				+                        enabled=debug_mode,
			
 
				+                    )
			
 
				                     element = self.element_processors.process_table_element_wired(
			
 
				                         detection_image, item, scale, pre_matched_spans=spans, pdf_type=pdf_type,
			
 
				                         output_dir=output_dir, basename=f"{basename}_{idx}",
			
 
				                         normalize_numbers=normalize_numbers,
			
 
				                         debug_mode=debug_mode,
			
 
				+                        debug_options=wired_debug_opts,
			
 
				                     )
			
 
				                     # 如果有线识别失败（返回空 HTML），fallback 到 VLM
			
 
				                     if not element['content'].get('html') and not element['content'].get('cells'):