Эх сурвалжийг харах

feat(增强调试功能): 在EnhancedDocPipeline类中添加_build_table_module_debug_override方法,以支持更灵活的调试选项构建,更新调试输出目录路径,优化表格模块的调试配置,提升调试过程的可定制性和准确性。

zhch158_admin 5 өдөр өмнө
parent
commit
20b05456ab

+ 40 - 12
ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py

@@ -609,6 +609,26 @@ class EnhancedDocPipeline:
         page_result['discarded_blocks'] = sorted_discarded
         return page_result
 
+    def _build_table_module_debug_override(
+        self,
+        module_key: str,
+        *,
+        output_dir: Optional[str],
+        prefix: Optional[str] = None,
+        enabled: bool = False,
+    ) -> Dict[str, Any]:
+        """Build the per-call debug options for one table module.
+
+        Starts from a copy of ``self.config[module_key]['debug_options']``
+        and layers the call-specific values on top of it.
+
+        Args:
+            module_key: Top-level config section whose ``debug_options``
+                are used as the base (e.g. ``'table_classification'``).
+            output_dir: Stored under ``'output_dir'`` when truthy; otherwise
+                whatever the config already holds for that key is kept.
+            prefix: Stored under ``'prefix'`` unless it is ``None``.
+            enabled: Caller-side switch, OR-ed with the configured
+                ``'enabled'`` flag.
+
+        Returns:
+            A new dict: a shallow copy of the configured options with the
+            overrides applied. ``'enabled'`` is always present as a bool.
+        """
+        # ``dict.get(key, default)`` only falls back to the default when the
+        # key is absent. A section that is present but empty (for instance an
+        # empty YAML key, which loads as None) must therefore be checked
+        # explicitly before chaining another ``.get`` on it.
+        module_cfg = self.config.get(module_key)
+        cfg_opts = (
+            module_cfg.get('debug_options')
+            if isinstance(module_cfg, dict)
+            else None
+        )
+        if not isinstance(cfg_opts, dict):
+            cfg_opts = {}
+
+        # Copy so the shared config is not modified by the per-call values.
+        override: Dict[str, Any] = dict(cfg_opts)
+        override['enabled'] = bool(enabled or cfg_opts.get('enabled', False))
+        if output_dir:
+            override['output_dir'] = output_dir
+        if prefix is not None:
+            override['prefix'] = prefix
+        return override
+
     def _is_page_ocr_debug_enabled(self) -> bool:
         """Return True when ``ocr_recognition.debug_options.enabled`` is truthy in the config."""
         # ``dict.get(key, default)`` returns a stored None as-is, so a section
         # that exists but is empty would break a chained ``.get``; treat any
         # non-dict section or options value as "debug disabled".
         section = self.config.get('ocr_recognition')
         opts = section.get('debug_options') if isinstance(section, dict) else None
         return isinstance(opts, dict) and bool(opts.get('enabled', False))
@@ -752,9 +772,10 @@ class EnhancedDocPipeline:
                         cv2.rectangle(vis_image, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
             
             # 保存对比图像
-            debug_dir = Path(output_dir) / "debug_comparison"
-            debug_dir.mkdir(parents=True, exist_ok=True)
-            output_path = debug_dir / f"{page_name}_ocr_comparison.jpg"
+            from ocr_utils.module_debug_viz import resolve_module_debug_dir
+
+            debug_dir = resolve_module_debug_dir(output_dir, "ocr_comparison")
+            output_path = debug_dir / f"{page_name}_ocr_comparison.png"
             cv2.imwrite(str(output_path), vis_image)
             
             # 保存对比 JSON
@@ -992,16 +1013,16 @@ class EnhancedDocPipeline:
                     bbox = item.get('bbox', [])
                     table_img = CoordinateUtils.crop_region(detection_image, bbox)
                     
-                    # 构造调试选项
-                    cls_debug_opts = {'enabled': debug_mode}
-                    if output_dir:
-                        cls_debug_opts['output_dir'] = output_dir
-                    if basename:
-                        cls_debug_opts['prefix'] = f"{basename}_{idx}"
-                    
+                    cls_debug_opts = self._build_table_module_debug_override(
+                        'table_classification',
+                        output_dir=output_dir,
+                        prefix=f"{basename}_{idx}" if basename else None,
+                        enabled=debug_mode,
+                    )
+
                     cls_result = self.table_classifier.classify(
-                        table_img, 
-                        debug_options=cls_debug_opts
+                        table_img,
+                        debug_options=cls_debug_opts,
                     )
                     table_type = cls_result.get('table_type', 'wireless')
                     confidence = cls_result.get('confidence', 0.0)
@@ -1019,11 +1040,18 @@ class EnhancedDocPipeline:
                 if should_use_wired:
                     # 有线表格路径:UNet 识别
                     logger.info(f"🔷 Table {idx}: Using wired UNet recognition")
+                    wired_debug_opts = self._build_table_module_debug_override(
+                        'table_recognition_wired',
+                        output_dir=output_dir,
+                        prefix=f"{basename}_{idx}" if basename else None,
+                        enabled=debug_mode,
+                    )
                     element = self.element_processors.process_table_element_wired(
                         detection_image, item, scale, pre_matched_spans=spans, pdf_type=pdf_type,
                         output_dir=output_dir, basename=f"{basename}_{idx}",
                         normalize_numbers=normalize_numbers,
                         debug_mode=debug_mode,
+                        debug_options=wired_debug_opts,
                     )
                     # 如果有线识别失败(返回空 HTML),fallback 到 VLM
                     if not element['content'].get('html') and not element['content'].get('cells'):