Просмотр исходного кода

feat(pipeline_manager): 添加调试模式参数以增强元素处理的灵活性

zhch158_admin 2 недели назад
Родитель
Коммит
c669c3c99a
1 изменённый файл: 4 добавлено и 1 удалено
  1. 4 1
      ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py

+ 4 - 1
ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py

@@ -559,6 +559,7 @@ class EnhancedDocPipeline:
             output_dir=output_dir,
             basename=page_name,
             normalize_numbers=normalize_numbers,
+            debug_mode=self.debug_mode,
         )
         
         # 7. 按阅读顺序排序
@@ -823,6 +824,7 @@ class EnhancedDocPipeline:
         output_dir: Optional[str] = None,
         basename: Optional[str] = None,
         normalize_numbers: bool = True,
+        debug_mode: bool = False,
     ) -> tuple:
         """
         处理所有分类后的元素
@@ -915,7 +917,7 @@ class EnhancedDocPipeline:
                     table_img = CoordinateUtils.crop_region(detection_image, bbox)
                     
                     # 构造调试选项
-                    cls_debug_opts = {}
+                    cls_debug_opts = {'enabled': debug_mode}
                     if output_dir:
                         cls_debug_opts['output_dir'] = output_dir
                     if basename:
@@ -945,6 +947,7 @@ class EnhancedDocPipeline:
                         detection_image, item, scale, pre_matched_spans=spans, pdf_type=pdf_type,
                         output_dir=output_dir, basename=f"{basename}_{idx}",
                         normalize_numbers=normalize_numbers,
+                        debug_mode=debug_mode,
                     )
                     # 如果有线识别失败(返回空 HTML),fallback 到 VLM
                     if not element['content'].get('html') and not element['content'].get('cells'):