Преглед на файлове

feat(增强调试选项): 在 ElementProcessors 类的有线表格处理方法中添加 debug_options 参数,以支持更灵活的调试配置;优化调试选项的构造逻辑(传入 debug_options 时直接使用其副本,否则仍由 debug_mode、output_dir、basename 构造),提升调试过程的可定制性和准确性。

zhch158_admin преди 4 дни
родител
ревизия
b2e8f25369
променен е 1 файл, в който са добавени 10 реда и са изтрити 8 реда
  1. 10 8
      ocr_tools/universal_doc_parser/core/element_processors.py

+ 10 - 8
ocr_tools/universal_doc_parser/core/element_processors.py

@@ -366,6 +366,7 @@ class ElementProcessors:
         basename: Optional[str] = None,
         normalize_numbers: bool = True,
         debug_mode: bool = False,
+        debug_options: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         """
         使用 UNet 有线表格识别处理表格元素
@@ -399,20 +400,21 @@ class ElementProcessors:
             if not self.wired_table_recognizer:
                 raise RuntimeError("Wired table recognizer not available")
             
-            # 构造调试选项覆盖
-            debug_opts_override = {'enabled': debug_mode}
-            if output_dir:
-                debug_opts_override['output_dir'] = output_dir
-            if basename:
-                # 使用完整 basename 作为前缀 (如 "filename_page_001")
-                debug_opts_override['prefix'] = basename
+            if debug_options is not None:
+                debug_opts_override = dict(debug_options)
+            else:
+                debug_opts_override = {'enabled': debug_mode}
+                if output_dir:
+                    debug_opts_override['output_dir'] = output_dir
+                if basename:
+                    debug_opts_override['prefix'] = basename
 
             wired_res = self.wired_table_recognizer.recognize(
                 table_image=cropped_table,
                 # ocr_boxes=ocr_boxes_for_wired,
                 ocr_boxes=ocr_boxes,
                 pdf_type=pdf_type,
-                debug_options=debug_opts_override
+                debug_options=debug_opts_override,
             )
             
             if not (wired_res.get('html') or wired_res.get('cells')):