|
|
@@ -366,6 +366,7 @@ class ElementProcessors:
|
|
|
basename: Optional[str] = None,
|
|
|
normalize_numbers: bool = True,
|
|
|
debug_mode: bool = False,
|
|
|
+ debug_options: Optional[Dict[str, Any]] = None,
|
|
|
) -> Dict[str, Any]:
|
|
|
"""
|
|
|
使用 UNet 有线表格识别处理表格元素
|
|
|
@@ -399,20 +400,21 @@ class ElementProcessors:
|
|
|
if not self.wired_table_recognizer:
|
|
|
raise RuntimeError("Wired table recognizer not available")
|
|
|
|
|
|
- # 构造调试选项覆盖
|
|
|
- debug_opts_override = {'enabled': debug_mode}
|
|
|
- if output_dir:
|
|
|
- debug_opts_override['output_dir'] = output_dir
|
|
|
- if basename:
|
|
|
- # 使用完整 basename 作为前缀 (如 "filename_page_001")
|
|
|
- debug_opts_override['prefix'] = basename
|
|
|
+ if debug_options is not None:
|
|
|
+ debug_opts_override = dict(debug_options)
|
|
|
+ else:
|
|
|
+ debug_opts_override = {'enabled': debug_mode}
|
|
|
+ if output_dir:
|
|
|
+ debug_opts_override['output_dir'] = output_dir
|
|
|
+ if basename:
|
|
|
+ debug_opts_override['prefix'] = basename
|
|
|
|
|
|
wired_res = self.wired_table_recognizer.recognize(
|
|
|
table_image=cropped_table,
|
|
|
# ocr_boxes=ocr_boxes_for_wired,
|
|
|
ocr_boxes=ocr_boxes,
|
|
|
pdf_type=pdf_type,
|
|
|
- debug_options=debug_opts_override
|
|
|
+ debug_options=debug_opts_override,
|
|
|
)
|
|
|
|
|
|
if not (wired_res.get('html') or wired_res.get('cells')):
|