|
|
@@ -559,6 +559,7 @@ class EnhancedDocPipeline:
|
|
|
output_dir=output_dir,
|
|
|
basename=page_name,
|
|
|
normalize_numbers=normalize_numbers,
|
|
|
+ debug_mode=self.debug_mode,
|
|
|
)
|
|
|
|
|
|
# 7. 按阅读顺序排序
|
|
|
@@ -823,6 +824,7 @@ class EnhancedDocPipeline:
|
|
|
output_dir: Optional[str] = None,
|
|
|
basename: Optional[str] = None,
|
|
|
normalize_numbers: bool = True,
|
|
|
+ debug_mode: bool = False,
|
|
|
) -> tuple:
|
|
|
"""
|
|
|
处理所有分类后的元素
|
|
|
@@ -915,7 +917,7 @@ class EnhancedDocPipeline:
|
|
|
table_img = CoordinateUtils.crop_region(detection_image, bbox)
|
|
|
|
|
|
# 构造调试选项
|
|
|
- cls_debug_opts = {}
|
|
|
+ cls_debug_opts = {'enabled': debug_mode}
|
|
|
if output_dir:
|
|
|
cls_debug_opts['output_dir'] = output_dir
|
|
|
if basename:
|
|
|
@@ -945,6 +947,7 @@ class EnhancedDocPipeline:
|
|
|
detection_image, item, scale, pre_matched_spans=spans, pdf_type=pdf_type,
|
|
|
output_dir=output_dir, basename=f"{basename}_{idx}",
|
|
|
normalize_numbers=normalize_numbers,
|
|
|
+ debug_mode=debug_mode,
|
|
|
)
|
|
|
# 如果有线识别失败(返回空 HTML),fallback 到 VLM
|
|
|
if not element['content'].get('html') and not element['content'].get('cells'):
|