1 miesiąc temu · 3867618ad1
--- a/ocr_tools/universal_doc_parser/config/bank_statement_glm_vl.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_glm_vl.yaml
@@ -11,6 +11,8 @@ input:
 
				 
			
 
				 preprocessor:
			
 
				   module: "mineru"
			
 
				+  # 页级预处理顺序：orient_first=先扶正再去水印（银行斜纹水印推荐）；watermark_first=兼容旧行为
			
 
				+  order: orient_first
			
 
				   orientation_classifier:
			
 
				     enabled: true
			
 
				     model_name: "paddle_orientation_classification"
			
@@ -21,10 +23,30 @@ preprocessor:
 
				   # 水印去除配置（适用于银行流水浅色斜向文字水印）
			
 
				   # -------------------------------------------------------
			
 
				   watermark_removal:
			
 
				-    enabled: true           # 是否启用水印去除
			
 
				-    threshold: 160          # 灰度阈值（140-180）：高于此值视为水印变白
			
 
				-                            # 值越大保守（残留水印），值越小激进（损失浅色正文）
			
 
				-    morph_close_kernel: 0   # 形态学闭运算核大小（像素），默认的 morph_kernel 改为 0（非二值图像时形态学闭运算会适得其反）
			
 
				+    enabled: false           # 是否启用水印去除
			
 
				+    method: threshold # threshold | masked | masked_adaptive
			
 
				+    threshold: 175          # 全局阈值或掩膜失败时的回退阈值（140-180）
			
 
				+    morph_close_kernel: 0   # 去水印后灰度图闭运算，0 跳过
			
 
				+    # 去水印后对比度增强（text_restore 将笔画拉深，比全局 gamma 更接近原图）
			
 
				+    contrast_enhancement:
			
 
				+      enabled: true
			
 
				+      method: text_restore   # text_restore | clahe | gamma | linear
			
 
				+      text_black_target: 85  # 略提高，减轻去水印后笔画被拉花（原 75 过深）
			
 
				+      background_threshold: 248
			
 
				+      text_lo_percentile: 1.0
			
 
				+      text_hi_percentile: 99.0
			
 
				+      gamma: 0.75            # method=gamma 时生效
			
 
				+      clip_limit: 2.0        # method=clahe
			
 
				+      tile_grid_size: 8
			
 
				+      black_percentile: 2.0  # method=linear
			
 
				+      white_percentile: 98.0
			
 
				+    debug_options:
			
 
				+      enabled: false              # 由命令行 --debug / --debug-layout 统一控制
			
 
				+      output_dir: null            # null 时使用 pipeline 输出目录
			
 
				+      prefix: ""                  # 文件名前缀（运行时注入 page_name）
			
 
				+      subdir: watermark_removal   # 输出至 debug/watermark_removal/
			
 
				+      save_compare: true          # 保存左右对比图 *_watermark_compare.*
			
 
				+      image_format: "png"         # jpg / png
			
 
				 
			
 
				 # ============================================================
			
 
				 # Layout 检测配置 - 使用 PP-DocLayoutV3
			
@@ -46,11 +68,16 @@ layout_detection:
 
				     min_text_width_ratio: 0.4         # 最小宽度占比（40%）
			
 
				     min_text_height_ratio: 0.3        # 最小高度占比（30%）
			
 
				 
			
 
				-  # Debug 可视化配置
			
 
				+  # Debug 可视化（底图为 inference_image，与 Layout 检测输入一致）
			
 
				   debug_options:
			
 
				-    enabled: true               # 是否开启调试可视化输出
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码）
			
 
				+    enabled: false              # controlled by the --debug / --debug-layout CLI flags; do not hardcode true here
			
 
				+    output_dir: null            # null: injected per page by the pipeline
			
 
				+    prefix: ""
			
 
				+    subdir: layout_detection    # written to debug/layout_detection/
			
 
				+    save_raw: true              # before post-processing
			
 
				+    save_post_processed: true   # after post-processing
			
 
				+    save_json: true
			
 
				+    image_format: "png"
			
 
				 
			
 
				 # ============================================================
			
 
				 # VL识别配置 - 使用 GLM-OCR
			
@@ -103,6 +130,16 @@ ocr_recognition:
 
				   batch_size: 8
			
 
				   device: "cpu"
			
 
				 
			
 
				+
			
 
				+  # Debug 可视化（底图为 inference_image，与整页 OCR 输入一致）
			
 
				+  debug_options:
			
 
				+    enabled: false              # 由命令行 --debug / --debug-ocr 控制
			
 
				+    output_dir: null
			
 
				+    prefix: ""
			
 
				+    subdir: ocr_recognition     # 输出至 debug/ocr_recognition/
			
 
				+    save_json: true
			
 
				+    image_format: png
			
 
				+
			
 
				 # ============================================================
			
 
				 # 输出配置
			
 
				 # ============================================================
			
--- a/ocr_tools/universal_doc_parser/config/bank_statement_glm_vl_local.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_glm_vl_local.yaml
@@ -14,6 +14,8 @@ input:
 
				 
			
 
				 preprocessor:
			
 
				   module: "mineru"
			
 
				+  # 页级预处理顺序：orient_first=先扶正再去水印（银行斜纹水印推荐）；watermark_first=兼容旧行为
			
 
				+  order: orient_first
			
 
				   orientation_classifier:
			
 
				     enabled: true
			
 
				     model_name: "paddle_orientation_classification"
			
@@ -25,9 +27,29 @@ preprocessor:
 
				   # -------------------------------------------------------
			
 
				   watermark_removal:
			
 
				     enabled: false           # 是否启用水印去除
			
 
				-    threshold: 160          # 灰度阈值（140-180）：高于此值视为水印变白
			
 
				-                            # 值越大保守（残留水印），值越小激进（损失浅色正文）
			
 
				-    morph_close_kernel: 0   # 形态学闭运算核大小（像素），默认的 morph_kernel 改为 0（非二值图像时形态学闭运算会适得其反）
			
 
				+    method: threshold # threshold | masked | masked_adaptive
			
 
				+    threshold: 175          # 全局阈值或掩膜失败时的回退阈值（140-180）
			
 
				+    morph_close_kernel: 0   # 去水印后灰度图闭运算，0 跳过
			
 
				+    # 去水印后对比度增强（text_restore 将笔画拉深，比全局 gamma 更接近原图）
			
 
				+    contrast_enhancement:
			
 
				+      enabled: true
			
 
				+      method: text_restore   # text_restore | clahe | gamma | linear
			
 
				+      text_black_target: 85  # 略提高，减轻去水印后笔画被拉花（原 75 过深）
			
 
				+      background_threshold: 248
			
 
				+      text_lo_percentile: 1.0
			
 
				+      text_hi_percentile: 99.0
			
 
				+      gamma: 0.75            # method=gamma 时生效
			
 
				+      clip_limit: 2.0        # method=clahe
			
 
				+      tile_grid_size: 8
			
 
				+      black_percentile: 2.0  # method=linear
			
 
				+      white_percentile: 98.0
			
 
				+    debug_options:
			
 
				+      enabled: false              # 由命令行 --debug / --debug-layout 统一控制
			
 
				+      output_dir: null            # null 时使用 pipeline 输出目录
			
 
				+      prefix: ""                  # 文件名前缀（运行时注入 page_name）
			
 
				+      subdir: watermark_removal   # 输出至 debug/watermark_removal/
			
 
				+      save_compare: true          # 保存左右对比图 *_watermark_compare.*
			
 
				+      image_format: "png"         # jpg / png
			
 
				 
			
 
				 # ============================================================
			
 
				 # Layout 检测配置 - 智能路由器（按场景直接选择模型）
			
@@ -71,11 +93,16 @@ layout_detection:
 
				     min_text_width_ratio: 0.4         # 最小宽度占比（40%）
			
 
				     min_text_height_ratio: 0.3        # 最小高度占比（30%）
			
 
				 
			
 
				-  # Debug 可视化配置
			
 
				+  # Debug 可视化（底图为 inference_image，与 Layout 检测输入一致）
			
 
				   debug_options:
			
 
				-    enabled: false              # 由命令行 --debug 统一控制，勿在此 hardcode true
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码）
			
 
				+    enabled: false              # 由命令行 --debug / --debug-layout 控制
			
 
				+    output_dir: null            # null 时由 pipeline 按页注入
			
 
				+    prefix: ""
			
 
				+    subdir: layout_detection    # 输出至 debug/layout_detection/
			
 
				+    save_raw: true              # 后处理前
			
 
				+    save_post_processed: true   # 后处理后
			
 
				+    save_json: true
			
 
				+    image_format: "png"
			
 
				 
			
 
				 # ============================================================
			
 
				 # OCR 识别配置
			
@@ -89,6 +116,16 @@ ocr_recognition:
 
				   batch_size: 8
			
 
				   device: "cpu"
			
 
				 
			
 
				+
			
 
				+  # Debug 可视化（底图为 inference_image，与整页 OCR 输入一致）
			
 
				+  debug_options:
			
 
				+    enabled: false              # 由命令行 --debug / --debug-ocr 控制
			
 
				+    output_dir: null
			
 
				+    prefix: ""
			
 
				+    subdir: ocr_recognition     # 输出至 debug/ocr_recognition/
			
 
				+    save_json: true
			
 
				+    image_format: png
			
 
				+
			
 
				 # ============================================================
			
 
				 # 表格分类配置（自动区分有线/无线表格）
			
 
				 # ============================================================
			
@@ -100,11 +137,12 @@ table_classification:
 
				 
			
 
				   # Debug 可视化配置
			
 
				   debug_options:
			
 
				-    enabled: false              # 由命令行 --debug 统一控制，勿在此 hardcode true
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    save_table_lines: true       # 保存表格线可视化（unet横线/竖线叠加）
			
 
				-    image_format: "png"          # 可视化图片格式：png/jpg
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码/表格序号）
			
 
				+    enabled: false              # 由命令行 --debug / --debug-table 统一控制
			
 
				+    output_dir: null            # null 时由 pipeline 按页注入
			
 
				+    prefix: ""
			
 
				+    subdir: table_classification  # 输出至 debug/table_classification/
			
 
				+    save_table_lines: true      # paddle 线条检测叠加图
			
 
				+    image_format: "png"
			
 
				 
			
 
				 # ============================================================
			
 
				 # 有线表格识别专用配置（MinerU UNet）
			
@@ -144,14 +182,16 @@ table_recognition_wired:
 
				 
			
 
				   # Debug 可视化配置
			
 
				   debug_options:
			
 
				-    enabled: false              # 由命令行 --debug 统一控制，勿在此 hardcode true
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    save_table_lines: true       # 保存表格线可视化（unet横线/竖线叠加）
			
 
				-    save_connected_components: true  # 保存连通域提取的单元格图
			
 
				-    save_grid_structure: true    # 保存逻辑网格结构（row/col/rowspan/colspan）
			
 
				-    save_text_overlay: true      # 保存文本填充覆盖图
			
 
				-    image_format: "png"          # 可视化图片格式：png/jpg
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码/表格序号）
			
 
				+    enabled: false              # 由命令行 --debug / --debug-table 统一控制
			
 
				+    output_dir: null            # null 时由 pipeline 按页注入
			
 
				+    prefix: ""
			
 
				+    subdir: table_recognition_wired  # 输出至 debug/table_recognition_wired/
			
 
				+    save_table_lines: true
			
 
				+    save_connected_components: true
			
 
				+    save_grid_structure: true
			
 
				+    save_text_overlay: true
			
 
				+    image_format: "png"
			
 
				+    # 单元格二次 OCR 裁剪图：debug/table_recognition_wired/tablecell_ocr/
			
 
				 
			
 
				 # ============================================================
			
 
				 # VL识别配置 - 使用 GLM-OCR（无线表格 + seal识别）
			
--- a/ocr_tools/universal_doc_parser/config/bank_statement_mineru_vl.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_mineru_vl.yaml
@@ -11,8 +11,10 @@ input:
 
				 
			
 
				 preprocessor:
			
 
				   module: "mineru"
			
 
				+  # 页级预处理顺序：orient_first=先扶正再去水印（银行斜纹水印推荐）；watermark_first=兼容旧行为
			
 
				+  order: orient_first
			
 
				   orientation_classifier:
			
 
				-    enabled: true  # 扫描件自动开启，数字PDF自动跳过
			
 
				+    enabled: true
			
 
				     model_name: "paddle_orientation_classification"
			
 
				     model_dir: null  # 使用默认路径
			
 
				   unwarping:
			
@@ -21,10 +23,30 @@ preprocessor:
 
				   # 水印去除配置（适用于银行流水浅色斜向文字水印）
			
 
				   # -------------------------------------------------------
			
 
				   watermark_removal:
			
 
				-    enabled: true           # 是否启用水印去除
			
 
				-    threshold: 160          # 灰度阈值（140-180）：高于此值视为水印变白
			
 
				-                            # 值越大保守（残留水印），值越小激进（损失浅色正文）
			
 
				-    morph_close_kernel: 0   # 形态学闭运算核大小（像素），默认的 morph_kernel 改为 0（非二值图像时形态学闭运算会适得其反）
			
 
				+    enabled: false           # 是否启用水印去除
			
 
				+    method: threshold # threshold | masked | masked_adaptive
			
 
				+    threshold: 175          # 全局阈值或掩膜失败时的回退阈值（140-180）
			
 
				+    morph_close_kernel: 0   # 去水印后灰度图闭运算，0 跳过
			
 
				+    # 去水印后对比度增强（text_restore 将笔画拉深，比全局 gamma 更接近原图）
			
 
				+    contrast_enhancement:
			
 
				+      enabled: true
			
 
				+      method: text_restore   # text_restore | clahe | gamma | linear
			
 
				+      text_black_target: 85  # 略提高，减轻去水印后笔画被拉花（原 75 过深）
			
 
				+      background_threshold: 248
			
 
				+      text_lo_percentile: 1.0
			
 
				+      text_hi_percentile: 99.0
			
 
				+      gamma: 0.75            # method=gamma 时生效
			
 
				+      clip_limit: 2.0        # method=clahe
			
 
				+      tile_grid_size: 8
			
 
				+      black_percentile: 2.0  # method=linear
			
 
				+      white_percentile: 98.0
			
 
				+    debug_options:
			
 
				+      enabled: false              # 由命令行 --debug / --debug-layout 统一控制
			
 
				+      output_dir: null            # null 时使用 pipeline 输出目录
			
 
				+      prefix: ""                  # 文件名前缀（运行时注入 page_name）
			
 
				+      subdir: watermark_removal   # 输出至 debug/watermark_removal/
			
 
				+      save_compare: true          # 保存左右对比图 *_watermark_compare.*
			
 
				+      image_format: "png"         # jpg / png
			
 
				 
			
 
				 layout_detection:
			
 
				   # MinerU-VL layout（通过 VLM 服务做版式检测）
			
@@ -43,12 +65,16 @@ layout_detection:
 
				     min_text_width_ratio: 0.4         # 最小宽度占比（40%）
			
 
				     min_text_height_ratio: 0.3        # 最小高度占比（30%）
			
 
				 
			
 
				-  # Debug 可视化配置（与 MinerUWiredTableRecognizer.DebugOptions 对齐）
			
 
				-  # 默认关闭。开启后将保存：layout检测结果
			
 
				+  # Debug 可视化（底图为 inference_image，与 Layout 检测输入一致）
			
 
				   debug_options:
			
 
				-    enabled: true               # 是否开启调试可视化输出
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码）
			
 
				+    enabled: false              # controlled by the --debug / --debug-layout CLI flags; do not hardcode true here
			
 
				+    output_dir: null            # null: injected per page by the pipeline
			
 
				+    prefix: ""
			
 
				+    subdir: layout_detection    # written to debug/layout_detection/
			
 
				+    save_raw: true              # before post-processing
			
 
				+    save_post_processed: true   # after post-processing
			
 
				+    save_json: true
			
 
				+    image_format: "png"
			
 
				 
			
 
				 # ============================================================
			
 
				 # VL识别配置（表格、公式）
			
@@ -78,6 +104,16 @@ ocr_recognition:
 
				   batch_size: 8
			
 
				   device: "cpu"
			
 
				 
			
 
				+
			
 
				+  # Debug 可视化（底图为 inference_image，与整页 OCR 输入一致）
			
 
				+  debug_options:
			
 
				+    enabled: false              # 由命令行 --debug / --debug-ocr 控制
			
 
				+    output_dir: null
			
 
				+    prefix: ""
			
 
				+    subdir: ocr_recognition     # 输出至 debug/ocr_recognition/
			
 
				+    save_json: true
			
 
				+    image_format: png
			
 
				+
			
 
				 # ============================================================
			
 
				 # 输出配置
			
 
				 # ============================================================
			
--- a/ocr_tools/universal_doc_parser/config/bank_statement_paddle_vl.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_paddle_vl.yaml
@@ -11,6 +11,8 @@ input:
 
				 
			
 
				 preprocessor:
			
 
				   module: "mineru"
			
 
				+  # 页级预处理顺序：orient_first=先扶正再去水印（银行斜纹水印推荐）；watermark_first=兼容旧行为
			
 
				+  order: orient_first
			
 
				   orientation_classifier:
			
 
				     enabled: true
			
 
				     model_name: "paddle_orientation_classification"
			
@@ -21,10 +23,30 @@ preprocessor:
 
				   # 水印去除配置（适用于银行流水浅色斜向文字水印）
			
 
				   # -------------------------------------------------------
			
 
				   watermark_removal:
			
 
				-    enabled: true           # 是否启用水印去除
			
 
				-    threshold: 160          # 灰度阈值（140-180）：高于此值视为水印变白
			
 
				-                            # 值越大保守（残留水印），值越小激进（损失浅色正文）
			
 
				-    morph_close_kernel: 0   # 形态学闭运算核大小（像素），默认的 morph_kernel 改为 0（非二值图像时形态学闭运算会适得其反）
			
 
				+    enabled: false           # 是否启用水印去除
			
 
				+    method: threshold # threshold | masked | masked_adaptive
			
 
				+    threshold: 175          # 全局阈值或掩膜失败时的回退阈值（140-180）
			
 
				+    morph_close_kernel: 0   # 去水印后灰度图闭运算，0 跳过
			
 
				+    # 去水印后对比度增强（text_restore 将笔画拉深，比全局 gamma 更接近原图）
			
 
				+    contrast_enhancement:
			
 
				+      enabled: true
			
 
				+      method: text_restore   # text_restore | clahe | gamma | linear
			
 
				+      text_black_target: 85  # 略提高，减轻去水印后笔画被拉花（原 75 过深）
			
 
				+      background_threshold: 248
			
 
				+      text_lo_percentile: 1.0
			
 
				+      text_hi_percentile: 99.0
			
 
				+      gamma: 0.75            # method=gamma 时生效
			
 
				+      clip_limit: 2.0        # method=clahe
			
 
				+      tile_grid_size: 8
			
 
				+      black_percentile: 2.0  # method=linear
			
 
				+      white_percentile: 98.0
			
 
				+    debug_options:
			
 
				+      enabled: false              # 由命令行 --debug / --debug-layout 统一控制
			
 
				+      output_dir: null            # null 时使用 pipeline 输出目录
			
 
				+      prefix: ""                  # 文件名前缀（运行时注入 page_name）
			
 
				+      subdir: watermark_removal   # 输出至 debug/watermark_removal/
			
 
				+      save_compare: true          # 保存左右对比图 *_watermark_compare.*
			
 
				+      image_format: "png"         # jpg / png
			
 
				 
			
 
				 layout_detection:
			
 
				   # module: "paddle"
			
@@ -48,12 +70,16 @@ layout_detection:
 
				     min_text_width_ratio: 0.4         # 最小宽度占比（40%）
			
 
				     min_text_height_ratio: 0.3        # 最小高度占比（30%）
			
 
				 
			
 
				-  # Debug 可视化配置（与 MinerUWiredTableRecognizer.DebugOptions 对齐）
			
 
				-  # 默认关闭。开启后将保存：layout检测结果
			
 
				+  # Debug 可视化（底图为 inference_image，与 Layout 检测输入一致）
			
 
				   debug_options:
			
 
				-    enabled: true               # 是否开启调试可视化输出
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码）
			
 
				+    enabled: false              # controlled by the --debug / --debug-layout CLI flags; do not hardcode true here
			
 
				+    output_dir: null            # null: injected per page by the pipeline
			
 
				+    prefix: ""
			
 
				+    subdir: layout_detection    # written to debug/layout_detection/
			
 
				+    save_raw: true              # before post-processing
			
 
				+    save_post_processed: true   # after post-processing
			
 
				+    save_json: true
			
 
				+    image_format: "png"
			
 
				 
			
 
				 # ============================================================
			
 
				 # VL识别配置（表格、公式）
			
@@ -84,6 +110,16 @@ ocr_recognition:
 
				   batch_size: 8
			
 
				   device: "cpu"
			
 
				 
			
 
				+
			
 
				+  # Debug 可视化（底图为 inference_image，与整页 OCR 输入一致）
			
 
				+  debug_options:
			
 
				+    enabled: false              # 由命令行 --debug / --debug-ocr 控制
			
 
				+    output_dir: null
			
 
				+    prefix: ""
			
 
				+    subdir: ocr_recognition     # 输出至 debug/ocr_recognition/
			
 
				+    save_json: true
			
 
				+    image_format: png
			
 
				+
			
 
				 # ============================================================
			
 
				 # 输出配置
			
 
				 # ============================================================
			
--- a/ocr_tools/universal_doc_parser/config/bank_statement_paddle_vl_local.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_paddle_vl_local.yaml
@@ -14,6 +14,8 @@ input:
 
				 
			
 
				 preprocessor:
			
 
				   module: "mineru"
			
 
				+  # 页级预处理顺序：orient_first=先扶正再去水印（银行斜纹水印推荐）；watermark_first=兼容旧行为
			
 
				+  order: orient_first
			
 
				   orientation_classifier:
			
 
				     enabled: true
			
 
				     model_name: "paddle_orientation_classification"
			
@@ -24,10 +26,30 @@ preprocessor:
 
				   # 水印去除配置（适用于银行流水浅色斜向文字水印）
			
 
				   # -------------------------------------------------------
			
 
				   watermark_removal:
			
 
				-    enabled: true           # 是否启用水印去除
			
 
				-    threshold: 160          # 灰度阈值（140-180）：高于此值视为水印变白
			
 
				-                            # 值越大保守（残留水印），值越小激进（损失浅色正文）
			
 
				-    morph_close_kernel: 0   # 形态学闭运算核大小（像素），默认的 morph_kernel 改为 0（非二值图像时形态学闭运算会适得其反）
			
 
				+    enabled: false           # 是否启用水印去除
			
 
				+    method: threshold # threshold | masked | masked_adaptive
			
 
				+    threshold: 175          # 全局阈值或掩膜失败时的回退阈值（140-180）
			
 
				+    morph_close_kernel: 0   # 去水印后灰度图闭运算，0 跳过
			
 
				+    # 去水印后对比度增强（text_restore 将笔画拉深，比全局 gamma 更接近原图）
			
 
				+    contrast_enhancement:
			
 
				+      enabled: true
			
 
				+      method: text_restore   # text_restore | clahe | gamma | linear
			
 
				+      text_black_target: 85  # 略提高，减轻去水印后笔画被拉花（原 75 过深）
			
 
				+      background_threshold: 248
			
 
				+      text_lo_percentile: 1.0
			
 
				+      text_hi_percentile: 99.0
			
 
				+      gamma: 0.75            # method=gamma 时生效
			
 
				+      clip_limit: 2.0        # method=clahe
			
 
				+      tile_grid_size: 8
			
 
				+      black_percentile: 2.0  # method=linear
			
 
				+      white_percentile: 98.0
			
 
				+    debug_options:
			
 
				+      enabled: false              # 由命令行 --debug / --debug-layout 统一控制
			
 
				+      output_dir: null            # null 时使用 pipeline 输出目录
			
 
				+      prefix: ""                  # 文件名前缀（运行时注入 page_name）
			
 
				+      subdir: watermark_removal   # 输出至 debug/watermark_removal/
			
 
				+      save_compare: true          # 保存左右对比图 *_watermark_compare.*
			
 
				+      image_format: "png"         # jpg / png
			
 
				 
			
 
				 # ============================================================
			
 
				 # Layout 检测配置 - 智能路由器（按场景直接选择模型）
			
@@ -71,11 +93,16 @@ layout_detection:
 
				     min_text_width_ratio: 0.4         # 最小宽度占比（40%）
			
 
				     min_text_height_ratio: 0.3        # 最小高度占比（30%）
			
 
				 
			
 
				-  # Debug 可视化配置
			
 
				+  # Debug 可视化（底图为 inference_image，与 Layout 检测输入一致）
			
 
				   debug_options:
			
 
				-    enabled: false              # 由命令行 --debug 统一控制，勿在此 hardcode true
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码）
			
 
				+    enabled: false              # 由命令行 --debug / --debug-layout 控制
			
 
				+    output_dir: null            # null 时由 pipeline 按页注入
			
 
				+    prefix: ""
			
 
				+    subdir: layout_detection    # 输出至 debug/layout_detection/
			
 
				+    save_raw: true              # 后处理前
			
 
				+    save_post_processed: true   # 后处理后
			
 
				+    save_json: true
			
 
				+    image_format: "png"
			
 
				 
			
 
				 # ============================================================
			
 
				 # OCR 识别配置
			
@@ -89,6 +116,16 @@ ocr_recognition:
 
				   batch_size: 8
			
 
				   device: "cpu"
			
 
				 
			
 
				+
			
 
				+  # Debug 可视化（底图为 inference_image，与整页 OCR 输入一致）
			
 
				+  debug_options:
			
 
				+    enabled: false              # 由命令行 --debug / --debug-ocr 控制
			
 
				+    output_dir: null
			
 
				+    prefix: ""
			
 
				+    subdir: ocr_recognition     # 输出至 debug/ocr_recognition/
			
 
				+    save_json: true
			
 
				+    image_format: png
			
 
				+
			
 
				 # ============================================================
			
 
				 # 表格分类配置（自动区分有线/无线表格）
			
 
				 # ============================================================
			
@@ -100,11 +137,12 @@ table_classification:
 
				 
			
 
				   # Debug 可视化配置
			
 
				   debug_options:
			
 
				-    enabled: false              # 由命令行 --debug 统一控制，勿在此 hardcode true
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    save_table_lines: true       # 保存表格线可视化（unet横线/竖线叠加）
			
 
				-    image_format: "png"          # 可视化图片格式：png/jpg
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码/表格序号）
			
 
				+    enabled: false              # 由命令行 --debug / --debug-table 统一控制
			
 
				+    output_dir: null            # null 时由 pipeline 按页注入
			
 
				+    prefix: ""
			
 
				+    subdir: table_classification  # 输出至 debug/table_classification/
			
 
				+    save_table_lines: true      # paddle 线条检测叠加图
			
 
				+    image_format: "png"
			
 
				 
			
 
				 # ============================================================
			
 
				 # 有线表格识别专用配置（MinerU UNet）
			
@@ -144,14 +182,16 @@ table_recognition_wired:
 
				 
			
 
				   # Debug 可视化配置
			
 
				   debug_options:
			
 
				-    enabled: false              # 由命令行 --debug 统一控制，勿在此 hardcode true
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    save_table_lines: true       # 保存表格线可视化（unet横线/竖线叠加）
			
 
				-    save_connected_components: true  # 保存连通域提取的单元格图
			
 
				-    save_grid_structure: true    # 保存逻辑网格结构（row/col/rowspan/colspan）
			
 
				-    save_text_overlay: true      # 保存文本填充覆盖图
			
 
				-    image_format: "png"          # 可视化图片格式：png/jpg
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码/表格序号）
			
 
				+    enabled: false              # 由命令行 --debug / --debug-table 统一控制
			
 
				+    output_dir: null            # null 时由 pipeline 按页注入
			
 
				+    prefix: ""
			
 
				+    subdir: table_recognition_wired  # 输出至 debug/table_recognition_wired/
			
 
				+    save_table_lines: true
			
 
				+    save_connected_components: true
			
 
				+    save_grid_structure: true
			
 
				+    save_text_overlay: true
			
 
				+    image_format: "png"
			
 
				+    # 单元格二次 OCR 裁剪图：debug/table_recognition_wired/tablecell_ocr/
			
 
				 
			
 
				 # ============================================================
			
 
				 # VL识别配置 - 使用 PaddleOcr-VL（无线表格 + seal识别）
			
--- a/ocr_tools/universal_doc_parser/config/bank_statement_smart_router.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_smart_router.yaml
@@ -13,20 +13,43 @@ input:
 
				 
			
 
				 preprocessor:
			
 
				   module: "mineru"
			
 
				+  # 页级预处理顺序：orient_first=先扶正再去水印（银行斜纹水印推荐）；watermark_first=兼容旧行为
			
 
				+  order: orient_first
			
 
				   orientation_classifier:
			
 
				     enabled: true
			
 
				+    model_name: "paddle_orientation_classification"
			
 
				+    model_dir: null  # 使用默认路径
			
 
				+  unwarping:
			
 
				+    enabled: false
			
 
				   # -------------------------------------------------------
			
 
				   # 水印去除配置（适用于银行流水浅色斜向文字水印）
			
 
				   # -------------------------------------------------------
			
 
				   watermark_removal:
			
 
				-    enabled: true           # 是否启用水印去除
			
 
				-    threshold: 160          # 灰度阈值（140-180）：高于此值视为水印变白
			
 
				-                            # 值越大保守（残留水印），值越小激进（损失浅色正文）
			
 
				-    morph_close_kernel: 0   # 形态学闭运算核大小（像素），默认的 morph_kernel 改为 0（非二值图像时形态学闭运算会适得其反）
			
 
				-
			
 
				-# ============================================================
			
 
				-# 智能布局模型路由器配置
			
 
				-# ============================================================
			
 
				+    enabled: false           # 是否启用水印去除
			
 
				+    method: threshold # threshold | masked | masked_adaptive
			
 
				+    threshold: 175          # 全局阈值或掩膜失败时的回退阈值（140-180）
			
 
				+    morph_close_kernel: 0   # 去水印后灰度图闭运算，0 跳过
			
 
				+    # 去水印后对比度增强（text_restore 将笔画拉深，比全局 gamma 更接近原图）
			
 
				+    contrast_enhancement:
			
 
				+      enabled: true
			
 
				+      method: text_restore   # text_restore | clahe | gamma | linear
			
 
				+      text_black_target: 85  # 略提高，减轻去水印后笔画被拉花（原 75 过深）
			
 
				+      background_threshold: 248
			
 
				+      text_lo_percentile: 1.0
			
 
				+      text_hi_percentile: 99.0
			
 
				+      gamma: 0.75            # method=gamma 时生效
			
 
				+      clip_limit: 2.0        # method=clahe
			
 
				+      tile_grid_size: 8
			
 
				+      black_percentile: 2.0  # method=linear
			
 
				+      white_percentile: 98.0
			
 
				+    debug_options:
			
 
				+      enabled: false              # 由命令行 --debug / --debug-layout 统一控制
			
 
				+      output_dir: null            # null 时使用 pipeline 输出目录
			
 
				+      prefix: ""                  # 文件名前缀（运行时注入 page_name）
			
 
				+      subdir: watermark_removal   # 输出至 debug/watermark_removal/
			
 
				+      save_compare: true          # 保存左右对比图 *_watermark_compare.*
			
 
				+      image_format: "png"         # jpg / png
			
 
				+
			
 
				 layout_detection:
			
 
				   module: "smart_router"
			
 
				   strategy: "ocr_eval"  # ocr_eval（推荐，基于OCR评估选择最佳）, auto（快速模式，基于文档特征）
			
@@ -81,6 +104,16 @@ ocr_recognition:
 
				   batch_size: 8
			
 
				   device: "cpu"
			
 
				 
			
 
				+
			
 
				+  # Debug 可视化（底图为 inference_image，与整页 OCR 输入一致）
			
 
				+  debug_options:
			
 
				+    enabled: false              # controlled by the --debug / --debug-ocr CLI flags; do not hardcode true here
			
 
				+    output_dir: null
			
 
				+    prefix: ""
			
 
				+    subdir: ocr_recognition     # written to debug/ocr_recognition/
			
 
				+    save_json: true
			
 
				+    image_format: "png"
			
 
				+
			
 
				 # 表格分类配置（自动区分有线/无线表格）
			
 
				 table_classification:
			
 
				   enabled: true               # 是否启用自动表格分类（默认关闭，使用手动配置）
			
@@ -88,14 +121,14 @@ table_classification:
 
				   confidence_threshold: 0.5   # 分类置信度阈值
			
 
				   batch_size: 16              # 批处理大小
			
 
				 
			
 
				-  # Debug 可视化配置（与 MinerUWiredTableRecognizer.DebugOptions 对齐）
			
 
				-  # 默认关闭。开启后将保存：表格线
			
 
				+  # Debug visualization options for table classification
			
 
				   debug_options:
			
 
				-    enabled: true               # 是否开启调试可视化输出
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    save_table_lines: true       # 保存表格线可视化（unet横线/竖线叠加）
			
 
				-    image_format: "png"          # 可视化图片格式：png/jpg
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码/表格序号）
			
 
				+    enabled: false              # controlled by the --debug / --debug-table CLI flags; do not hardcode true here
			
 
				+    output_dir: null            # null: injected per page by the pipeline
			
 
				+    prefix: ""
			
 
				+    subdir: table_classification  # written to debug/table_classification/
			
 
				+    save_table_lines: true      # table-line overlay image
			
 
				+    image_format: "png"
			
 
				 
			
 
				 # 有线表格识别专用配置
			
 
				 table_recognition_wired:
			
@@ -111,17 +144,17 @@ table_recognition_wired:
 
				   # 是否启用倾斜矫正
			
 
				   enable_deskew: true
			
 
				 
			
 
				-  # Debug 可视化配置（与 MinerUWiredTableRecognizer.DebugOptions 对齐）
			
 
				-  # 默认关闭。开启后将保存：表格线、连通域、逻辑网格结构、文本覆盖可视化。
			
 
				+  # Debug visualization options for wired-table recognition
			
 
				   debug_options:
			
 
				-    enabled: true               # 是否开启调试可视化输出
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    save_table_lines: true       # 保存表格线可视化（unet横线/竖线叠加）
			
 
				-    save_connected_components: true  # 保存连通域提取的单元格图
			
 
				-    save_grid_structure: true    # 保存逻辑网格结构（row/col/rowspan/colspan）
			
 
				-    save_text_overlay: true      # 保存文本填充覆盖图
			
 
				-    image_format: "png"          # 可视化图片格式：png/jpg
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码/表格序号）
			
 
				+    enabled: false              # controlled by the --debug / --debug-table CLI flags; do not hardcode true here
			
 
				+    output_dir: null            # null: injected per page by the pipeline
			
 
				+    prefix: ""
			
 
				+    subdir: table_recognition_wired  # written to debug/table_recognition_wired/
			
 
				+    save_table_lines: true
			
 
				+    save_connected_components: true
			
 
				+    save_grid_structure: true
			
 
				+    save_text_overlay: true
			
 
				+    image_format: "png"
			
 
				 
			
 
				 # VLM 表格识别配置（当分类为 'wireless' 时使用）
			
 
				 vl_recognition:
			
--- a/ocr_tools/universal_doc_parser/config/bank_statement_yusys_v4.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_yusys_v4.yaml
@@ -13,6 +13,8 @@ input:
 
				 
			
 
				 preprocessor:
			
 
				   module: "mineru"
			
 
				+  # 页级预处理顺序：orient_first=先扶正再去水印（银行斜纹水印推荐）；watermark_first=兼容旧行为
			
 
				+  order: orient_first
			
 
				   orientation_classifier:
			
 
				     enabled: true
			
 
				     model_name: "paddle_orientation_classification"
			
@@ -23,10 +25,30 @@ preprocessor:
 
				   # 水印去除配置（适用于银行流水浅色斜向文字水印）
			
 
				   # -------------------------------------------------------
			
 
				   watermark_removal:
			
 
				-    enabled: true           # 是否启用水印去除
			
 
				-    threshold: 160          # 灰度阈值（140-180）：高于此值视为水印变白
			
 
				-                            # 值越大保守（残留水印），值越小激进（损失浅色正文）
			
 
				-    morph_close_kernel: 0   # 形态学闭运算核大小（像素），默认的 morph_kernel 改为 0（非二值图像时形态学闭运算会适得其反）
			
 
				+    enabled: false           # whether watermark removal is enabled
			
 
				+    method: threshold # threshold | masked | masked_adaptive
			
 
				+    threshold: 175          # global threshold, or the fallback threshold when masking fails (140-180)
			
 
				+    morph_close_kernel: 0   # morphological close on the grayscale image after watermark removal; 0 skips it
			
 
				+    # Contrast enhancement after watermark removal (text_restore darkens the strokes; closer to the original image than a global gamma)
			
 
				+    contrast_enhancement:
			
 
				+      enabled: true
			
 
				+      method: text_restore   # text_restore | clahe | gamma | linear
			
 
				+      text_black_target: 85  # raised slightly to reduce stroke artifacts after watermark removal (the previous 75 was too dark)
			
 
				+      background_threshold: 248
			
 
				+      text_lo_percentile: 1.0
			
 
				+      text_hi_percentile: 99.0
			
 
				+      gamma: 0.75            # only used when method=gamma
			
 
				+      clip_limit: 2.0        # method=clahe
			
 
				+      tile_grid_size: 8
			
 
				+      black_percentile: 2.0  # method=linear
			
 
				+      white_percentile: 98.0
			
 
				+    debug_options:
			
 
				+      enabled: false              # controlled centrally by the --debug / --debug-layout CLI flags
			
 
				+      output_dir: null            # null: use the pipeline output directory
			
 
				+      prefix: ""                  # filename prefix (page_name is injected at runtime)
			
 
				+      subdir: watermark_removal   # written to debug/watermark_removal/
			
 
				+      save_compare: true          # save the side-by-side comparison image *_watermark_compare.*
			
 
				+      image_format: "png"         # jpg / png
			
 
				 
			
 
				 # ============================================================
			
 
				 # Layout 检测配置 - 智能路由器（按场景直接选择模型）
			
@@ -70,11 +92,16 @@ layout_detection:
 
				     min_text_width_ratio: 0.4         # 最小宽度占比（40%）
			
 
				     min_text_height_ratio: 0.3        # 最小高度占比（30%）
			
 
				 
			
 
				-  # Debug 可视化配置
			
 
				+  # Debug 可视化（底图为 inference_image，与 Layout 检测输入一致）
			
 
				   debug_options:
			
 
				-    enabled: false              # 由命令行 --debug 统一控制，勿在此 hardcode true
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码）
			
 
				+    enabled: false              # controlled by the --debug / --debug-layout CLI flags
			
 
				+    output_dir: null            # null: the pipeline injects a per-page directory
			
 
				+    prefix: ""
			
 
				+    subdir: layout_detection    # written to debug/layout_detection/
			
 
				+    save_raw: true              # before post-processing
			
 
				+    save_post_processed: true   # after post-processing
			
 
				+    save_json: true
			
 
				+    image_format: "png"
			
 
				 
			
 
				 # ============================================================
			
 
				 # OCR 识别配置
			
@@ -88,6 +115,16 @@ ocr_recognition:
 
				   batch_size: 8
			
 
				   device: "cpu"
			
 
				 
			
 
				+
			
 
				+  # Debug visualization (the base image is inference_image, the same input as full-page OCR)
			
 
				+  debug_options:
			
 
				+    enabled: false              # controlled by the --debug / --debug-ocr CLI flags
			
 
				+    output_dir: null
			
 
				+    prefix: ""
			
 
				+    subdir: ocr_recognition     # written to debug/ocr_recognition/
			
 
				+    save_json: true
			
 
				+    image_format: "png"
			
 
				+
			
 
				 # ============================================================
			
 
				 # 表格分类配置（自动区分有线/无线表格）
			
 
				 # ============================================================
			
@@ -99,11 +136,12 @@ table_classification:
 
				 
			
 
				   # Debug 可视化配置
			
 
				   debug_options:
			
 
				-    enabled: false              # 由命令行 --debug 统一控制，勿在此 hardcode true
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    save_table_lines: true       # 保存表格线可视化（unet横线/竖线叠加）
			
 
				-    image_format: "png"          # 可视化图片格式：png/jpg
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码/表格序号）
			
 
				+    enabled: false              # controlled centrally by the --debug / --debug-table CLI flags
			
 
				+    output_dir: null            # null: the pipeline injects a per-page directory
			
 
				+    prefix: ""
			
 
				+    subdir: table_classification  # written to debug/table_classification/
			
 
				+    save_table_lines: true      # overlay image of the paddle line detection
			
 
				+    image_format: "png"
			
 
				 
			
 
				 # ============================================================
			
 
				 # 有线表格识别专用配置（MinerU UNet）
			
@@ -143,14 +181,16 @@ table_recognition_wired:
 
				 
			
 
				   # Debug 可视化配置
			
 
				   debug_options:
			
 
				-    enabled: false              # 由命令行 --debug 统一控制，勿在此 hardcode true
			
 
				-    output_dir: null             # 调试输出目录；null不输出
			
 
				-    save_table_lines: true       # 保存表格线可视化（unet横线/竖线叠加）
			
 
				-    save_connected_components: true  # 保存连通域提取的单元格图
			
 
				-    save_grid_structure: true    # 保存逻辑网格结构（row/col/rowspan/colspan）
			
 
				-    save_text_overlay: true      # 保存文本填充覆盖图
			
 
				-    image_format: "png"          # 可视化图片格式：png/jpg
			
 
				-    prefix: ""                  # 保存文件名前缀（如设置为页码/表格序号）
			
 
				+    enabled: false              # controlled centrally by the --debug / --debug-table CLI flags
			
 
				+    output_dir: null            # null: the pipeline injects a per-page directory
			
 
				+    prefix: ""
			
 
				+    subdir: table_recognition_wired  # written to debug/table_recognition_wired/
			
 
				+    save_table_lines: true
			
 
				+    save_connected_components: true
			
 
				+    save_grid_structure: true
			
 
				+    save_text_overlay: true
			
 
				+    image_format: "png"
			
 
				+    # Note: cropped images from the per-cell second-pass OCR go to debug/table_recognition_wired/tablecell_ocr/
			
 
				 
			
 
				 # ============================================================
			
 
				 # VL识别配置 - 使用 GLM-OCR（无线表格 + seal识别）