Sfoglia il codice sorgente

feat(更新银行流水配置): 修改 bank_statement_glm_vl_local.yaml 以禁用水印去除功能;更新 bank_statement_yusys_local.yaml 以增强水印处理配置,添加新参数和调试选项,提升处理灵活性和准确性。

zhch158_admin 5 giorni fa
parent
commit
ad60ed5eca

+ 1 - 1
ocr_tools/universal_doc_parser/config/bank_statement_glm_vl_local.yaml

@@ -24,7 +24,7 @@ preprocessor:
   # 水印去除配置(适用于银行流水浅色斜向文字水印)
   # -------------------------------------------------------
   watermark_removal:
-    enabled: true           # 是否启用水印去除
+    enabled: false          # 是否启用水印去除
     threshold: 160          # 灰度阈值(140-180):高于此值视为水印变白
                             # 值越大越保守(残留水印),值越小越激进(损失浅色正文)
     morph_close_kernel: 0   # 形态学闭运算核大小(像素),默认的 morph_kernel 改为 0(非二值图像时形态学闭运算会适得其反)

+ 87 - 7
ocr_tools/universal_doc_parser/config/bank_statement_yusys_local.yaml

@@ -14,6 +14,8 @@ input:
 
 preprocessor:
   module: "mineru"
+  # 页级预处理顺序:orient_first=先扶正再去水印(银行斜纹水印推荐);watermark_first=兼容旧行为
+  order: orient_first
   orientation_classifier:
     enabled: true
     model_name: "paddle_orientation_classification"
@@ -25,9 +27,73 @@ preprocessor:
   # -------------------------------------------------------
   watermark_removal:
     enabled: true           # 是否启用水印去除
-    threshold: 160          # 灰度阈值(140-180):高于此值视为水印变白
-                            # 值越大保守(残留水印),值越小激进(损失浅色正文)
-    morph_close_kernel: 0   # 形态学闭运算核大小(像素),默认的 morph_kernel 改为 0(非二值图像时形态学闭运算会适得其反)
+    method: masked_adaptive # threshold | masked | masked_adaptive
+    threshold: 175          # 全局阈值或掩膜失败时的回退阈值(140-180)
+    morph_close_kernel: 0   # 去水印后灰度图闭运算,0 跳过
+    mask:
+      mask_mode: light_on_white     # light_on_white | diagonal_midtone
+      text_protect_gray_max: 130    # gray<=130 正文硬保护,永不置白
+      light_gray_low: 236           # 浅色候选(geom_candidate 用)
+      light_gray_high: 253
+      whiten_gray_low: 200          # 几何带内置白灰度下限(方案 E,低于 candidate)
+      direction_filter: hough       # hough=方案C斜向线段 | block=旧分块梯度
+      morph_close_kernel: 0
+      morph_dilate_kernel: 0
+      min_component_area: 200
+      debug_block_maps: true        # 输出 diag/hv 热力图
+      debug_block_size: 48
+      hough_midtone_low: 200        # Canny 仅在中间调带
+      hough_midtone_high: 254
+      hough_canny_low: 30
+      hough_canny_high: 100
+      hough_threshold: 25
+      hough_min_line_length: 35
+      hough_max_line_gap: 18
+      hough_line_thickness: 12
+      hough_band_dilate_radius: 16
+      hough_use_angle_statistics: true   # 角度直方图统计主峰
+      hough_angle_tolerance: 5.0       # 与主峰角度差≤该值(度)
+      hough_secondary_peak_ratio: 0.35 # 次峰相对主峰权重
+      hough_min_length_percentile: 25.0  # 过滤短线段
+      midtone_low: 95
+      midtone_high: 235           # diagonal_midtone 模式用
+      remove_horizontal_vertical: true
+      diagonal_enhance: true
+      diagonal_kernel_length: 25
+      horizontal_kernel_length: 35
+      vertical_kernel_length: 35
+      morph_open_kernel: 2
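+      dmorph_close_kernel: 3      # NOTE(review): name differs from the sibling morph_* keys, and morph_close_kernel is already set earlier in this mapping — confirm the consumer actually reads this key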
+      text_protect_percentile: 10.0
+      background_threshold: 248
+      seal_protect: true
+    adaptive:
+      whiten_mode: mask_fill       # mask_fill=掩膜内一律置白 | threshold_in_mask
+      text_percentile: 10.0
+      watermark_percentile: 70.0   # threshold_in_mask 时生效
+      background_percentile: 95.0
+      background_threshold: 248
+      wm_margin: 12
+      text_protect_max: 120
+    # 去水印后对比度增强(text_restore 将笔画拉深,比全局 gamma 更接近原图)
+    contrast_enhancement:
+      enabled: true
+      method: text_restore   # text_restore | clahe | gamma | linear
+      text_black_target: 85  # 略提高,减轻去水印后笔画被拉花(原 75 过深)
+      background_threshold: 248
+      text_lo_percentile: 1.0
+      text_hi_percentile: 99.0
+      gamma: 0.75            # method=gamma 时生效
+      clip_limit: 2.0        # method=clahe
+      tile_grid_size: 8
+      black_percentile: 2.0  # method=linear
+      white_percentile: 98.0
+    debug_options:
+      enabled: false              # 由命令行 --debug / --debug-layout 统一控制
+      output_dir: null            # null 时使用 pipeline 输出目录
+      prefix: ""                  # 文件名前缀(运行时注入 page_name)
+      save_compare: true          # 保存左右对比图 *_watermark_compare.*
+      image_format: "png"         # jpg / png
 
 # ============================================================
 # Layout 检测配置 - 智能路由器(按场景直接选择模型)
@@ -71,11 +137,16 @@ layout_detection:
     min_text_width_ratio: 0.4         # 最小宽度占比(40%)
     min_text_height_ratio: 0.3        # 最小高度占比(30%)
 
-  # Debug 可视化配置
+  # Debug 可视化(底图为 inference_image,与 Layout 检测输入一致)
   debug_options:
-    enabled: false              # 由命令行 --debug 统一控制,勿在此 hardcode true
-    output_dir: null             # 调试输出目录;null不输出
-    prefix: ""                  # 保存文件名前缀(如设置为页码)
+    enabled: false              # 由命令行 --debug / --debug-layout 控制
+    output_dir: null            # null 时由 pipeline 按页注入
+    prefix: ""
+    subdir: layout_detection    # 输出至 debug_comparison/layout_detection/
+    save_raw: true              # 后处理前
+    save_post_processed: true   # 后处理后
+    save_json: true
+    image_format: "png"
 
 # ============================================================
 # OCR 识别配置
@@ -89,6 +160,15 @@ ocr_recognition:
   batch_size: 8
   device: "cpu"
 
+  # Debug 可视化(底图为 inference_image,与整页 OCR 输入一致)
+  debug_options:
+    enabled: false              # 由命令行 --debug / --debug-ocr 控制
+    output_dir: null
+    prefix: ""
+    subdir: ocr_recognition     # 输出至 debug_comparison/ocr_recognition/
+    save_json: true
+    image_format: "png"
+
 # ============================================================
 # 表格分类配置(自动区分有线/无线表格)
 # ============================================================