Quellcode durchsuchen

feat(优化银行对账单水印去除与单元格二次OCR配置): 更新bank_statement_yusys_local.yaml,简化水印去除配置,增强对水印检测的支持,新增单元格预处理选项,提升OCR处理的灵活性和准确性。

zhch158_admin vor 4 Tagen
Ursprung
Commit
6f98aaba58
1 geänderte Dateien mit 45 neuen und 71 gelöschten Zeilen
  1. 45 71
      ocr_tools/universal_doc_parser/config/bank_statement_yusys_local.yaml

+ 45 - 71
ocr_tools/universal_doc_parser/config/bank_statement_yusys_local.yaml

@@ -22,79 +22,24 @@ preprocessor:
     model_dir: null  # 使用默认路径
   unwarping:
     enabled: false
-  # -------------------------------------------------------
-  # 水印去除配置(适用于银行流水浅色斜向文字水印)
-  # -------------------------------------------------------
+  # 页级水印(详细参数见 ocr_utils/watermark/presets.py PAGE_WATERMARK_PRESETS)
   watermark_removal:
-    enabled: false           # 是否启用水印去除
-    method: masked_adaptive # threshold | masked | masked_adaptive
-    threshold: 175          # 全局阈值或掩膜失败时的回退阈值(140-180)
-    morph_close_kernel: 0   # 去水印后灰度图闭运算,0 跳过
-    mask:
-      mask_mode: light_on_white     # light_on_white | diagonal_midtone
-      text_protect_gray_max: 130    # gray<=130 正文硬保护,永不置白
-      light_gray_low: 236           # 浅色候选(geom_candidate 用)
-      light_gray_high: 253
-      whiten_gray_low: 200          # 几何带内置白灰度下限(方案 E,低于 candidate)
-      direction_filter: hough       # hough=方案C斜向线段 | block=旧分块梯度
-      morph_close_kernel: 0
-      morph_dilate_kernel: 0
-      min_component_area: 200
-      debug_block_maps: true        # 输出 diag/hv 热力图
-      debug_block_size: 48
-      hough_midtone_low: 200        # Canny 仅在中间调带
-      hough_midtone_high: 254
-      hough_canny_low: 30
-      hough_canny_high: 100
-      hough_threshold: 25
-      hough_min_line_length: 35
-      hough_max_line_gap: 18
-      hough_line_thickness: 12
-      hough_band_dilate_radius: 16
-      hough_use_angle_statistics: true   # 角度直方图统计主峰
-      hough_angle_tolerance: 5.0       # 与主峰角度差≤该值(度)
-      hough_secondary_peak_ratio: 0.35 # 次峰相对主峰权重
-      hough_min_length_percentile: 25.0  # 过滤短线段
-      midtone_low: 95
-      midtone_high: 235           # diagonal_midtone 模式用
-      remove_horizontal_vertical: true
-      diagonal_enhance: true
-      diagonal_kernel_length: 25
-      horizontal_kernel_length: 35
-      vertical_kernel_length: 35
-      morph_open_kernel: 2
-      dmorph_close_kernel: 3
-      text_protect_percentile: 10.0
-      background_threshold: 248
-      seal_protect: true
-    adaptive:
-      whiten_mode: mask_fill       # mask_fill=掩膜内一律置白 | threshold_in_mask
-      text_percentile: 10.0
-      watermark_percentile: 70.0   # threshold_in_mask 时生效
-      background_percentile: 95.0
-      background_threshold: 248
-      wm_margin: 12
-      text_protect_max: 120
-    # 去水印后对比度增强(text_restore 将笔画拉深,比全局 gamma 更接近原图)
+    enabled: false
+    detect_before_remove: true
+    method: masked_adaptive   # threshold | masked | masked_adaptive
+    threshold: 175
+    morph_close_kernel: 0
     contrast_enhancement:
       enabled: true
-      method: text_restore   # text_restore | clahe | gamma | linear
-      text_black_target: 85  # 略提高,减轻去水印后笔画被拉花(原 75 过深)
-      background_threshold: 248
-      text_lo_percentile: 1.0
-      text_hi_percentile: 99.0
-      gamma: 0.75            # method=gamma 时生效
-      clip_limit: 2.0        # method=clahe
-      tile_grid_size: 8
-      black_percentile: 2.0  # method=linear
-      white_percentile: 98.0
+      method: text_restore
+      text_black_target: 85
     debug_options:
-      enabled: false              # 由命令行 --debug / --debug-layout 统一控制
-      output_dir: null            # null 时使用 pipeline 输出目录
-      prefix: ""                  # 文件名前缀(运行时注入 page_name)
-      subdir: watermark_removal   # 输出至 debug/watermark_removal/
-      save_compare: true          # 保存左右对比图 *_watermark_compare.*
-      image_format: "png"         # jpg / png
+      enabled: false
+      output_dir: null
+      prefix: ""
+      subdir: watermark_removal
+      save_compare: true
+      image_format: "png"
 
 # ============================================================
 # Layout 检测配置 - 智能路由器(按场景直接选择模型)
@@ -224,13 +169,42 @@ table_recognition_wired:
     # 功能开关
     enable_ocr_compensation: true      # 启用OCR边缘补偿
 
-  # 单元格二次 OCR(det 分行 + 整格兜底 + 低分块过滤
+  # 单元格二次 OCR(det 分行 + 整格/条带兜底 + 低分笔画增强重试)
   second_pass_ocr:
+    reocr_mode: bank_statement       # 表体空单元必跑 + 同行多数非空则空格也跑
+    header_row: 0                    # 表头行号(0=首行)
+    row_peer_min_nonempty: 5         # 同行至少 N 个非空格时,本格空也触发二次 OCR
     line_min_score: 0.8              # 低于此分的分行从文本与计分中丢弃
     drop_low_score_blocks: true
-    whole_cell_fallback: true        # 整格 det+rec + 条带扫描,与分行择优
+    whole_cell_fallback: true        # 整格 det=False 兜底 + 条带扫描
     prefer_whole_on_tie: true
+    whole_longer_min_extra_chars: 2  # 整格/条带文本比分行文本至少多 N 个字符则优先
     strip_fallback_aspect_ratio: 1.8 # 高/宽>=该值且仅检出<=1行时滑动条带分行
+    cell_preprocess:
+      watermark:
+        enabled: true
+        method: masked_adaptive
+      denoise:
+        enabled: false   # 小格 median 易糊笔画;lab 用 --denoise 对比
+        method: median
+      contrast:
+        enabled: false
+        method: text_restore
+        text_black_target: 88
+      light:
+        upscale_min_side: 64
+      enhance_retry:
+        enabled: false
+        score_below: 0.90
+        min_chars: 4
+        short_text_in_tall_cell: true
+        contrast:
+          enabled: true
+          method: text_restore
+          text_black_target: 75
+        sharpen:
+          enabled: false
+          amount: 0.3
 
   # Debug 可视化配置
   debug_options: