ソースを参照

fix(调整水印处理与单元格预处理配置): 更新bank_statement_yusys_local.yaml中的水印处理方法和对比度增强设置,调整阈值和启用状态,以优化OCR处理效果和灵活性。

zhch158_admin 3 日 前
コミット
70f36c0904

+ 9 - 19
ocr_tools/universal_doc_parser/config/bank_statement_yusys_local.yaml

@@ -26,11 +26,10 @@ preprocessor:
   watermark_removal:
     enabled: false
     detect_before_remove: true
-    method: masked_adaptive   # threshold | masked | masked_adaptive
+    method: threshold   # threshold | masked | masked_adaptive
     threshold: 175
-    morph_close_kernel: 0
     contrast_enhancement:
-      enabled: true
+      enabled: false
       method: text_restore
       text_black_target: 85
     debug_options:
@@ -180,31 +179,22 @@ table_recognition_wired:
     prefer_whole_on_tie: true
     whole_longer_min_extra_chars: 2  # 整格/条带文本比分行多长至少 N 字则优先
     strip_fallback_aspect_ratio: 1.8 # 高/宽>=该值且仅检出<=1行时滑动条带分行
+    suspicious_short_min_chars: 4    # 高分但过短仍跑整格/条带兜底(与 enhance_retry 无关)
     cell_preprocess:
       watermark:
         enabled: true
-        method: masked_adaptive
+        method: threshold
       denoise:
         enabled: false   # 小格 median 易糊笔画;lab 用 --denoise 对比
-        method: median
       contrast:
-        enabled: false
+        enabled: false   # Pass1 去水印后可选;lab 对比 text_restore
         method: text_restore
         text_black_target: 88
       light:
-        upscale_min_side: 64
-      enhance_retry:
-        enabled: false
-        score_below: 0.90
-        min_chars: 4
-        short_text_in_tall_cell: true
-        contrast:
-          enabled: true
-          method: text_restore
-          text_black_target: 75
-        sharpen:
-          enabled: false
-          amount: 0.3
+        upscale_min_side: 192  # 128, 192 用于难例日期列
+    enhance_retry:
+      enabled: false
+      # enabled: true 时 Pass2 预处理,默认见代码(cell_preprocess.enhance_retry 已废弃)
 
   # Debug 可视化配置
   debug_options: