Ver Fonte

fix(优化单元格二次OCR配置): 更新bank_statement_yusys_local.yaml中的二次OCR配置,调整参数以增强水印去除和对比度处理,提升OCR处理的准确性与灵活性。

zhch158_admin há 2 dias atrás
pai
commit
6d733e96c5

+ 17 - 22
ocr_tools/universal_doc_parser/config/bank_statement_yusys_local.yaml

@@ -168,33 +168,28 @@ table_recognition_wired:
     # 功能开关
     enable_ocr_compensation: true      # 启用OCR边缘补偿
 
-  # 单元格二次 OCR(det 分行 + 整格/条带兜底 + 低分笔画增强重试
+  # 单元格二次 OCR(参数对齐 cell_sweep lab:threshold_t150_cl_1.0_8_ob_u128 / Pass2 tile=4)
   second_pass_ocr:
-    reocr_mode: bank_statement       # 表体空单元必跑 + 同行多数非空则空格也跑
-    header_row: 0                    # 表头行号(0=首行)
-    row_peer_min_nonempty: 5         # 同行至少 N 个非空格时,本格空也触发二次 OCR
-    line_min_score: 0.8              # 低于此分的分行从文本与计分中丢弃
-    drop_low_score_blocks: true
-    whole_cell_fallback: true        # 整格 det=False 兜底 + 条带扫描
-    prefer_whole_on_tie: true
-    whole_longer_min_extra_chars: 2  # 整格/条带文本比分行多长至少 N 字则优先
-    strip_fallback_aspect_ratio: 1.8 # 高/宽>=该值且仅检出<=1行时滑动条带分行
-    suspicious_short_min_chars: 4    # 高分但过短仍跑整格/条带兜底(与 enhance_retry 无关)
+    reocr_mode: bank_statement
+    line_min_score: 0.8
     cell_preprocess:
       watermark:
         enabled: true
         method: threshold
-      denoise:
-        enabled: false   # 小格 median 易糊笔画;lab 用 --denoise 对比
-      contrast:
-        enabled: false   # Pass1 去水印后可选;lab 对比 text_restore
-        method: text_restore
-        text_black_target: 88
-      light:
-        upscale_min_side: 192  # 128, 192 用于难例日期列
-    enhance_retry:
-      enabled: false
-      # enabled: true 时 Pass2 预处理,默认见代码(cell_preprocess.enhance_retry 已废弃)
+        threshold: 150
+      contrast:                      # Pass1:去水印后 CLAHE
+        enabled: true
+        method: clahe
+        clip_limit: 1.0
+        tile_grid_size: 8
+      upscale_min_side: 128
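+      enhance_retry:                   # Pass2:低分/难例再试,更细 tile; NOTE(review): the previous revision's comment marked cell_preprocess.enhance_retry as deprecated (已废弃) - confirm this nesting is still read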
+        enabled: true
+        contrast:
+          enabled: true
+          method: clahe
+          clip_limit: 1.0
+          tile_grid_size: 4
 
   # Debug 可视化配置
   debug_options: