|
|
@@ -168,33 +168,28 @@ table_recognition_wired:
|
|
|
# 功能开关
|
|
|
enable_ocr_compensation: true # 启用OCR边缘补偿
|
|
|
|
|
|
- # 单元格二次 OCR(det 分行 + 整格/条带兜底 + 低分笔画增强重试)
|
|
|
+ # 单元格二次 OCR(参数对齐 cell_sweep lab:threshold_t150_cl_1.0_8_ob_u128 / Pass2 tile=4)
|
|
|
second_pass_ocr:
|
|
|
- reocr_mode: bank_statement # 表体空单元必跑 + 同行多数非空则空格也跑
|
|
|
- header_row: 0 # 表头行号(0=首行)
|
|
|
- row_peer_min_nonempty: 5 # 同行至少 N 个非空格时,本格空也触发二次 OCR
|
|
|
- line_min_score: 0.8 # 低于此分的分行从文本与计分中丢弃
|
|
|
- drop_low_score_blocks: true
|
|
|
- whole_cell_fallback: true # 整格 det=False 兜底 + 条带扫描
|
|
|
- prefer_whole_on_tie: true
|
|
|
- whole_longer_min_extra_chars: 2 # 整格/条带文本比分行多长至少 N 字则优先
|
|
|
- strip_fallback_aspect_ratio: 1.8 # 高/宽>=该值且仅检出<=1行时滑动条带分行
|
|
|
- suspicious_short_min_chars: 4 # 高分但过短仍跑整格/条带兜底(与 enhance_retry 无关)
|
|
|
+ reocr_mode: bank_statement
|
|
|
+ line_min_score: 0.8
|
|
|
cell_preprocess:
|
|
|
watermark:
|
|
|
enabled: true
|
|
|
method: threshold
|
|
|
- denoise:
|
|
|
- enabled: false # 小格 median 易糊笔画;lab 用 --denoise 对比
|
|
|
- contrast:
|
|
|
- enabled: false # Pass1 去水印后可选;lab 对比 text_restore
|
|
|
- method: text_restore
|
|
|
- text_black_target: 88
|
|
|
- light:
|
|
|
- upscale_min_side: 192 # 128, 192 用于难例日期列
|
|
|
- enhance_retry:
|
|
|
- enabled: false
|
|
|
- # enabled: true 时 Pass2 预处理,默认见代码(cell_preprocess.enhance_retry 已废弃)
|
|
|
+ threshold: 150
|
|
|
+ contrast: # Pass1:去水印后 CLAHE
|
|
|
+ enabled: true
|
|
|
+ method: clahe
|
|
|
+ clip_limit: 1.0
|
|
|
+ tile_grid_size: 8
|
|
|
+ upscale_min_side: 128
|
|
|
+ enhance_retry: # Pass2:低分/难例再试,tile_grid_size 由 8 改为 4
|
|
|
+ enabled: true
|
|
|
+ contrast:
|
|
|
+ enabled: true
|
|
|
+ method: clahe
|
|
|
+ clip_limit: 1.0
|
|
|
+ tile_grid_size: 4
|
|
|
|
|
|
# Debug 可视化配置
|
|
|
debug_options:
|