|
@@ -22,34 +22,23 @@ preprocessor:
|
|
|
model_dir: null # 使用默认路径
|
|
model_dir: null # 使用默认路径
|
|
|
unwarping:
|
|
unwarping:
|
|
|
enabled: false
|
|
enabled: false
|
|
|
- # -------------------------------------------------------
|
|
|
|
|
- # 水印去除配置(适用于银行流水浅色斜向文字水印)
|
|
|
|
|
- # -------------------------------------------------------
|
|
|
|
|
|
|
+ # 页级水印(细节参见 ocr_utils/watermark/presets.py PAGE_WATERMARK_PRESETS)
|
|
|
watermark_removal:
|
|
watermark_removal:
|
|
|
- enabled: false # 是否启用水印去除
|
|
|
|
|
- method: threshold # threshold | masked | masked_adaptive
|
|
|
|
|
- threshold: 175 # 全局阈值或掩膜失败时的回退阈值(140-180)
|
|
|
|
|
- morph_close_kernel: 0 # 去水印后灰度图闭运算,0 跳过
|
|
|
|
|
- # 去水印后对比度增强(text_restore 将笔画拉深,比全局 gamma 更接近原图)
|
|
|
|
|
|
|
+ enabled: false
|
|
|
|
|
+ detect_before_remove: true
|
|
|
|
|
+ method: threshold # threshold | masked | masked_adaptive
|
|
|
|
|
+ threshold: 175
|
|
|
contrast_enhancement:
|
|
contrast_enhancement:
|
|
|
- enabled: true
|
|
|
|
|
- method: text_restore # text_restore | clahe | gamma | linear
|
|
|
|
|
- text_black_target: 85 # 略提高,减轻去水印后笔画被拉花(原 75 过深)
|
|
|
|
|
- background_threshold: 248
|
|
|
|
|
- text_lo_percentile: 1.0
|
|
|
|
|
- text_hi_percentile: 99.0
|
|
|
|
|
- gamma: 0.75 # method=gamma 时生效
|
|
|
|
|
- clip_limit: 2.0 # method=clahe
|
|
|
|
|
- tile_grid_size: 8
|
|
|
|
|
- black_percentile: 2.0 # method=linear
|
|
|
|
|
- white_percentile: 98.0
|
|
|
|
|
|
|
+ enabled: false
|
|
|
|
|
+ method: text_restore
|
|
|
|
|
+ text_black_target: 85
|
|
|
debug_options:
|
|
debug_options:
|
|
|
- enabled: false # 由命令行 --debug / --debug-layout 统一控制
|
|
|
|
|
- output_dir: null # null 时使用 pipeline 输出目录
|
|
|
|
|
- prefix: "" # 文件名前缀(运行时注入 page_name)
|
|
|
|
|
- subdir: watermark_removal # 输出至 debug/watermark_removal/
|
|
|
|
|
- save_compare: true # 保存左右对比图 *_watermark_compare.*
|
|
|
|
|
- image_format: "png" # jpg / png
|
|
|
|
|
|
|
+ enabled: false
|
|
|
|
|
+ output_dir: null
|
|
|
|
|
+ prefix: ""
|
|
|
|
|
+ subdir: watermark_removal
|
|
|
|
|
+ save_compare: true
|
|
|
|
|
+ image_format: "png"
|
|
|
|
|
|
|
|
# ============================================================
|
|
# ============================================================
|
|
|
# Layout 检测配置 - 智能路由器(按场景直接选择模型)
|
|
# Layout 检测配置 - 智能路由器(按场景直接选择模型)
|