""" 银行流水等场景的水印去除预设(页级 / 单元格级)。 对外 YAML 只需 method、enabled、contrast_enhancement 等少量键; mask / hough / adaptive 细参由此模块提供,避免配置漂移。 """ from __future__ import annotations import copy from typing import Any, Dict, Literal, Optional Scope = Literal["page", "cell"] Method = Literal["threshold", "masked", "masked_adaptive"] _DETECT_DEFAULT: Dict[str, Any] = { "ratio_threshold": 0.025, "midtone_low": 100, "midtone_high": 220, "check_diagonal": True, "diagonal_angle_range": (30, 60), } _MASK_PAGE: Dict[str, Any] = { "mask_mode": "light_on_white", "text_protect_gray_max": 130, "light_gray_low": 236, "light_gray_high": 253, "whiten_gray_low": 200, "direction_filter": "hough", "morph_close_kernel": 0, "morph_dilate_kernel": 0, "min_component_area": 200, "debug_block_maps": False, "debug_block_size": 48, "hough_midtone_low": 200, "hough_midtone_high": 254, "hough_canny_low": 30, "hough_canny_high": 100, "hough_threshold": 25, "hough_min_line_length": 35, "hough_max_line_gap": 18, "hough_line_thickness": 12, "hough_band_dilate_radius": 16, "hough_use_angle_statistics": True, "hough_angle_tolerance": 5.0, "hough_secondary_peak_ratio": 0.35, "hough_min_length_percentile": 25.0, "midtone_low": 95, "midtone_high": 235, "remove_horizontal_vertical": True, "diagonal_enhance": True, "diagonal_kernel_length": 25, "horizontal_kernel_length": 35, "vertical_kernel_length": 35, "morph_open_kernel": 2, "dmorph_close_kernel": 3, "text_protect_percentile": 10.0, "background_threshold": 248, "seal_protect": True, } _MASK_CELL: Dict[str, Any] = { **_MASK_PAGE, "min_component_area": 60, "hough_min_line_length": 18, "hough_max_line_gap": 12, "hough_line_thickness": 8, "hough_band_dilate_radius": 10, "hough_threshold": 20, "text_protect_gray_max": 125, } _ADAPTIVE_PAGE: Dict[str, Any] = { "whiten_mode": "mask_fill", "text_percentile": 10.0, "watermark_percentile": 70.0, "background_percentile": 95.0, "background_threshold": 248, "wm_margin": 12, "text_protect_max": 120, } _ADAPTIVE_CELL: Dict[str, Any] = { **_ADAPTIVE_PAGE, "text_protect_max": 110, "wm_margin": 10, } _CONTRAST_PAGE_DEFAULT: Dict[str, Any] = { "enabled": True, "method": "text_restore", "text_black_target": 85, "background_threshold": 248, "text_lo_percentile": 1.0, "text_hi_percentile": 99.0, } _CONTRAST_CELL_DEFAULT: Dict[str, Any] = { "enabled": False, "method": "text_restore", "text_black_target": 88, "background_threshold": 248, "text_lo_percentile": 1.0, "text_hi_percentile": 99.0, } def _base_preset(scope: Scope, method: Method) -> Dict[str, Any]: mask = _MASK_CELL if scope == "cell" else _MASK_PAGE adaptive = _ADAPTIVE_CELL if scope == "cell" else _ADAPTIVE_PAGE contrast = ( copy.deepcopy(_CONTRAST_CELL_DEFAULT) if scope == "cell" else copy.deepcopy(_CONTRAST_PAGE_DEFAULT) ) threshold = 175 if scope == "page" else 155 cfg: Dict[str, Any] = { "enabled": True, "detect_before_remove": scope == "page", "detect": copy.deepcopy(_DETECT_DEFAULT), "method": method, "threshold": threshold, "morph_close_kernel": 0, "contrast_enhancement": contrast, "debug_options": { "enabled": False, "save_compare": True, "image_format": "png", "subdir": "watermark_removal", }, } if method in ("masked", "masked_adaptive"): cfg["mask"] = copy.deepcopy(mask) if method == "masked_adaptive": cfg["adaptive"] = copy.deepcopy(adaptive) return cfg PAGE_WATERMARK_PRESETS: Dict[str, Dict[str, Any]] = { "threshold": _base_preset("page", "threshold"), "masked": _base_preset("page", "masked"), "masked_adaptive": _base_preset("page", "masked_adaptive"), } CELL_WATERMARK_PRESETS: Dict[str, Dict[str, Any]] = { "threshold": _base_preset("cell", "threshold"), "masked": _base_preset("cell", "masked"), "masked_adaptive": _base_preset("cell", "masked_adaptive"), } def get_preset(scope: Scope, method: str) -> Dict[str, Any]: method = method or "masked_adaptive" presets = CELL_WATERMARK_PRESETS if scope == "cell" else PAGE_WATERMARK_PRESETS if method not in presets: method = "masked_adaptive" return copy.deepcopy(presets[method]) def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]: out = copy.deepcopy(base) for k, v in override.items(): if k in out and isinstance(out[k], dict) and isinstance(v, dict): out[k] = _deep_merge(out[k], v) else: out[k] = copy.deepcopy(v) return out def merge_watermark_config( scope: Scope, user_cfg: Optional[Dict[str, Any]] = None, *, method: Optional[str] = None, ) -> Dict[str, Any]: """将用户 YAML 片段与 scope 预设合并;保留旧版 mask/adaptive 全量覆盖能力。""" user_cfg = user_cfg or {} m = method or user_cfg.get("method") or "masked_adaptive" merged = get_preset(scope, str(m)) for key in ( "enabled", "detect_before_remove", "method", "threshold", "morph_close_kernel", ): if key in user_cfg: merged[key] = user_cfg[key] for nested in ("detect", "mask", "adaptive", "contrast_enhancement", "debug_options"): if nested in user_cfg and isinstance(user_cfg[nested], dict): merged[nested] = _deep_merge(merged.get(nested) or {}, user_cfg[nested]) if method: merged["method"] = method return merged