| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197 |
- """
- 银行流水等场景的水印去除预设(页级 / 单元格级)。
- 对外 YAML 只需 method、enabled、contrast_enhancement 等少量键;
- mask / hough / adaptive 细参由此模块提供,避免配置漂移。
- """
- from __future__ import annotations
- import copy
- from typing import Any, Dict, Literal, Optional
# Granularity a preset applies to: a whole page or a single table cell.
Scope = Literal["page", "cell"]

# Names of the removal methods that have a preset in this module.
Method = Literal["threshold", "masked", "masked_adaptive"]
# Detection defaults; a deep copy of this table becomes the "detect" section
# of every preset built by _base_preset.
_DETECT_DEFAULT: Dict[str, Any] = {
    "ratio_threshold": 0.025,
    # Mid-tone gray window used by detection.
    "midtone_low": 100,
    "midtone_high": 220,
    "check_diagonal": True,
    # NOTE(review): presumably (min, max) in degrees -- confirm with the consumer.
    "diagonal_angle_range": (30, 60),
}
# Page-level mask parameters; deep-copied into the "mask" section of the
# "masked" and "masked_adaptive" presets.
_MASK_PAGE: Dict[str, Any] = {
    # --- general mask options and gray-level bounds ---
    "mask_mode": "light_on_white",
    "text_protect_gray_max": 130,
    "light_gray_low": 236,
    "light_gray_high": 253,
    "whiten_gray_low": 200,
    "direction_filter": "hough",
    "morph_close_kernel": 0,
    "morph_dilate_kernel": 0,
    "min_component_area": 200,
    "debug_block_maps": False,
    "debug_block_size": 48,
    # --- hough_* keys ---
    # NOTE(review): presumably only read when direction_filter == "hough";
    # confirm against the code that consumes this table.
    "hough_midtone_low": 200,
    "hough_midtone_high": 254,
    "hough_canny_low": 30,
    "hough_canny_high": 100,
    "hough_threshold": 25,
    "hough_min_line_length": 35,
    "hough_max_line_gap": 18,
    "hough_line_thickness": 12,
    "hough_band_dilate_radius": 16,
    "hough_use_angle_statistics": True,
    "hough_angle_tolerance": 5.0,
    "hough_secondary_peak_ratio": 0.35,
    "hough_min_length_percentile": 25.0,
    # --- remaining mid-tone / kernel / protection keys ---
    "midtone_low": 95,
    "midtone_high": 235,
    "remove_horizontal_vertical": True,
    "diagonal_enhance": True,
    "diagonal_kernel_length": 25,
    "horizontal_kernel_length": 35,
    "vertical_kernel_length": 35,
    "morph_open_kernel": 2,
    # NOTE(review): spelled "dmorph_close_kernel" while a separate
    # "morph_close_kernel" key exists above -- verify the spelling the
    # consumer actually reads before renaming.
    "dmorph_close_kernel": 3,
    "text_protect_percentile": 10.0,
    "background_threshold": 248,
    "seal_protect": True,
}

# Cell-level mask parameters: the page-level table with seven keys overridden.
# Every override is smaller than its page-level counterpart.
_MASK_CELL: Dict[str, Any] = {
    **_MASK_PAGE,
    "min_component_area": 60,
    "hough_min_line_length": 18,
    "hough_max_line_gap": 12,
    "hough_line_thickness": 8,
    "hough_band_dilate_radius": 10,
    "hough_threshold": 20,
    "text_protect_gray_max": 125,
}
# Page-level parameters; deep-copied into the "adaptive" section of the
# "masked_adaptive" preset.
_ADAPTIVE_PAGE: Dict[str, Any] = {
    "whiten_mode": "mask_fill",
    # Percentile settings for text / watermark / background.
    "text_percentile": 10.0,
    "watermark_percentile": 70.0,
    "background_percentile": 95.0,
    "background_threshold": 248,
    "wm_margin": 12,
    "text_protect_max": 120,
}

# Cell-level variant: same table with a lower text_protect_max and wm_margin.
_ADAPTIVE_CELL: Dict[str, Any] = {
    **_ADAPTIVE_PAGE,
    "text_protect_max": 110,
    "wm_margin": 10,
}
# Page-level "contrast_enhancement" defaults (enabled out of the box).
_CONTRAST_PAGE_DEFAULT: Dict[str, Any] = {
    "enabled": True,
    "method": "text_restore",
    "text_black_target": 85,
    "background_threshold": 248,
    "text_lo_percentile": 1.0,
    "text_hi_percentile": 99.0,
}

# Cell-level "contrast_enhancement" defaults: identical to the page-level
# table except that the step is disabled and text_black_target is 88.
_CONTRAST_CELL_DEFAULT: Dict[str, Any] = {
    **_CONTRAST_PAGE_DEFAULT,
    "enabled": False,
    "text_black_target": 88,
}
def _base_preset(scope: Scope, method: Method) -> Dict[str, Any]:
    """Build a fresh, fully independent preset for ``scope`` and ``method``.

    Every nested table is deep-copied from the module-level defaults, so the
    returned dict can be mutated without affecting them.

    Args:
        scope: ``"cell"`` selects the cell-level mask / adaptive / contrast
            tables; any other value selects the page-level ones. The
            ``threshold`` of 175 and ``detect_before_remove=True`` apply only
            when scope is exactly ``"page"`` (otherwise 170 and ``False``).
        method: Stored under ``"method"``. ``"masked"`` and
            ``"masked_adaptive"`` add a ``"mask"`` section;
            ``"masked_adaptive"`` additionally adds ``"adaptive"``.

    Returns:
        The preset dictionary.
    """
    is_cell = scope == "cell"
    is_page = scope == "page"

    preset: Dict[str, Any] = {
        "enabled": True,
        "detect_before_remove": is_page,
        "detect": copy.deepcopy(_DETECT_DEFAULT),
        "method": method,
        "threshold": 175 if is_page else 170,
        "morph_close_kernel": 0,
        "contrast_enhancement": copy.deepcopy(
            _CONTRAST_CELL_DEFAULT if is_cell else _CONTRAST_PAGE_DEFAULT
        ),
        "debug_options": {
            "enabled": False,
            "save_compare": True,
            "image_format": "png",
            "subdir": "watermark_removal",
        },
    }

    # Optional sections are appended after the common keys, mask first.
    if method in ("masked", "masked_adaptive"):
        preset["mask"] = copy.deepcopy(_MASK_CELL if is_cell else _MASK_PAGE)
    if method == "masked_adaptive":
        preset["adaptive"] = copy.deepcopy(
            _ADAPTIVE_CELL if is_cell else _ADAPTIVE_PAGE
        )
    return preset
# Page-level presets, keyed by the method name each preset records under
# its own "method" entry (threshold, masked, masked_adaptive -- in that order).
PAGE_WATERMARK_PRESETS: Dict[str, Dict[str, Any]] = {
    preset["method"]: preset
    for preset in (
        _base_preset("page", "threshold"),
        _base_preset("page", "masked"),
        _base_preset("page", "masked_adaptive"),
    )
}

# Cell-level presets, built the same way for scope "cell".
CELL_WATERMARK_PRESETS: Dict[str, Dict[str, Any]] = {
    preset["method"]: preset
    for preset in (
        _base_preset("cell", "threshold"),
        _base_preset("cell", "masked"),
        _base_preset("cell", "masked_adaptive"),
    )
}
def get_preset(scope: Scope, method: str) -> Dict[str, Any]:
    """Return an independent deep copy of the preset for ``scope`` / ``method``.

    Args:
        scope: ``"cell"`` selects the cell-level presets; any other value
            selects the page-level presets.
        method: Preset name. An empty/falsy or unknown name silently falls
            back to ``"masked_adaptive"``.

    Returns:
        A deep copy, so callers may mutate it without touching the shared
        preset tables.
    """
    table = CELL_WATERMARK_PRESETS if scope == "cell" else PAGE_WATERMARK_PRESETS
    key = method if method and method in table else "masked_adaptive"
    return copy.deepcopy(table[key])
def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
    """Return a new dict: ``base`` with ``override`` recursively layered on top.

    When a key holds a dict on both sides the two dicts are merged
    recursively; in every other case the override value replaces the base
    value. Neither input is mutated and the result shares no mutable
    objects with them (everything is deep-copied).
    """
    merged = copy.deepcopy(base)
    for key, value in override.items():
        current = merged.get(key)
        if isinstance(current, dict) and isinstance(value, dict):
            merged[key] = _deep_merge(current, value)
            continue
        merged[key] = copy.deepcopy(value)
    return merged
def merge_watermark_config(
    scope: Scope,
    user_cfg: Optional[Dict[str, Any]] = None,
    *,
    method: Optional[str] = None,
) -> Dict[str, Any]:
    """Merge a user YAML fragment onto the preset for ``scope``.

    The preset is chosen by, in order of precedence: the ``method`` keyword
    argument, ``user_cfg["method"]``, then ``"masked_adaptive"``. Falsy
    values (``None``, ``""``) are skipped at each step. Full ``mask`` /
    ``adaptive`` sections supplied by older configs are still honoured.

    Args:
        scope: ``"page"`` or ``"cell"``; forwarded to ``get_preset``.
        user_cfg: Optional user overrides. Only the whitelisted scalar keys
            and the nested sections listed below are read; other keys are
            ignored. Nested sections that are not dicts are ignored as well.
        method: Optional explicit method that wins over ``user_cfg``.

    Returns:
        A new configuration dict. ``"method"`` always holds the requested
        method that was used to pick the preset.
    """
    user_cfg = user_cfg or {}
    requested = method or user_cfg.get("method") or "masked_adaptive"
    merged = get_preset(scope, str(requested))

    # Scalar overrides. "method" is deliberately not copied here: an empty
    # YAML value (``method:`` -> None) must not clobber the resolved default.
    for key in ("enabled", "detect_before_remove", "threshold", "morph_close_kernel"):
        if key in user_cfg:
            merged[key] = user_cfg[key]

    # Nested sections are layered key-by-key on top of the preset's section
    # (or on an empty dict when the chosen preset has no such section).
    for section in ("detect", "mask", "adaptive", "contrast_enhancement", "debug_options"):
        overrides = user_cfg.get(section)
        if isinstance(overrides, dict):
            merged[section] = _deep_merge(merged.get(section) or {}, overrides)

    # Record the requested method exactly as given (explicit argument first,
    # then the user's value, then the default), even if get_preset fell back
    # to another preset because the name was unknown.
    merged["method"] = requested
    return merged
|