1 개월 전 · 75d01a1ed5
--- a/ocr_utils/watermark/__init__.py
+++ b/ocr_utils/watermark/__init__.py
@@ -0,0 +1,50 @@
 
				+"""水印处理：预设、门面、算法与 PDF/调试能力。"""
			
 
				+from ocr_utils.watermark.algorithms import (
			
 
				+    build_watermark_mask,
			
 
				+    detect_watermark,
			
 
				+    remove_watermark_masked_adaptive,
			
 
				+    render_ratio_heatmap,
			
 
				+    save_watermark_mask_debug_layers,
			
 
				+)
			
 
				+from ocr_utils.watermark.contrast import (
			
 
				+    apply_contrast_enhancement_config,
			
 
				+    enhance_document_contrast,
			
 
				+)
			
 
				+from ocr_utils.watermark.debug import save_watermark_removal_debug
			
 
				+from ocr_utils.watermark.pdf import (
			
 
				+    remove_txt_pdf_watermark,
			
 
				+    scan_pdf_watermark_xobjs,
			
 
				+)
			
 
				+from ocr_utils.watermark.presets import (
			
 
				+    CELL_WATERMARK_PRESETS,
			
 
				+    PAGE_WATERMARK_PRESETS,
			
 
				+    get_preset,
			
 
				+    merge_watermark_config,
			
 
				+)
			
 
				+from ocr_utils.watermark.processor import WatermarkProcessor
			
 
				+from ocr_utils.watermark.removal import (
			
 
				+    remove_watermark_from_image,
			
 
				+    remove_watermark_from_image_rgb,
			
 
				+    render_watermark_mask_overlay,
			
 
				+)
			
 
				+
			
 
# Public API of the watermark package: each entry is one of the names imported
# above from the submodules, listed in sorted order.
__all__ = [
    "CELL_WATERMARK_PRESETS",
    "PAGE_WATERMARK_PRESETS",
    "WatermarkProcessor",
    "apply_contrast_enhancement_config",
    "build_watermark_mask",
    "detect_watermark",
    "enhance_document_contrast",
    "get_preset",
    "merge_watermark_config",
    "remove_txt_pdf_watermark",
    "remove_watermark_from_image",
    "remove_watermark_from_image_rgb",
    "remove_watermark_masked_adaptive",
    "render_ratio_heatmap",
    "render_watermark_mask_overlay",
    "save_watermark_mask_debug_layers",
    "save_watermark_removal_debug",
    "scan_pdf_watermark_xobjs",
]
			
--- a/ocr_utils/watermark/algorithms.py
+++ b/ocr_utils/watermark/algorithms.py
@@ -0,0 +1,1095 @@
 
				+"""水印 掩膜与去水印算法（由 ocr_utils.watermark_utils 迁入）。"""
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import json
			
 
				+import re
			
 
				+from pathlib import Path
			
 
				+from typing import Any, Dict, Optional, Tuple, Union
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+from loguru import logger
			
 
				+from PIL import Image
			
 
				+
			
 
				+def detect_watermark(
			
 
				+    image: Union[np.ndarray, Image.Image],
			
 
				+    midtone_low: int = 100,
			
 
				+    midtone_high: int = 220,
			
 
				+    ratio_threshold: float = 0.03,
			
 
				+    check_diagonal: bool = True,
			
 
				+    diagonal_angle_range: tuple = (30, 60),
			
 
				+) -> bool:
			
 
				+    """
			
 
				+    检测图像中是否存在浅色斜向文字水印（银行流水类文档水印检测）。
			
 
				+
			
 
				+    原理：
			
 
				+    1. 将图像转为灰度，提取「中间调」像素（midtone_low ~ midtone_high），
			
 
				+       这些像素既不是纯白背景，也不是深黑正文，是浅灰水印的典型范围。
			
 
				+    2. 若中间调像素占比超过 ratio_threshold，初步判定存在水印。
			
 
				+    3. 若 check_diagonal=True，进一步用 Hough 直线变换验证中间调区域
			
 
				+       是否呈现斜向（diagonal_angle_range 度）纹理，以排除灰色背景误报。
			
 
				+
			
 
				+    Args:
			
 
				+        image: 输入图像，支持 PIL.Image 或 np.ndarray（BGR/RGB/灰度）。
			
 
				+        midtone_low: 中间调下限（默认 100），低于此视为深色正文。
			
 
				+        midtone_high: 中间调上限（默认 220），高于此视为纯白背景。
			
 
				+        ratio_threshold: 中间调像素占全图比例阈值（默认 0.03 即 3%）。
			
 
				+        check_diagonal: 是否进行斜向纹理验证（默认 True）。
			
 
				+        diagonal_angle_range: 斜向角度范围（度），默认 (30, 60)，含 45° 斜水印。
			
 
				+
			
 
				+    Returns:
			
 
				+        True 表示检测到水印，False 表示未检测到。
			
 
				+    """
			
 
				+    if isinstance(image, Image.Image):
			
 
				+        pil_img = image.convert('RGB') if image.mode == 'RGBA' else image
			
 
				+        np_img = np.array(pil_img)
			
 
				+        gray = cv2.cvtColor(np_img, cv2.COLOR_RGB2GRAY) if np_img.ndim == 3 else np_img
			
 
				+    else:
			
 
				+        np_img = image
			
 
				+        gray = cv2.cvtColor(np_img, cv2.COLOR_BGR2GRAY) if np_img.ndim == 3 else np_img
			
 
				+
			
 
				+    midtone_mask = (gray > midtone_low) & (gray < midtone_high)
			
 
				+    ratio = midtone_mask.sum() / gray.size
			
 
				+
			
 
				+    if ratio < ratio_threshold:
			
 
				+        return False
			
 
				+
			
 
				+    if not check_diagonal:
			
 
				+        return True
			
 
				+
			
 
				+    midtone_uint8 = (midtone_mask.astype(np.uint8)) * 255
			
 
				+    edges = cv2.Canny(midtone_uint8, 50, 150, apertureSize=3)
			
 
				+    lines = cv2.HoughLines(edges, rho=1, theta=np.pi / 180, threshold=80)
			
 
				+
			
 
				+    if lines is None:
			
 
				+        return False
			
 
				+
			
 
				+    low_rad = np.deg2rad(diagonal_angle_range[0])
			
 
				+    high_rad = np.deg2rad(diagonal_angle_range[1])
			
 
				+    diagonal_count = 0
			
 
				+    for line in lines:
			
 
				+        theta = line[0][1]
			
 
				+        if low_rad <= theta <= high_rad or (np.pi - high_rad) <= theta <= (np.pi - low_rad):
			
 
				+            diagonal_count += 1
			
 
				+
			
 
				+    return diagonal_count > 0
			
 
				+
			
 
				+
			
 
				+def _local_std_map(gray: np.ndarray, window: int = 5) -> np.ndarray:
			
 
				+    """局部标准差图（返回值与输入同形状）。"""
			
 
				+    gray = np.asarray(gray, dtype=np.float32)
			
 
				+    size = max(3, int(window))
			
 
				+    kernel = np.ones((size, size), dtype=np.float32) / (size * size)
			
 
				+    mean = cv2.filter2D(gray, -1, kernel)
			
 
				+    sq_mean = cv2.filter2D(gray * gray, -1, kernel)
			
 
				+    var = sq_mean - mean * mean
			
 
				+    var = np.maximum(var, 0)
			
 
				+    return np.sqrt(var)
			
 
				+
			
 
				+
			
 
				+def _line_structuring_kernel(length: int, angle_deg: float) -> np.ndarray:
			
 
				+    """生成指定角度、长度的线形结构元（用于斜向水印形态学）。"""
			
 
				+    length = max(3, int(length))
			
 
				+    k = np.zeros((length, length), np.uint8)
			
 
				+    c = length // 2
			
 
				+    rad = np.deg2rad(angle_deg)
			
 
				+    dx = int(round(np.cos(rad) * (c - 1)))
			
 
				+    dy = int(round(np.sin(rad) * (c - 1)))
			
 
				+    cv2.line(k, (c - dx, c - dy), (c + dx, c + dy), 1, thickness=1)
			
 
				+    return k
			
 
				+
			
 
				+
			
 
				+def _line_angle_deg(x1: int, y1: int, x2: int, y2: int) -> float:
			
 
				+    """线段方向角 [0, 180)（无向）。"""
			
 
				+    ang = float(np.degrees(np.arctan2(y2 - y1, x2 - x1)))
			
 
				+    if ang < 0:
			
 
				+        ang += 180.0
			
 
				+    return ang
			
 
				+
			
 
				+
			
 
				+def _angle_in_diagonal_ranges(
			
 
				+    angle_deg: float,
			
 
				+    ranges: Tuple[Tuple[float, float], Tuple[float, float]] = ((35.0, 55.0), (125.0, 145.0)),
			
 
				+) -> bool:
			
 
				+    for lo, hi in ranges:
			
 
				+        if lo <= angle_deg <= hi:
			
 
				+            return True
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def _angle_distance_deg(a: float, b: float) -> float:
			
 
				+    """无向角距离 [0, 90]。"""
			
 
				+    d = abs(float(a) - float(b)) % 180.0
			
 
				+    return min(d, 180.0 - d)
			
 
				+
			
 
				+
			
 
				+def _line_length(x1: int, y1: int, x2: int, y2: int) -> float:
			
 
				+    return float(np.hypot(x2 - x1, y2 - y1))
			
 
				+
			
 
				+
			
 
				+def _find_dominant_diagonal_angles(
			
 
				+    segments: list,
			
 
				+    *,
			
 
				+    angle_ranges: Tuple[Tuple[float, float], Tuple[float, float]] = ((25.0, 65.0), (115.0, 155.0)),
			
 
				+    smooth_sigma: float = 2.0,
			
 
				+    secondary_peak_ratio: float = 0.35,
			
 
				+) -> Tuple[list, np.ndarray]:
			
 
				+    """
			
 
				+    按线段长度加权统计角度直方图，取主峰（及次峰）作为本页水印固定方向。
			
 
				+
			
 
				+    Returns:
			
 
				+        dominant_angles: 1~2 个主导角度（度）
			
 
				+        hist_smooth: 长度 180 的平滑直方图
			
 
				+    """
			
 
				+    hist = np.zeros(180, dtype=np.float64)
			
 
				+    for x1, y1, x2, y2, ang, length in segments:
			
 
				+        if not _angle_in_diagonal_ranges(ang, angle_ranges):
			
 
				+            continue
			
 
				+        hist[int(ang) % 180] += length
			
 
				+
			
 
				+    if hist.sum() <= 0:
			
 
				+        return [], hist
			
 
				+
			
 
				+    ksize = max(3, int(smooth_sigma * 4) | 1)
			
 
				+    hist_smooth = cv2.GaussianBlur(
			
 
				+        hist.reshape(1, 180).astype(np.float32), (ksize, 1), smooth_sigma
			
 
				+    ).flatten().astype(np.float64)
			
 
				+
			
 
				+    peaks: list = []
			
 
				+    for lo, hi in angle_ranges:
			
 
				+        lo_i, hi_i = int(lo), int(hi)
			
 
				+        sub = hist_smooth[lo_i : hi_i + 1]
			
 
				+        if sub.size == 0 or sub.max() <= 0:
			
 
				+            continue
			
 
				+        peak_ang = lo_i + int(sub.argmax())
			
 
				+        peaks.append((peak_ang, float(sub.max())))
			
 
				+
			
 
				+    if not peaks:
			
 
				+        return [], hist_smooth
			
 
				+
			
 
				+    peaks.sort(key=lambda x: -x[1])
			
 
				+    dominant: list = [peaks[0][0]]
			
 
				+    for ang, val in peaks[1:]:
			
 
				+        if val >= peaks[0][1] * secondary_peak_ratio:
			
 
				+            if all(_angle_distance_deg(ang, d) > 15 for d in dominant):
			
 
				+                dominant.append(ang)
			
 
				+    return dominant, hist_smooth
			
 
				+
			
 
				+
			
 
				+def _render_angle_histogram(hist: np.ndarray, dominant_angles: list) -> np.ndarray:
			
 
				+    """角度直方图 debug 图（BGR）。"""
			
 
				+    h_img, w_img = 120, 360
			
 
				+    canvas = np.ones((h_img, w_img, 3), dtype=np.uint8) * 255
			
 
				+    if hist.max() <= 0:
			
 
				+        return canvas
			
 
				+    norm = (hist / hist.max() * (h_img - 20)).astype(np.int32)
			
 
				+    for i, h in enumerate(norm):
			
 
				+        x = int(i * (w_img - 1) / 179)
			
 
				+        cv2.line(canvas, (x, h_img - 10), (x, h_img - 10 - int(h)), (180, 180, 180), 1)
			
 
				+    for ang in dominant_angles:
			
 
				+        x = int(ang * (w_img - 1) / 179)
			
 
				+        cv2.line(canvas, (x, 0), (x, h_img - 1), (0, 0, 255), 2)
			
 
				+    cv2.putText(canvas, "angle (deg)", (w_img // 2 - 40, h_img - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)
			
 
				+    return canvas
			
 
				+
			
 
				+
			
 
				+def _build_diag_hough_region_mask(
			
 
				+    gray: np.ndarray,
			
 
				+    *,
			
 
				+    midtone_low: int = 200,
			
 
				+    midtone_high: int = 254,
			
 
				+    canny_low: int = 30,
			
 
				+    canny_high: int = 100,
			
 
				+    hough_threshold: int = 30,
			
 
				+    min_line_length: int = 40,
			
 
				+    max_line_gap: int = 15,
			
 
				+    angle_ranges: Tuple[Tuple[float, float], Tuple[float, float]] = ((25.0, 65.0), (115.0, 155.0)),
			
 
				+    angle_tolerance: float = 5.0,
			
 
				+    use_angle_statistics: bool = True,
			
 
				+    secondary_peak_ratio: float = 0.35,
			
 
				+    min_length_percentile: float = 25.0,
			
 
				+    line_thickness: int = 10,
			
 
				+    band_dilate_radius: int = 12,
			
 
				+) -> Tuple[np.ndarray, Dict[str, Any]]:
			
 
				+    """
			
 
				+    方案 C：Canny + HoughLinesP + 角度直方图统计主峰，仅保留与本页水印方向一致的线段。
			
 
				+    """
			
 
				+    gray_u8 = np.asarray(gray, dtype=np.uint8)
			
 
				+    band = ((gray_u8 >= midtone_low) & (gray_u8 < midtone_high)).astype(np.uint8) * 255
			
 
				+    edges = cv2.Canny(band, int(canny_low), int(canny_high), apertureSize=3)
			
 
				+
			
 
				+    lines_p = cv2.HoughLinesP(
			
 
				+        edges,
			
 
				+        rho=1,
			
 
				+        theta=np.pi / 180,
			
 
				+        threshold=int(hough_threshold),
			
 
				+        minLineLength=int(min_line_length),
			
 
				+        maxLineGap=int(max_line_gap),
			
 
				+    )
			
 
				+
			
 
				+    line_mask = np.zeros_like(gray_u8, dtype=np.uint8)
			
 
				+    lines_all_bgr = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR)
			
 
				+    lines_filt_bgr = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR)
			
 
				+    diag_candidates: list = []
			
 
				+    total_lines = 0
			
 
				+
			
 
				+    if lines_p is not None:
			
 
				+        for seg in lines_p:
			
 
				+            x1, y1, x2, y2 = [int(v) for v in seg[0]]
			
 
				+            total_lines += 1
			
 
				+            ang = _line_angle_deg(x1, y1, x2, y2)
			
 
				+            length = _line_length(x1, y1, x2, y2)
			
 
				+            if not _angle_in_diagonal_ranges(ang, angle_ranges):
			
 
				+                continue
			
 
				+            diag_candidates.append((x1, y1, x2, y2, ang, length))
			
 
				+            cv2.line(lines_all_bgr, (x1, y1), (x2, y2), (128, 128, 128), 1)
			
 
				+
			
 
				+    dominant_angles: list = []
			
 
				+    hist_smooth = np.zeros(180, dtype=np.float64)
			
 
				+    if use_angle_statistics and diag_candidates:
			
 
				+        dominant_angles, hist_smooth = _find_dominant_diagonal_angles(
			
 
				+            diag_candidates,
			
 
				+            angle_ranges=angle_ranges,
			
 
				+            secondary_peak_ratio=secondary_peak_ratio,
			
 
				+        )
			
 
				+
			
 
				+    def _angle_matches(ang: float) -> bool:
			
 
				+        if not use_angle_statistics or not dominant_angles:
			
 
				+            return True
			
 
				+        return any(_angle_distance_deg(ang, d) <= angle_tolerance for d in dominant_angles)
			
 
				+
			
 
				+    angle_matched = [
			
 
				+        s for s in diag_candidates if _angle_matches(s[4])
			
 
				+    ]
			
 
				+    if angle_matched and min_length_percentile > 0:
			
 
				+        lengths = np.array([s[5] for s in angle_matched], dtype=np.float32)
			
 
				+        len_th = float(np.percentile(lengths, min_length_percentile))
			
 
				+        angle_matched = [s for s in angle_matched if s[5] >= len_th]
			
 
				+
			
 
				+    matched_keys = {(s[0], s[1], s[2], s[3]) for s in angle_matched}
			
 
				+    kept_lines: list = []
			
 
				+    for x1, y1, x2, y2, ang, _length in angle_matched:
			
 
				+        kept_lines.append((x1, y1, x2, y2, ang))
			
 
				+        cv2.line(line_mask, (x1, y1), (x2, y2), 255, thickness=int(line_thickness))
			
 
				+        cv2.line(lines_filt_bgr, (x1, y1), (x2, y2), (0, 0, 255), 2)
			
 
				+    for x1, y1, x2, y2, _ang, _length in diag_candidates:
			
 
				+        if (x1, y1, x2, y2) not in matched_keys:
			
 
				+            cv2.line(lines_filt_bgr, (x1, y1), (x2, y2), (0, 180, 255), 1)
			
 
				+
			
 
				+    geom = line_mask > 0
			
 
				+    if band_dilate_radius > 0 and np.any(geom):
			
 
				+        k = cv2.getStructuringElement(
			
 
				+            cv2.MORPH_ELLIPSE, (band_dilate_radius * 2 + 1, band_dilate_radius * 2 + 1)
			
 
				+        )
			
 
				+        geom = cv2.dilate(line_mask, k) > 0
			
 
				+
			
 
				+    info: Dict[str, Any] = {
			
 
				+        "hough_total_lines": total_lines,
			
 
				+        "hough_diag_candidates": len(diag_candidates),
			
 
				+        "hough_kept_lines": len(kept_lines),
			
 
				+        "dominant_angles": dominant_angles,
			
 
				+        "angle_tolerance": angle_tolerance,
			
 
				+        "geom_mask_ratio": float(geom.sum() / gray_u8.size),
			
 
				+        "hough_lines_bgr": lines_filt_bgr,
			
 
				+        "hough_lines_all_bgr": lines_all_bgr,
			
 
				+        "angle_histogram_bgr": _render_angle_histogram(hist_smooth, dominant_angles),
			
 
				+    }
			
 
				+    return geom, info
			
 
				+
			
 
				+
			
 
				+def _compute_block_orientation_debug_maps(
			
 
				+    gray: np.ndarray,
			
 
				+    *,
			
 
				+    block_size: int = 48,
			
 
				+) -> Tuple[np.ndarray, np.ndarray]:
			
 
				+    """分块 diag/hv 弱边缘占比图（仅 debug 热力图，0~1 float）。"""
			
 
				+    gray_f = np.asarray(gray, dtype=np.float32)
			
 
				+    bs = max(4, int(block_size))
			
 
				+    h_blocks = gray_f.shape[0] // bs
			
 
				+    w_blocks = gray_f.shape[1] // bs
			
 
				+    if h_blocks == 0 or w_blocks == 0:
			
 
				+        z = np.zeros_like(gray_f, dtype=np.float32)
			
 
				+        return z, z
			
 
				+
			
 
				+    ph, pw = h_blocks * bs, w_blocks * bs
			
 
				+    gx = cv2.Sobel(gray_f, cv2.CV_32F, 1, 0, ksize=3)
			
 
				+    gy = cv2.Sobel(gray_f, cv2.CV_32F, 0, 1, ksize=3)
			
 
				+    mag = np.sqrt(gx * gx + gy * gy)
			
 
				+    ori = np.arctan2(gy, gx) * 180.0 / np.pi
			
 
				+
			
 
				+    diag = (
			
 
				+        ((ori > 25) & (ori < 65))
			
 
				+        | ((ori > 115) & (ori < 155))
			
 
				+        | ((ori > -155) & (ori < -115))
			
 
				+        | ((ori > -65) & (ori < -25))
			
 
				+    )
			
 
				+    hv = (
			
 
				+        ((ori > -20) & (ori < 20))
			
 
				+        | ((ori > 160) | (ori < -160))
			
 
				+        | ((ori > 70) & (ori < 110))
			
 
				+        | ((ori > -110) & (ori < -70))
			
 
				+    )
			
 
				+    weak = (mag > 1) & (mag < 15)
			
 
				+
			
 
				+    def _to_blocks(arr: np.ndarray) -> np.ndarray:
			
 
				+        return (
			
 
				+            arr[:ph, :pw]
			
 
				+            .reshape(h_blocks, bs, w_blocks, bs)
			
 
				+            .transpose(0, 2, 1, 3)
			
 
				+            .reshape(h_blocks, w_blocks, -1)
			
 
				+        )
			
 
				+
			
 
				+    b_diag = _to_blocks(diag)
			
 
				+    b_hv = _to_blocks(hv)
			
 
				+    b_weak = _to_blocks(weak)
			
 
				+    diag_weak = np.sum(b_diag & b_weak, axis=2)
			
 
				+    hv_weak = np.sum(b_hv & b_weak, axis=2)
			
 
				+    total_weak = np.sum(b_weak, axis=2)
			
 
				+    with np.errstate(divide="ignore", invalid="ignore"):
			
 
				+        diag_ratio = np.where(total_weak > 0, diag_weak / total_weak, 0.0).astype(np.float32)
			
 
				+        hv_ratio = np.where(total_weak > 0, hv_weak / total_weak, 0.0).astype(np.float32)
			
 
				+
			
 
				+    diag_up = np.repeat(np.repeat(diag_ratio, bs, axis=0), bs, axis=1)
			
 
				+    hv_up = np.repeat(np.repeat(hv_ratio, bs, axis=0), bs, axis=1)
			
 
				+    diag_full = np.zeros_like(gray_f, dtype=np.float32)
			
 
				+    hv_full = np.zeros_like(gray_f, dtype=np.float32)
			
 
				+    diag_full[:ph, :pw] = diag_up
			
 
				+    hv_full[:ph, :pw] = hv_up
			
 
				+    return diag_full, hv_full
			
 
				+
			
 
				+
			
 
				+def render_ratio_heatmap(ratio_map: np.ndarray) -> np.ndarray:
			
 
				+    """将 0~1 浮点占比图转为 BGR 热力图。"""
			
 
				+    r = np.clip(np.asarray(ratio_map, dtype=np.float32), 0.0, 1.0)
			
 
				+    u8 = (r * 255).astype(np.uint8)
			
 
				+    return cv2.applyColorMap(u8, cv2.COLORMAP_JET)
			
 
				+
			
 
				+
			
 
				+def save_watermark_mask_debug_layers(
			
 
				+    image: np.ndarray,
			
 
				+    output_dir: Union[str, Path],
			
 
				+    stem: str,
			
 
				+    debug: Dict[str, Any],
			
 
				+    *,
			
 
				+    image_format: str = "png",
			
 
				+) -> Dict[str, str]:
			
 
				+    """保存分层 debug 图（方案 D）。"""
			
 
				+    out_dir = Path(output_dir)
			
 
				+    out_dir.mkdir(parents=True, exist_ok=True)
			
 
				+    fmt = (image_format or "png").lstrip(".")
			
 
				+    paths: Dict[str, str] = {}
			
 
				+
			
 
				+    def _save_overlay(name: str, mask: Optional[np.ndarray], color=(0, 0, 255)) -> None:
			
 
				+        if mask is None or not np.any(mask):
			
 
				+            return
			
 
				+        from ocr_utils.watermark.removal import render_watermark_mask_overlay
			
 
				+
			
 
				+        ov = render_watermark_mask_overlay(image, mask, color=color)
			
 
				+        p = out_dir / f"{stem}_{name}.{fmt}"
			
 
				+        cv2.imwrite(str(p), cv2.cvtColor(ov, cv2.COLOR_RGB2BGR) if ov.shape[2] == 3 else ov)
			
 
				+        paths[name] = str(p)
			
 
				+
			
 
				+    _save_overlay("wm_candidate_overlay", debug.get("wm_candidate"))
			
 
				+    _save_overlay("geom_region_overlay", debug.get("geom_region"), color=(0, 180, 255))
			
 
				+    _save_overlay("geom_candidate_overlay", debug.get("geom_candidate"), color=(0, 255, 0))
			
 
				+    _save_overlay("wm_mask_overlay", debug.get("wm_mask"), color=(255, 0, 0))
			
 
				+
			
 
				+    hough_bgr = debug.get("hough_lines_bgr")
			
 
				+    if hough_bgr is not None:
			
 
				+        p = out_dir / f"{stem}_hough_lines.{fmt}"
			
 
				+        cv2.imwrite(str(p), hough_bgr)
			
 
				+        paths["hough_lines"] = str(p)
			
 
				+
			
 
				+    hough_all = debug.get("hough_lines_all_bgr")
			
 
				+    if hough_all is not None:
			
 
				+        p = out_dir / f"{stem}_hough_lines_all.{fmt}"
			
 
				+        cv2.imwrite(str(p), hough_all)
			
 
				+        paths["hough_lines_all"] = str(p)
			
 
				+
			
 
				+    angle_hist = debug.get("angle_histogram_bgr")
			
 
				+    if angle_hist is not None:
			
 
				+        p = out_dir / f"{stem}_angle_histogram.{fmt}"
			
 
				+        cv2.imwrite(str(p), angle_hist)
			
 
				+        paths["angle_histogram"] = str(p)
			
 
				+
			
 
				+    diag_hm = debug.get("diag_ratio_heatmap")
			
 
				+    if diag_hm is not None:
			
 
				+        p = out_dir / f"{stem}_diag_ratio_heatmap.{fmt}"
			
 
				+        cv2.imwrite(str(p), diag_hm)
			
 
				+        paths["diag_ratio_heatmap"] = str(p)
			
 
				+
			
 
				+    hv_hm = debug.get("hv_ratio_heatmap")
			
 
				+    if hv_hm is not None:
			
 
				+        p = out_dir / f"{stem}_hv_ratio_heatmap.{fmt}"
			
 
				+        cv2.imwrite(str(p), hv_hm)
			
 
				+        paths["hv_ratio_heatmap"] = str(p)
			
 
				+
			
 
				+    return paths
			
 
				+
			
 
				+
			
 
				+def _build_diag_region_mask(
			
 
				+    gray: np.ndarray,
			
 
				+    *,
			
 
				+    block_size: int = 48,
			
 
				+    diag_ratio_thresh: float = 0.20,
			
 
				+    light_gray_thresh: int = 238,
			
 
				+    light_ratio_thresh: float = 0.10,
			
 
				+    min_edge_count: int = 10,
			
 
				+    dilate_radius: int = 3,
			
 
				+) -> np.ndarray:
			
 
				+    """
			
 
				+    分块梯度方向检测：返回对角线方向纹理占优的区域掩膜。
			
 
				+
			
 
				+    原理：水印是45°斜向字符，其梯度主方向在30-60°和120-150°。
			
 
				+    分块统计该方向弱边缘占比，高频块标记为水印候选区域。
			
 
				+
			
 
				+    Returns:
			
 
				+        bool ndarray, 与 gray 同形状，True=疑似斜向水印区域。
			
 
				+    """
			
 
				+    gray_f = np.asarray(gray, dtype=np.float32)
			
 
				+    img_h, img_w = gray_f.shape
			
 
				+    bs = max(4, int(block_size))
			
 
				+
			
 
				+    # Sobel 梯度
			
 
				+    gx = cv2.Sobel(gray_f, cv2.CV_32F, 1, 0, ksize=3)
			
 
				+    gy = cv2.Sobel(gray_f, cv2.CV_32F, 0, 1, ksize=3)
			
 
				+    mag = np.sqrt(gx * gx + gy * gy)
			
 
				+    ori = np.arctan2(gy, gx) * 180.0 / np.pi
			
 
				+
			
 
				+    # 对角线方向 (±45° 附近，即梯度 30-65° / 115-155°)
			
 
				+    diag = (
			
 
				+        ((ori > 25) & (ori < 65))
			
 
				+        | ((ori > 115) & (ori < 155))
			
 
				+        | ((ori > -155) & (ori < -115))
			
 
				+        | ((ori > -65) & (ori < -25))
			
 
				+    )
			
 
				+
			
 
				+    h_blocks = img_h // bs
			
 
				+    w_blocks = img_w // bs
			
 
				+    if h_blocks == 0 or w_blocks == 0:
			
 
				+        return np.zeros_like(gray, dtype=bool)
			
 
				+
			
 
				+    ph, pw = h_blocks * bs, w_blocks * bs
			
 
				+
			
 
				+    # 分块统计
			
 
				+    def _to_blocks(arr: np.ndarray) -> np.ndarray:
			
 
				+        return arr[:ph, :pw].reshape(h_blocks, bs, w_blocks, bs).transpose(0, 2, 1, 3).reshape(h_blocks, w_blocks, -1)
			
 
				+
			
 
				+    block_mag = _to_blocks(mag)
			
 
				+    block_diag = _to_blocks(diag)
			
 
				+    block_gray = _to_blocks(gray_f)
			
 
				+
			
 
				+    weak = (block_mag > 1) & (block_mag < 15)
			
 
				+    diag_weak = np.sum(block_diag & weak, axis=2)
			
 
				+    total_weak = np.sum(weak, axis=2)
			
 
				+
			
 
				+    with np.errstate(divide="ignore", invalid="ignore"):
			
 
				+        diag_ratio = np.where(total_weak > 0, diag_weak / total_weak, 0.0)
			
 
				+    light_ratio = np.mean(block_gray >= light_gray_thresh, axis=2)
			
 
				+
			
 
				+    wm_blocks = (
			
 
				+        (diag_ratio > diag_ratio_thresh)
			
 
				+        & (light_ratio > light_ratio_thresh)
			
 
				+        & (total_weak > min_edge_count)
			
 
				+    )
			
 
				+
			
 
				+    # 展开为像素掩膜
			
 
				+    wm_block_mask = np.repeat(np.repeat(wm_blocks, bs, axis=0), bs, axis=1)
			
 
				+    full_mask = np.zeros(gray_f.shape, dtype=bool)
			
 
				+    full_mask[:ph, :pw] = wm_block_mask
			
 
				+
			
 
				+    if dilate_radius > 0:
			
 
				+        k = cv2.getStructuringElement(
			
 
				+            cv2.MORPH_ELLIPSE, (dilate_radius * 2 + 1, dilate_radius * 2 + 1)
			
 
				+        )
			
 
				+        full_mask = cv2.dilate(full_mask.astype(np.uint8), k) > 0
			
 
				+
			
 
				+    return full_mask
			
 
				+
			
 
				+
			
 
				+def _build_seal_protect_mask(
			
 
				+    bgr: np.ndarray,
			
 
				+    *,
			
 
				+    hue_high: int = 15,
			
 
				+    sat_min: int = 40,
			
 
				+    value_min: int = 30,
			
 
				+) -> np.ndarray:
			
 
				+    """红色/公章区域保护掩膜（True=保护，不置白）。"""
			
 
				+    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
			
 
				+    lower1 = np.array([0, sat_min, value_min], dtype=np.uint8)
			
 
				+    upper1 = np.array([hue_high, 255, 255], dtype=np.uint8)
			
 
				+    lower2 = np.array([170, sat_min, value_min], dtype=np.uint8)
			
 
				+    upper2 = np.array([180, 255, 255], dtype=np.uint8)
			
 
				+    m1 = cv2.inRange(hsv, lower1, upper1)
			
 
				+    m2 = cv2.inRange(hsv, lower2, upper2)
			
 
				+    m2 = cv2.inRange(hsv, lower2, upper2)
			
 
				+    return (m1 > 0) | (m2 > 0)
			
 
				+
			
 
				+
			
 
				+def _build_text_edge_protect(
			
 
				+    gray: np.ndarray,
			
 
				+    *,
			
 
				+    edge_window: int = 5,
			
 
				+    edge_std_thresh: float = 6.0,
			
 
				+    dilate_radius: int = 1,
			
 
				+) -> np.ndarray:
			
 
				+    """基于局部方差的笔画边缘保护掩膜（True=保护，不置白）。"""
			
 
				+    local_std = _local_std_map(gray, window=edge_window)
			
 
				+    edge_mask = local_std >= edge_std_thresh
			
 
				+    if dilate_radius > 0:
			
 
				+        k = cv2.getStructuringElement(
			
 
				+            cv2.MORPH_ELLIPSE, (dilate_radius * 2 + 1, dilate_radius * 2 + 1)
			
 
				+        )
			
 
				+        edge_mask = cv2.dilate(edge_mask.astype(np.uint8), k) > 0
			
 
				+    return edge_mask.astype(bool)
			
 
				+
			
 
				+
			
 
				+def _build_watermark_mask_light_on_white(
			
 
				+    gray: np.ndarray,
			
 
				+    *,
			
 
				+    bgr: Optional[np.ndarray] = None,
			
 
				+    light_gray_low: int = 236,
			
 
				+    light_gray_high: int = 253,
			
 
				+    whiten_gray_low: int = 200,
			
 
				+    text_protect_gray_max: int = 130,
			
 
				+    text_protect_percentile: Optional[float] = None,
			
 
				+    background_threshold: int = 248,
			
 
				+    morph_close_kernel: int = 0,
			
 
				+    morph_close_iter: int = 1,
			
 
				+    morph_dilate_kernel: int = 0,
			
 
				+    morph_dilate_iter: int = 1,
			
 
				+    min_component_area: int = 200,
			
 
				+    low_variance_thresh: float = 0.0,
			
 
				+    edge_window: int = 5,
			
 
				+    direction_filter: str = "hough",
			
 
				+    debug_block_maps: bool = True,
			
 
				+    debug_block_size: int = 48,
			
 
				+    hough_midtone_low: int = 200,
			
 
				+    hough_midtone_high: int = 254,
			
 
				+    hough_canny_low: int = 30,
			
 
				+    hough_canny_high: int = 100,
			
 
				+    hough_threshold: int = 25,
			
 
				+    hough_min_line_length: int = 35,
			
 
				+    hough_max_line_gap: int = 18,
			
 
				+    hough_line_thickness: int = 12,
			
 
				+    hough_band_dilate_radius: int = 14,
			
 
				+    hough_angle_tolerance: float = 5.0,
			
 
				+    hough_use_angle_statistics: bool = True,
			
 
				+    hough_secondary_peak_ratio: float = 0.35,
			
 
				+    hough_min_length_percentile: float = 25.0,
			
 
				+    diag_block_size: int = 0,
			
 
				+    diag_ratio_thresh: float = 0.20,
			
 
				+    diag_light_ratio_thresh: float = 0.10,
			
 
				+    diag_min_edge_count: int = 10,
			
 
				+    diag_dilate_radius: int = 3,
			
 
				+    seal_protect: bool = True,
			
 
				+    seal_hue_high: int = 15,
			
 
				+    seal_sat_min: int = 40,
			
 
				+) -> Tuple[np.ndarray, Dict[str, Any]]:
			
 
				+    """
			
 
				+    白底流水水印掩膜（方案 C + E）。
			
 
				+
			
 
				+    1. Hough 斜向线段 → geom_region（几何限定区域）
			
 
				+    2. wm_candidate = 浅色带且非正文保护
			
 
				+    3. wm_mask = geom_region（置白区域由几何约束；实际白化时再 g>=light_gray_low）
			
 
				+    4. debug 输出 candidate / geom / 交集 / 热力图
			
 
				+    """
			
 
				+    gray_arr = np.asarray(gray)
			
 
				+    bg_th = int(background_threshold)
			
 
				+    low = int(light_gray_low)
			
 
				+    high = int(light_gray_high)
			
 
				+
			
 
				+    if text_protect_gray_max > 0:
			
 
				+        t_protect = float(text_protect_gray_max)
			
 
				+    else:
			
 
				+        dark = gray_arr[gray_arr < min(130, bg_th)]
			
 
				+        if dark.size > 0 and text_protect_percentile is not None:
			
 
				+            t_protect = float(np.percentile(dark, text_protect_percentile))
			
 
				+        else:
			
 
				+            t_protect = 120.0
			
 
				+    text_protect = gray_arr <= t_protect
			
 
				+    low = max(low, int(t_protect) + 25)
			
 
				+
			
 
				+    wm_candidate = (gray_arr >= low) & (gray_arr < high) & (~text_protect)
			
 
				+
			
 
				+    direction = (direction_filter or "hough").lower().strip()
			
 
				+    hough_info: Dict[str, Any] = {}
			
 
				+    geom_region = np.zeros_like(gray_arr, dtype=bool)
			
 
				+
			
 
				+    if direction == "hough":
			
 
				+        geom_region, hough_info = _build_diag_hough_region_mask(
			
 
				+            gray_arr,
			
 
				+            midtone_low=hough_midtone_low,
			
 
				+            midtone_high=hough_midtone_high,
			
 
				+            canny_low=hough_canny_low,
			
 
				+            canny_high=hough_canny_high,
			
 
				+            hough_threshold=hough_threshold,
			
 
				+            min_line_length=hough_min_line_length,
			
 
				+            max_line_gap=hough_max_line_gap,
			
 
				+            angle_tolerance=hough_angle_tolerance,
			
 
				+            use_angle_statistics=hough_use_angle_statistics,
			
 
				+            secondary_peak_ratio=hough_secondary_peak_ratio,
			
 
				+            min_length_percentile=hough_min_length_percentile,
			
 
				+            line_thickness=hough_line_thickness,
			
 
				+            band_dilate_radius=hough_band_dilate_radius,
			
 
				+        )
			
 
				+    elif diag_block_size > 0:
			
 
				+        geom_region = _build_diag_region_mask(
			
 
				+            gray_arr,
			
 
				+            block_size=diag_block_size,
			
 
				+            diag_ratio_thresh=diag_ratio_thresh,
			
 
				+            light_gray_thresh=low,
			
 
				+            light_ratio_thresh=diag_light_ratio_thresh,
			
 
				+            min_edge_count=diag_min_edge_count,
			
 
				+            dilate_radius=diag_dilate_radius,
			
 
				+        )
			
 
				+
			
 
				+    geom_candidate = geom_region & wm_candidate
			
 
				+    wm_mask = geom_region.copy()
			
 
				+
			
 
				+    if min_component_area > 0 and np.any(wm_mask):
			
 
				+        n_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
			
 
				+            wm_mask.astype(np.uint8), connectivity=8
			
 
				+        )
			
 
				+        filtered = np.zeros_like(wm_mask)
			
 
				+        for i in range(1, n_labels):
			
 
				+            if stats[i, cv2.CC_STAT_AREA] >= min_component_area:
			
 
				+                filtered[labels == i] = True
			
 
				+        if np.any(filtered):
			
 
				+            wm_mask = filtered
			
 
				+        elif np.any(geom_region):
			
 
				+            wm_mask = geom_region
			
 
				+
			
 
				+    seal_mask = np.zeros_like(wm_mask, dtype=bool)
			
 
				+    if seal_protect and bgr is not None and bgr.ndim == 3:
			
 
				+        seal_mask = _build_seal_protect_mask(
			
 
				+            bgr, hue_high=seal_hue_high, sat_min=seal_sat_min
			
 
				+        )
			
 
				+        wm_mask &= ~seal_mask
			
 
				+
			
 
				+    midtone = (gray_arr >= low) & (gray_arr < high)
			
 
				+    debug: Dict[str, Any] = {
			
 
				+        "mask_mode": "light_on_white",
			
 
				+        "direction_filter": direction,
			
 
				+        "light_gray_low": low,
			
 
				+        "light_gray_high": high,
			
 
				+        "midtone_ratio": float(midtone.sum() / gray_arr.size),
			
 
				+        "wm_candidate_ratio": float(wm_candidate.sum() / gray_arr.size),
			
 
				+        "geom_mask_ratio": float(geom_region.sum() / gray_arr.size),
			
 
				+        "geom_candidate_ratio": float(geom_candidate.sum() / gray_arr.size),
			
 
				+        "wm_mask_ratio": float(wm_mask.sum() / gray_arr.size),
			
 
				+        "T_protect": t_protect,
			
 
				+        "text_protect_gray_max": text_protect_gray_max,
			
 
				+        "text_protect": text_protect,
			
 
				+        "seal_protect": seal_mask,
			
 
				+        "wm_candidate": wm_candidate,
			
 
				+        "geom_region": geom_region,
			
 
				+        "geom_candidate": geom_candidate,
			
 
				+        "diag_region": geom_region,
			
 
				+        "wm_mask": wm_mask,
			
 
				+        "whiten_gray_low": int(whiten_gray_low),
			
 
				+        "hough_lines_bgr": hough_info.get("hough_lines_bgr"),
			
 
				+        "hough_lines_all_bgr": hough_info.get("hough_lines_all_bgr"),
			
 
				+        "angle_histogram_bgr": hough_info.get("angle_histogram_bgr"),
			
 
				+        "dominant_angles": hough_info.get("dominant_angles", []),
			
 
				+        "hough_kept_lines": hough_info.get("hough_kept_lines", 0),
			
 
				+        "hough_diag_candidates": hough_info.get("hough_diag_candidates", 0),
			
 
				+        "hough_total_lines": hough_info.get("hough_total_lines", 0),
			
 
				+    }
			
 
				+
			
 
				+    if debug_block_maps:
			
 
				+        bs = debug_block_size if debug_block_size > 0 else 48
			
 
				+        diag_map, hv_map = _compute_block_orientation_debug_maps(gray_arr, block_size=bs)
			
 
				+        debug["diag_ratio_heatmap"] = render_ratio_heatmap(diag_map)
			
 
				+        debug["hv_ratio_heatmap"] = render_ratio_heatmap(hv_map)
			
 
				+
			
 
				+    return wm_mask, debug
			
 
				+
			
 
				+
			
 
				+def build_watermark_mask(
			
 
				+    gray: np.ndarray,
			
 
				+    *,
			
 
				+    bgr: Optional[np.ndarray] = None,
			
 
				+    mask_mode: str = "diagonal_midtone",
			
 
				+    light_gray_low: int = 236,
			
 
				+    light_gray_high: int = 253,
			
 
				+    whiten_gray_low: int = 200,
			
 
				+    text_protect_gray_max: int = 130,
			
 
				+    morph_close_kernel: int = 0,
			
 
				+    morph_close_iter: int = 1,
			
 
				+    morph_dilate_kernel: int = 0,
			
 
				+    morph_dilate_iter: int = 1,
			
 
				+    low_variance_thresh: float = 0.0,
			
 
				+    edge_window: int = 5,
			
 
				+    direction_filter: str = "hough",
			
 
				+    debug_block_maps: bool = True,
			
 
				+    debug_block_size: int = 48,
			
 
				+    hough_midtone_low: int = 200,
			
 
				+    hough_midtone_high: int = 254,
			
 
				+    hough_canny_low: int = 30,
			
 
				+    hough_canny_high: int = 100,
			
 
				+    hough_threshold: int = 25,
			
 
				+    hough_min_line_length: int = 35,
			
 
				+    hough_max_line_gap: int = 18,
			
 
				+    hough_line_thickness: int = 12,
			
 
				+    hough_band_dilate_radius: int = 14,
			
 
				+    hough_angle_tolerance: float = 5.0,
			
 
				+    hough_use_angle_statistics: bool = True,
			
 
				+    hough_secondary_peak_ratio: float = 0.35,
			
 
				+    hough_min_length_percentile: float = 25.0,
			
 
				+    diag_block_size: int = 0,
			
 
				+    diag_ratio_thresh: float = 0.20,
			
 
				+    diag_light_ratio_thresh: float = 0.10,
			
 
				+    diag_min_edge_count: int = 10,
			
 
				+    diag_dilate_radius: int = 3,
			
 
				+    # diagonal_midtone 参数
			
 
				+    midtone_low: int = 100,
			
 
				+    midtone_high: int = 220,
			
 
				+    remove_horizontal_vertical: bool = True,
			
 
				+    diagonal_enhance: bool = True,
			
 
				+    diagonal_kernel_length: int = 25,
			
 
				+    horizontal_kernel_length: int = 35,
			
 
				+    vertical_kernel_length: int = 35,
			
 
				+    morph_open_kernel: int = 2,
			
 
				+    dmorph_close_kernel: int = 3,
			
 
				+    min_component_area: int = 200,
			
 
				+    text_protect_percentile: float = 10.0,
			
 
				+    background_threshold: int = 248,
			
 
				+    seal_protect: bool = True,
			
 
				+    seal_hue_high: int = 15,
			
 
				+    seal_sat_min: int = 40,
			
 
				+) -> Tuple[np.ndarray, Dict[str, Any]]:
			
 
				+    """
			
 
				+    构建水印掩膜 wm_mask（True=疑似水印像素）。
			
 
				+
			
 
				+    mask_mode:
			
 
				+        light_on_white — Hough 斜向几何带 + 浅色白化（方案 C/E）
			
 
				+        diagonal_midtone — 中间调 + 斜向形态学（旧逻辑）
			
 
				+    """
			
 
				+    gray = np.asarray(gray)
			
 
				+    if gray.ndim != 2:
			
 
				+        raise ValueError("build_watermark_mask expects single-channel grayscale")
			
 
				+
			
 
				+    mode = (mask_mode or "light_on_white").lower().strip()
			
 
				+    if mode == "light_on_white":
			
 
				+        return _build_watermark_mask_light_on_white(
			
 
				+            gray,
			
 
				+            bgr=bgr,
			
 
				+            light_gray_low=light_gray_low,
			
 
				+            light_gray_high=light_gray_high,
			
 
				+            whiten_gray_low=whiten_gray_low,
			
 
				+            text_protect_gray_max=text_protect_gray_max,
			
 
				+            text_protect_percentile=text_protect_percentile,
			
 
				+            background_threshold=background_threshold,
			
 
				+            morph_close_kernel=morph_close_kernel,
			
 
				+            morph_close_iter=morph_close_iter,
			
 
				+            morph_dilate_kernel=morph_dilate_kernel,
			
 
				+            morph_dilate_iter=morph_dilate_iter,
			
 
				+            low_variance_thresh=low_variance_thresh,
			
 
				+            edge_window=edge_window,
			
 
				+            min_component_area=min_component_area,
			
 
				+            direction_filter=direction_filter,
			
 
				+            debug_block_maps=debug_block_maps,
			
 
				+            debug_block_size=debug_block_size,
			
 
				+            hough_midtone_low=hough_midtone_low,
			
 
				+            hough_midtone_high=hough_midtone_high,
			
 
				+            hough_canny_low=hough_canny_low,
			
 
				+            hough_canny_high=hough_canny_high,
			
 
				+            hough_threshold=hough_threshold,
			
 
				+            hough_min_line_length=hough_min_line_length,
			
 
				+            hough_max_line_gap=hough_max_line_gap,
			
 
				+            hough_line_thickness=hough_line_thickness,
			
 
				+            hough_band_dilate_radius=hough_band_dilate_radius,
			
 
				+            hough_angle_tolerance=hough_angle_tolerance,
			
 
				+            hough_use_angle_statistics=hough_use_angle_statistics,
			
 
				+            hough_secondary_peak_ratio=hough_secondary_peak_ratio,
			
 
				+            hough_min_length_percentile=hough_min_length_percentile,
			
 
				+            diag_block_size=diag_block_size,
			
 
				+            diag_ratio_thresh=diag_ratio_thresh,
			
 
				+            diag_light_ratio_thresh=diag_light_ratio_thresh,
			
 
				+            diag_min_edge_count=diag_min_edge_count,
			
 
				+            diag_dilate_radius=diag_dilate_radius,
			
 
				+            seal_protect=seal_protect,
			
 
				+            seal_hue_high=seal_hue_high,
			
 
				+            seal_sat_min=seal_sat_min,
			
 
				+        )
			
 
				+
			
 
				+    midtone = (gray > midtone_low) & (gray < midtone_high)
			
 
				+    mid_u8 = (midtone.astype(np.uint8)) * 255
			
 
				+
			
 
				+    horiz = np.zeros_like(midtone, dtype=bool)
			
 
				+    vert = np.zeros_like(midtone, dtype=bool)
			
 
				+    if remove_horizontal_vertical:
			
 
				+        kh = cv2.getStructuringElement(
			
 
				+            cv2.MORPH_RECT, (max(3, horizontal_kernel_length), 1)
			
 
				+        )
			
 
				+        kv = cv2.getStructuringElement(
			
 
				+            cv2.MORPH_RECT, (1, max(3, vertical_kernel_length))
			
 
				+        )
			
 
				+        horiz = cv2.morphologyEx(mid_u8, cv2.MORPH_OPEN, kh) > 0
			
 
				+        vert = cv2.morphologyEx(mid_u8, cv2.MORPH_OPEN, kv) > 0
			
 
				+
			
 
				+    # 中间调去掉明显横竖线（保留斜向水印）
			
 
				+    candidate = midtone & ~(horiz | vert)
			
 
				+
			
 
				+    if diagonal_enhance:
			
 
				+        k45 = _line_structuring_kernel(diagonal_kernel_length, 45)
			
 
				+        k135 = _line_structuring_kernel(diagonal_kernel_length, 135)
			
 
				+        d45 = cv2.morphologyEx(mid_u8, cv2.MORPH_OPEN, k45) > 0
			
 
				+        d135 = cv2.morphologyEx(mid_u8, cv2.MORPH_OPEN, k135) > 0
			
 
				+        direction = d45 | d135
			
 
				+        dilate_k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
			
 
				+        near_diag = cv2.dilate(direction.astype(np.uint8), dilate_k) > 0
			
 
				+        # 斜向结构足够时收窄到斜向附近；否则保留「中间调减横竖」结果
			
 
				+        if near_diag.sum() > gray.size * 0.001:
			
 
				+            candidate = candidate & near_diag
			
 
				+
			
 
				+    cand_u8 = (candidate.astype(np.uint8)) * 255
			
 
				+    if morph_open_kernel > 0:
			
 
				+        k_open = cv2.getStructuringElement(
			
 
				+            cv2.MORPH_ELLIPSE, (morph_open_kernel, morph_open_kernel)
			
 
				+        )
			
 
				+        cand_u8 = cv2.morphologyEx(cand_u8, cv2.MORPH_OPEN, k_open)
			
 
				+    if dmorph_close_kernel > 0:
			
 
				+        k_close = cv2.getStructuringElement(
			
 
				+            cv2.MORPH_ELLIPSE, (dmorph_close_kernel, dmorph_close_kernel)
			
 
				+        )
			
 
				+        cand_u8 = cv2.morphologyEx(cand_u8, cv2.MORPH_CLOSE, k_close)
			
 
				+
			
 
				+    wm_mask = cand_u8 > 0
			
 
				+
			
 
				+    if min_component_area > 0:
			
 
				+        n_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
			
 
				+            wm_mask.astype(np.uint8), connectivity=8
			
 
				+        )
			
 
				+        filtered = np.zeros_like(wm_mask)
			
 
				+        for i in range(1, n_labels):
			
 
				+            if stats[i, cv2.CC_STAT_AREA] >= min_component_area:
			
 
				+                filtered[labels == i] = True
			
 
				+        wm_mask = filtered
			
 
				+
			
 
				+    non_bg = gray[gray < background_threshold]
			
 
				+    if non_bg.size > 0:
			
 
				+        t_protect = float(np.percentile(non_bg, text_protect_percentile))
			
 
				+    else:
			
 
				+        t_protect = 85.0
			
 
				+    t_protect = max(t_protect, float(midtone_low))
			
 
				+    text_protect = gray <= t_protect
			
 
				+
			
 
				+    midtone_ratio = float(midtone.sum() / gray.size)
			
 
				+    wm_ratio = float(wm_mask.sum() / gray.size)
			
 
				+
			
 
				+    # 掩膜过小：回退为「中间调减横竖」或整块中间调（满版斜纹水印常见）
			
 
				+    min_wm_ratio = max(0.005, midtone_ratio * 0.12)
			
 
				+    if wm_ratio < min_wm_ratio:
			
 
				+        relaxed = midtone & ~(horiz | vert) & (~text_protect)
			
 
				+        if relaxed.sum() / gray.size < min_wm_ratio:
			
 
				+            relaxed = midtone & (~text_protect)
			
 
				+        wm_mask = relaxed
			
 
				+        wm_ratio = float(wm_mask.sum() / gray.size)
			
 
				+
			
 
				+    seal_mask = np.zeros_like(wm_mask, dtype=bool)
			
 
				+    if seal_protect and bgr is not None and bgr.ndim == 3:
			
 
				+        seal_mask = _build_seal_protect_mask(
			
 
				+            bgr, hue_high=seal_hue_high, sat_min=seal_sat_min
			
 
				+        )
			
 
				+
			
 
				+    debug: Dict[str, Any] = {
			
 
				+        "mask_mode": "diagonal_midtone",
			
 
				+        "midtone_ratio": midtone_ratio,
			
 
				+        "wm_mask_ratio": wm_ratio,
			
 
				+        "T_protect": t_protect,
			
 
				+        "text_protect": text_protect,
			
 
				+        "seal_protect": seal_mask,
			
 
				+        "midtone_mask": midtone,
			
 
				+        "wm_mask": wm_mask,
			
 
				+    }
			
 
				+    return wm_mask, debug
			
 
				+
			
 
				+
			
 
				+def remove_watermark_masked_adaptive(
			
 
				+    gray: np.ndarray,
			
 
				+    *,
			
 
				+    bgr: Optional[np.ndarray] = None,
			
 
				+    mask_cfg: Optional[Dict[str, Any]] = None,
			
 
				+    adaptive_cfg: Optional[Dict[str, Any]] = None,
			
 
				+    threshold_fallback: int = 175,
			
 
				+    morph_close_kernel: int = 0,
			
 
				+) -> Tuple[np.ndarray, Dict[str, Any]]:
			
 
				+    """
			
 
				+    掩膜内置白（whiten_mode=mask_fill）或掩膜内动态阈值（threshold_in_mask）。
			
 
				+
			
 
				+    掩膜为空时回退全局 threshold_fallback。
			
 
				+    """
			
 
				+    gray = np.asarray(gray).copy()
			
 
				+    mcfg: Dict[str, Any] = {
			
 
				+        "mask_mode": "light_on_white",
			
 
				+        "light_gray_low": 236,
			
 
				+        "light_gray_high": 253,
			
 
				+        "whiten_gray_low": 200,
			
 
				+        "text_protect_gray_max": 130,
			
 
				+        "morph_close_kernel": 0,
			
 
				+        "morph_close_iter": 1,
			
 
				+        "morph_dilate_kernel": 0,
			
 
				+        "morph_dilate_iter": 1,
			
 
				+        "low_variance_thresh": 0.0,
			
 
				+        "edge_window": 5,
			
 
				+        "min_component_area": 200,
			
 
				+        "direction_filter": "hough",
			
 
				+        "debug_block_maps": True,
			
 
				+        "debug_block_size": 48,
			
 
				+        "hough_midtone_low": 200,
			
 
				+        "hough_midtone_high": 254,
			
 
				+        "hough_canny_low": 30,
			
 
				+        "hough_canny_high": 100,
			
 
				+        "hough_threshold": 25,
			
 
				+        "hough_min_line_length": 35,
			
 
				+        "hough_max_line_gap": 18,
			
 
				+        "hough_line_thickness": 12,
			
 
				+        "hough_band_dilate_radius": 14,
			
 
				+        "hough_angle_tolerance": 5.0,
			
 
				+        "hough_use_angle_statistics": True,
			
 
				+        "hough_secondary_peak_ratio": 0.35,
			
 
				+        "hough_min_length_percentile": 25.0,
			
 
				+        "diag_block_size": 0,
			
 
				+        "diag_ratio_thresh": 0.20,
			
 
				+        "diag_light_ratio_thresh": 0.10,
			
 
				+        "diag_min_edge_count": 10,
			
 
				+        "diag_dilate_radius": 3,
			
 
				+        "midtone_low": 100,
			
 
				+        "midtone_high": 220,
			
 
				+        "remove_horizontal_vertical": True,
			
 
				+        "diagonal_enhance": True,
			
 
				+        "diagonal_kernel_length": 25,
			
 
				+        "horizontal_kernel_length": 35,
			
 
				+        "vertical_kernel_length": 35,
			
 
				+        "morph_open_kernel": 2,
			
 
				+        "dmorph_close_kernel": 3,
			
 
				+        "text_protect_percentile": 10.0,
			
 
				+        "background_threshold": 248,
			
 
				+        "seal_protect": True,
			
 
				+        "seal_hue_high": 15,
			
 
				+        "seal_sat_min": 40,
			
 
				+    }
			
 
				+    mcfg.update(mask_cfg or {})
			
 
				+    mask_mode = str(mcfg.get("mask_mode", "light_on_white")).lower().strip()
			
 
				+
			
 
				+    # light_on_white 默认 mask_fill
			
 
				+    acfg: Dict[str, Any] = {
			
 
				+        "whiten_mode": None,
			
 
				+        "text_percentile": 10.0,
			
 
				+        "watermark_percentile": 88.0,
			
 
				+        "background_percentile": 95.0,
			
 
				+        "background_threshold": 248,
			
 
				+        "wm_margin": 12,
			
 
				+        "text_protect_max": 120,
			
 
				+    }
			
 
				+    acfg.update(adaptive_cfg or {})
			
 
				+    whiten_mode = acfg.get("whiten_mode")
			
 
				+    if not whiten_mode:
			
 
				+        whiten_mode = (
			
 
				+            "mask_fill"
			
 
				+            if mask_mode == "light_on_white"
			
 
				+            else "threshold_in_mask"
			
 
				+        )
			
 
				+    whiten_mode = str(whiten_mode).lower().strip()
			
 
				+
			
 
				+    wm_mask, debug = build_watermark_mask(gray, bgr=bgr, **mcfg)
			
 
				+
			
 
				+    if not np.any(wm_mask):
			
 
				+        cleaned = gray.copy()
			
 
				+        cleaned[gray > threshold_fallback] = 255
			
 
				+        debug["mode"] = "fallback_threshold"
			
 
				+        debug["threshold_fallback"] = threshold_fallback
			
 
				+        if morph_close_kernel > 0:
			
 
				+            kernel = np.ones((morph_close_kernel, morph_close_kernel), np.uint8)
			
 
				+            cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)
			
 
				+        return cleaned, debug
			
 
				+
			
 
				+    bg_th = int(acfg["background_threshold"])
			
 
				+    bg_pixels = gray[gray >= bg_th]
			
 
				+    if bg_pixels.size > 0:
			
 
				+        b_level = float(np.percentile(bg_pixels, acfg["background_percentile"]))
			
 
				+    else:
			
 
				+        b_level = 250.0
			
 
				+
			
 
				+    if mask_mode == "light_on_white":
			
 
				+        t_protect = float(debug.get("T_protect", 150.0))
			
 
				+    else:
			
 
				+        non_bg = gray[gray < bg_th]
			
 
				+        if non_bg.size > 0:
			
 
				+            t_protect = float(np.percentile(non_bg, acfg["text_percentile"]))
			
 
				+        else:
			
 
				+            t_protect = float(debug.get("T_protect", 85.0))
			
 
				+        t_protect = min(t_protect, float(acfg["text_protect_max"]))
			
 
				+        t_protect = max(t_protect, float(mcfg.get("midtone_low", 100)))
			
 
				+
			
 
				+    text_protect = debug["text_protect"]
			
 
				+    seal_protect = debug["seal_protect"]
			
 
				+    t_wm: Optional[float] = None
			
 
				+
			
 
				+    if whiten_mode == "mask_fill":
			
 
				+        # 几何带内：g>=whiten_gray_low 置白；g<=130 正文硬保护（方案 E）
			
 
				+        wm_gray_low = float(
			
 
				+            mcfg.get("whiten_gray_low", debug.get("whiten_gray_low", 200))
			
 
				+        )
			
 
				+        to_white = (
			
 
				+            wm_mask
			
 
				+            & (gray >= wm_gray_low)
			
 
				+            & (gray < int(mcfg.get("light_gray_high", 254)))
			
 
				+            & (~text_protect)
			
 
				+            & (~seal_protect)
			
 
				+        )
			
 
				+    else:
			
 
				+        mask_vals = gray[wm_mask]
			
 
				+        if mask_vals.size > 0:
			
 
				+            t_wm = float(np.percentile(mask_vals, acfg["watermark_percentile"]))
			
 
				+        else:
			
 
				+            t_wm = t_protect + 0.45 * (b_level - t_protect)
			
 
				+        margin = float(acfg["wm_margin"])
			
 
				+        t_wm = max(t_wm, t_protect + margin)
			
 
				+        t_wm = min(t_wm, b_level - 3.0)
			
 
				+        t_wm = min(t_wm, float(mcfg.get("midtone_high", 220)) - 5.0)
			
 
				+        to_white = wm_mask & (gray >= t_wm) & (~text_protect) & (~seal_protect)
			
 
				+
			
 
				+    cleaned = gray.copy()
			
 
				+    cleaned[to_white] = 255
			
 
				+
			
 
				+    if morph_close_kernel > 0:
			
 
				+        kernel = np.ones((morph_close_kernel, morph_close_kernel), np.uint8)
			
 
				+        cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)
			
 
				+
			
 
				+    debug.update(
			
 
				+        {
			
 
				+            "mode": "masked_adaptive",
			
 
				+            "mask_mode": mask_mode,
			
 
				+            "whiten_mode": whiten_mode,
			
 
				+            "T_wm": t_wm,
			
 
				+            "T_protect": t_protect,
			
 
				+            "B_level": b_level,
			
 
				+            "white_pixel_ratio": float(to_white.sum() / gray.size),
			
 
				+            "threshold_fallback": threshold_fallback,
			
 
				+        }
			
 
				+    )
			
 
				+    return cleaned, debug
			
 
				+
			
 
				+
			
 
				+def _image_to_gray_and_bgr(
			
 
				+    image: Union[np.ndarray, Image.Image],
			
 
				+) -> Tuple[np.ndarray, Optional[np.ndarray]]:
			
 
				+    """统一为灰度 + 可选 BGR（用于掩膜公章保护）。"""
			
 
				+    if isinstance(image, Image.Image):
			
 
				+        pil_img = image.convert("RGB") if image.mode == "RGBA" else image
			
 
				+        np_img = np.array(pil_img)
			
 
				+        np_img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
			
 
				+    else:
			
 
				+        np_img = image.copy()
			
 
				+
			
 
				+    if np_img.ndim == 3:
			
 
				+        bgr = np_img
			
 
				+        gray = cv2.cvtColor(np_img, cv2.COLOR_BGR2GRAY)
			
 
				+    else:
			
 
				+        bgr = None
			
 
				+        gray = np_img
			
 
				+    return gray, bgr
			
 
				+
			
--- a/ocr_utils/watermark/contrast.py
+++ b/ocr_utils/watermark/contrast.py
@@ -0,0 +1,139 @@
 
				+"""水印 对比度增强（由 ocr_utils.watermark_utils 迁入）。"""
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import json
			
 
				+import re
			
 
				+from pathlib import Path
			
 
				+from typing import Any, Dict, Optional, Tuple, Union
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+from loguru import logger
			
 
				+from PIL import Image
			
 
				+
			
 
				+def _enhance_text_restore(
			
 
				+    gray: np.ndarray,
			
 
				+    *,
			
 
				+    background_threshold: int = 248,
			
 
				+    text_lo_percentile: float = 1.0,
			
 
				+    text_hi_percentile: float = 99.0,
			
 
				+    text_black_target: int = 85,
			
 
				+) -> np.ndarray:
			
 
				+    """
			
 
				+    仅对非背景像素做动态范围压缩，将最深笔画拉向 text_black_target（默认 ~85，接近扫描件原图）。
			
 
				+
			
 
				+    背景（>= background_threshold）保持白色，避免整图 gamma 导致背景发灰。
			
 
				+    """
			
 
				+    result = gray.copy()
			
 
				+    bg_th = int(np.clip(background_threshold, 200, 255))
			
 
				+    text_mask = gray < bg_th
			
 
				+    if not np.any(text_mask):
			
 
				+        return result
			
 
				+
			
 
				+    vals = gray[text_mask].astype(np.float32)
			
 
				+    lo = float(np.percentile(vals, text_lo_percentile))
			
 
				+    hi = float(np.percentile(vals, text_hi_percentile))
			
 
				+    target = int(np.clip(text_black_target, 10, 200))
			
 
				+    if hi <= lo + 1.0:
			
 
				+        return result
			
 
				+
			
 
				+    stretched = (vals - lo) * target / (hi - lo)
			
 
				+    result[text_mask] = np.clip(stretched, 0, 255).astype(np.uint8)
			
 
				+    return result
			
 
				+
			
 
				+
			
 
				+def enhance_document_contrast(
			
 
				+    gray: np.ndarray,
			
 
				+    method: str = "text_restore",
			
 
				+    *,
			
 
				+    clip_limit: float = 2.0,
			
 
				+    tile_grid_size: int = 8,
			
 
				+    gamma: float = 0.85,
			
 
				+    black_percentile: float = 2.0,
			
 
				+    white_percentile: float = 98.0,
			
 
				+    background_threshold: int = 248,
			
 
				+    text_lo_percentile: float = 1.0,
			
 
				+    text_hi_percentile: float = 99.0,
			
 
				+    text_black_target: int = 85,
			
 
				+) -> np.ndarray:
			
 
				+    """
			
 
				+    文档灰度图对比度增强（常用于去水印后恢复笔画深度）。
			
 
				+
			
 
				+    Args:
			
 
				+        gray: 单通道 uint8 灰度图
			
 
				+        method: text_restore | clahe | gamma | linear
			
 
				+        clip_limit: CLAHE 对比度限制
			
 
				+        tile_grid_size: CLAHE 分块大小
			
 
				+        gamma: gamma 校正指数，<1 加深文字（去水印后发浅时适用）
			
 
				+        black_percentile: linear 拉伸下分位（映射到 0）
			
 
				+        white_percentile: linear 拉伸上分位（映射到 255）
			
 
				+        background_threshold: text_restore 背景阈值（>= 视为白底不处理）
			
 
				+        text_lo_percentile: text_restore 笔画下分位
			
 
				+        text_hi_percentile: text_restore 笔画上分位（映射到 text_black_target）
			
 
				+        text_black_target: text_restore 最深笔画目标灰度（越小越深，建议 75~95）
			
 
				+
			
 
				+    Returns:
			
 
				+        增强后的灰度图
			
 
				+    """
			
 
				+    if gray is None or gray.size == 0:
			
 
				+        return gray
			
 
				+    if gray.ndim != 2:
			
 
				+        raise ValueError("enhance_document_contrast expects single-channel grayscale image")
			
 
				+
			
 
				+    method = (method or "text_restore").lower().strip()
			
 
				+
			
 
				+    if method == "text_restore":
			
 
				+        return _enhance_text_restore(
			
 
				+            gray,
			
 
				+            background_threshold=background_threshold,
			
 
				+            text_lo_percentile=text_lo_percentile,
			
 
				+            text_hi_percentile=text_hi_percentile,
			
 
				+            text_black_target=text_black_target,
			
 
				+        )
			
 
				+
			
 
				+    if method == "gamma":
			
 
				+        gamma = max(0.1, min(float(gamma), 3.0))
			
 
				+        inv_gamma = 1.0 / gamma
			
 
				+        table = np.array(
			
 
				+            [((i / 255.0) ** inv_gamma) * 255 for i in range(256)],
			
 
				+            dtype=np.uint8,
			
 
				+        )
			
 
				+        return cv2.LUT(gray, table)
			
 
				+
			
 
				+    if method == "linear":
			
 
				+        p_low = float(np.percentile(gray, black_percentile))
			
 
				+        p_high = float(np.percentile(gray, white_percentile))
			
 
				+        if p_high <= p_low + 1.0:
			
 
				+            return gray
			
 
				+        stretched = (gray.astype(np.float32) - p_low) * 255.0 / (p_high - p_low)
			
 
				+        return np.clip(stretched, 0, 255).astype(np.uint8)
			
 
				+
			
 
				+    # 默认 CLAHE：局部对比度，适合扫描件
			
 
				+    tile = max(2, int(tile_grid_size))
			
 
				+    clahe = cv2.createCLAHE(
			
 
				+        clipLimit=max(0.1, float(clip_limit)),
			
 
				+        tileGridSize=(tile, tile),
			
 
				+    )
			
 
				+    return clahe.apply(gray)
			
 
				+
			
 
				+
			
 
				+def apply_contrast_enhancement_config(
			
 
				+    gray: np.ndarray,
			
 
				+    contrast_cfg: Optional[Dict[str, Any]],
			
 
				+) -> np.ndarray:
			
 
				+    """按配置字典应用对比度增强；未启用时原样返回。"""
			
 
				+    if not contrast_cfg or not contrast_cfg.get("enabled", False):
			
 
				+        return gray
			
 
				+    return enhance_document_contrast(
			
 
				+        gray,
			
 
				+        method=contrast_cfg.get("method", "text_restore"),
			
 
				+        clip_limit=contrast_cfg.get("clip_limit", 2.0),
			
 
				+        tile_grid_size=contrast_cfg.get("tile_grid_size", 8),
			
 
				+        gamma=contrast_cfg.get("gamma", 0.85),
			
 
				+        black_percentile=contrast_cfg.get("black_percentile", 2.0),
			
 
				+        white_percentile=contrast_cfg.get("white_percentile", 98.0),
			
 
				+        background_threshold=contrast_cfg.get("background_threshold", 248),
			
 
				+        text_lo_percentile=contrast_cfg.get("text_lo_percentile", 1.0),
			
 
				+        text_hi_percentile=contrast_cfg.get("text_hi_percentile", 99.0),
			
 
				+        text_black_target=contrast_cfg.get("text_black_target", 75),
			
 
				+    )
			
--- a/ocr_utils/watermark/debug.py
+++ b/ocr_utils/watermark/debug.py
@@ -0,0 +1,129 @@
 
				+"""水印 调试图保存（由 ocr_utils.watermark_utils 迁入）。"""
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import json
			
 
				+import re
			
 
				+from pathlib import Path
			
 
				+from typing import Any, Dict, Optional, Tuple, Union
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+from loguru import logger
			
 
				+from PIL import Image
			
 
				+
			
 
				+from ocr_utils.watermark.removal import render_watermark_mask_overlay
			
 
				+
			
 
				+def _image_to_bgr_for_debug(img: np.ndarray) -> np.ndarray:
			
 
				+    """将 ndarray 转为 BGR，供 cv2.imwrite 使用。"""
			
 
				+    if img.ndim == 2:
			
 
				+        return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
			
 
				+    out = img.copy()
			
 
				+    if out.shape[2] == 3:
			
 
				+        return cv2.cvtColor(out, cv2.COLOR_RGB2BGR)
			
 
				+    return out
			
 
				+
			
 
				+
			
 
				+def save_watermark_removal_debug(
			
 
				+    before: Union[np.ndarray, Image.Image],
			
 
				+    after: Union[np.ndarray, Image.Image],
			
 
				+    output_dir: Union[str, Path],
			
 
				+    page_name: str,
			
 
				+    *,
			
 
				+    processing_params: Optional[Dict[str, Any]] = None,
			
 
				+    image_format: str = "png",
			
 
				+    save_compare: bool = True,
			
 
				+    subdir: str = "watermark_removal",
			
 
				+    mask_overlay: Optional[np.ndarray] = None,
			
 
				+) -> Dict[str, str]:
			
 
				+    """
			
 
				+    保存去水印调试图（before / after / compare / meta.json）。
			
 
				+
			
 
				+    与 universal_doc_parser 的 module debug 目录结构一致：
			
 
				+    ``{output_dir}/debug/{subdir}/``
			
 
				+
			
 
				+    Args:
			
 
				+        before: 处理前图像（RGB/BGR/灰度）
			
 
				+        after: 处理后图像
			
 
				+        output_dir: 输出根目录（通常为 pipeline 或工具的输出目录）
			
 
				+        page_name: 文件名前缀（如 ``doc_page_002``）
			
 
				+        processing_params: 写入 meta.json 的参数（threshold、contrast_enhancement 等）
			
 
				+        image_format: 图片格式，png/jpg
			
 
				+        save_compare: 是否保存左右拼接对比图
			
 
				+        subdir: debug 根目录下的子目录名（默认 watermark_removal）
			
 
				+
			
 
				+    Returns:
			
 
				+        已保存文件路径字典（before/after/compare/meta，未保存的键省略）
			
 
				+    """
			
 
				+    if isinstance(before, Image.Image):
			
 
				+        before = np.array(before)
			
 
				+    if isinstance(after, Image.Image):
			
 
				+        after = np.array(after)
			
 
				+
			
 
				+    from ocr_utils.module_debug_viz import resolve_module_debug_dir
			
 
				+
			
 
				+    debug_dir = resolve_module_debug_dir(output_dir, subdir)
			
 
				+
			
 
				+    fmt = (image_format or "png").lstrip(".")
			
 
				+    before_bgr = _image_to_bgr_for_debug(before)
			
 
				+    after_bgr = _image_to_bgr_for_debug(after)
			
 
				+
			
 
				+    paths: Dict[str, str] = {}
			
 
				+    before_path = debug_dir / f"{page_name}_watermark_before.{fmt}"
			
 
				+    after_path = debug_dir / f"{page_name}_watermark_after.{fmt}"
			
 
				+    cv2.imwrite(str(before_path), before_bgr)
			
 
				+    cv2.imwrite(str(after_path), after_bgr)
			
 
				+    paths["before"] = str(before_path)
			
 
				+    paths["after"] = str(after_path)
			
 
				+
			
 
				+    if save_compare:
			
 
				+        h = max(before_bgr.shape[0], after_bgr.shape[0])
			
 
				+        if before_bgr.shape[0] != h:
			
 
				+            before_bgr = cv2.resize(before_bgr, (before_bgr.shape[1], h))
			
 
				+        if after_bgr.shape[0] != h:
			
 
				+            after_bgr = cv2.resize(after_bgr, (after_bgr.shape[1], h))
			
 
				+        compare = np.hstack([before_bgr, after_bgr])
			
 
				+        compare_path = debug_dir / f"{page_name}_watermark_compare.{fmt}"
			
 
				+        cv2.imwrite(str(compare_path), compare)
			
 
				+        paths["compare"] = str(compare_path)
			
 
				+        logger.info(f"Saved watermark compare: {compare_path}")
			
 
				+
			
 
				+    if mask_overlay is not None:
			
 
				+        mask_bgr = _image_to_bgr_for_debug(mask_overlay)
			
 
				+        mask_path = debug_dir / f"{page_name}_watermark_mask.{fmt}"
			
 
				+        cv2.imwrite(str(mask_path), mask_bgr)
			
 
				+        paths["mask"] = str(mask_path)
			
 
				+
			
 
				+    meta: Dict[str, Any] = {"page_name": page_name}
			
 
				+    if processing_params:
			
 
				+        _skip_meta = (
			
 
				+            "midtone_mask",
			
 
				+            "wm_mask",
			
 
				+            "wm_candidate",
			
 
				+            "geom_region",
			
 
				+            "geom_candidate",
			
 
				+            "diag_region",
			
 
				+            "text_protect",
			
 
				+            "seal_protect",
			
 
				+            "hough_lines_bgr",
			
 
				+            "diag_ratio_heatmap",
			
 
				+            "hv_ratio_heatmap",
			
 
				+        )
			
 
				+        meta_params = {
			
 
				+            k: v
			
 
				+            for k, v in processing_params.items()
			
 
				+            if k not in _skip_meta
			
 
				+        }
			
 
				+        meta.update(meta_params)
			
 
				+    else:
			
 
				+        meta.update({})
			
 
				+    meta["before"] = paths["before"]
			
 
				+    meta["after"] = paths["after"]
			
 
				+    if "compare" in paths:
			
 
				+        meta["compare"] = paths["compare"]
			
 
				+
			
 
				+    meta_path = debug_dir / f"{page_name}_watermark_meta.json"
			
 
				+    meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
			
 
				+    paths["meta"] = str(meta_path)
			
 
				+
			
 
				+    logger.info(f"Saved watermark debug: {before_path}, {after_path}")
			
 
				+    return paths
			
--- a/ocr_utils/watermark/pdf.py
+++ b/ocr_utils/watermark/pdf.py
@@ -0,0 +1,226 @@
 
				+"""水印处理：PDF XObject 水印检测与去除（由 ocr_utils.watermark_utils 迁入）。"""
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import json
			
 
				+import re
			
 
				+from pathlib import Path
			
 
				+from typing import Any, Dict, Optional, Tuple, Union
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+from loguru import logger
			
 
				+from PIL import Image
			
 
				+
			
 
				+def _is_watermark_xobj(doc, xref: int, obj_str: str) -> bool:
			
 
				+    """
			
 
				+    判断一个 Form XObject 是否为水印。
			
 
				+
			
 
				+    启发式规则（满足其一即视为水印）：
			
 
				+    1. 含旋转变换矩阵（cm 指令 sin/cos 分量非零），无论是否有 /Group
			
 
				+    2. 有透明度组（/Group）且内容流包含透明度操作符（ca/CA）
			
 
				+    3. 有透明度组且内容流体积 > 2KB（大量重复绘图 = 平铺水印）
			
 
				+    """
			
 
				+    if "/Form" not in obj_str:
			
 
				+        return False
			
 
				+
			
 
				+    try:
			
 
				+        stream = doc.xref_stream(xref)
			
 
				+        if not stream:
			
 
				+            return False
			
 
				+        stream_text = stream.decode("latin-1", errors="ignore")
			
 
				+    except Exception:
			
 
				+        return False
			
 
				+
			
 
				+    has_group = "/Group" in obj_str
			
 
				+
			
 
				+    cm_pattern = re.compile(
			
 
				+        r"([-\d.]+)\s+([-\d.]+)\s+([-\d.]+)\s+([-\d.]+)\s+[-\d.]+\s+[-\d.]+\s+cm"
			
 
				+    )
			
 
				+    for m in cm_pattern.finditer(stream_text):
			
 
				+        a, b, c, d = float(m.group(1)), float(m.group(2)), float(m.group(3)), float(m.group(4))
			
 
				+        if abs(b) > 0.1 or abs(c) > 0.1:
			
 
				+            return True
			
 
				+
			
 
				+    if not has_group:
			
 
				+        return False
			
 
				+
			
 
				+    if re.search(r'\b(ca|CA)\s+[0-9.]+', stream_text) or re.search(r'[0-9.]+\s+(ca|CA)\b', stream_text):
			
 
				+        return True
			
 
				+
			
 
				+    if len(stream_text) > 2048:
			
 
				+        return True
			
 
				+
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def _is_watermark_image_xobj(doc, xref: int, obj_str: str) -> bool:
			
 
				+    """
			
 
				+    判断一个 Image XObject 是否为水印背景图。
			
 
				+
			
 
				+    判断规则（全部满足）：
			
 
				+    1. /Subtype /Image
			
 
				+    2. 有 /SMask（半透明）
			
 
				+    3. 宽 >= 600 且 高 >= 800（全页尺寸，排除小图标）
			
 
				+    4. 解码后像素均值 >= 240（近乎全白，水印文字稀疏）
			
 
				+    """
			
 
				+    if "/Image" not in obj_str or "/SMask" not in obj_str:
			
 
				+        return False
			
 
				+
			
 
				+    w_m = re.search(r'/Width\s+(\d+)', obj_str)
			
 
				+    h_m = re.search(r'/Height\s+(\d+)', obj_str)
			
 
				+    if not w_m or not h_m:
			
 
				+        return False
			
 
				+    if int(w_m.group(1)) < 600 or int(h_m.group(1)) < 800:
			
 
				+        return False
			
 
				+
			
 
				+    try:
			
 
				+        from io import BytesIO
			
 
				+        img_info = doc.extract_image(xref)
			
 
				+        pil_img = Image.open(BytesIO(img_info["image"])).convert("L")
			
 
				+        return float(np.array(pil_img).mean()) >= 240.0
			
 
				+    except Exception:
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def _blank_watermark_image(doc, img_xref: int) -> None:
			
 
				+    """
			
 
				+    将水印 Image XObject 的 RGB 流和 SMask 替换为全白/全不透明。
			
 
				+
			
 
				+    关键点：必须先移除 /DecodeParms（Predictor 11），再调用 update_stream。
			
 
				+    否则渲染器在 FlateDecode 之后还会尝试 Predictor 解码，失败后回退原始数据，
			
 
				+    水印依然可见。
			
 
				+    """
			
 
				+    obj_str = doc.xref_object(img_xref)
			
 
				+
			
 
				+    w_m = re.search(r'/Width\s+(\d+)', obj_str)
			
 
				+    h_m = re.search(r'/Height\s+(\d+)', obj_str)
			
 
				+    w = int(w_m.group(1)) if w_m else 1
			
 
				+    h = int(h_m.group(1)) if h_m else 1
			
 
				+    cs_m = re.search(r'/ColorSpace\s+/Device(RGB|Gray|CMYK)', obj_str)
			
 
				+    channels = {'RGB': 3, 'CMYK': 4}.get(cs_m.group(1) if cs_m else '', 1)
			
 
				+
			
 
				+    doc.xref_set_key(img_xref, "DecodeParms", "null")
			
 
				+    doc.update_stream(img_xref, bytes([255]) * (w * h * channels))
			
 
				+
			
 
				+    smask_m = re.search(r'/SMask\s+(\d+)\s+0\s+R', obj_str)
			
 
				+    if smask_m:
			
 
				+        smask_xref = int(smask_m.group(1))
			
 
				+        smask_obj = doc.xref_object(smask_xref)
			
 
				+        sw = int(m.group(1)) if (m := re.search(r'/Width\s+(\d+)', smask_obj)) else w
			
 
				+        sh = int(m.group(1)) if (m := re.search(r'/Height\s+(\d+)', smask_obj)) else h
			
 
				+        doc.xref_set_key(smask_xref, "DecodeParms", "null")
			
 
				+        doc.update_stream(smask_xref, bytes([255]) * (sw * sh))
			
 
				+
			
 
				+
			
 
				+def scan_pdf_watermark_xobjs(pdf_bytes: bytes, sample_pages: int = 3) -> bool:
			
 
				+    """
			
 
				+    快速扫描 PDF 前 N 页，判断是否含水印 XObject。
			
 
				+
			
 
				+    无副作用（只读），用于在执行去水印前快速判断，避免对无水印的大文件
			
 
				+    执行全量扫描和序列化，显著降低财报等大文件的处理开销。
			
 
				+
			
 
				+    Args:
			
 
				+        pdf_bytes: PDF 文件的原始字节。
			
 
				+        sample_pages: 扫描页数上限，默认 3（银行流水通常前几页有水印）。
			
 
				+
			
 
				+    Returns:
			
 
				+        True 表示发现水印 XObject，False 表示未发现。
			
 
				+    """
			
 
				+    try:
			
 
				+        import fitz
			
 
				+    except ImportError:
			
 
				+        return False
			
 
				+
			
 
				+    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
			
 
				+    pages_to_check = min(sample_pages, len(doc))
			
 
				+    try:
			
 
				+        for i in range(pages_to_check):
			
 
				+            page = doc[i]
			
 
				+            for xref, *_ in page.get_xobjects():
			
 
				+                try:
			
 
				+                    obj_str = doc.xref_object(xref)
			
 
				+                except Exception:
			
 
				+                    continue
			
 
				+                if _is_watermark_xobj(doc, xref, obj_str):
			
 
				+                    return True
			
 
				+            for img_tuple in page.get_images(full=True):
			
 
				+                try:
			
 
				+                    obj_str = doc.xref_object(img_tuple[0])
			
 
				+                except Exception:
			
 
				+                    continue
			
 
				+                if _is_watermark_image_xobj(doc, img_tuple[0], obj_str):
			
 
				+                    return True
			
 
				+    finally:
			
 
				+        doc.close()
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def remove_txt_pdf_watermark(pdf_bytes: bytes) -> Optional[bytes]:
			
 
				+    """
			
 
				+    对文字型 PDF 执行原生水印去除，完全在内存中完成，不写临时文件。
			
 
				+
			
 
				+    支持两种水印形式：
			
 
				+    - Form XObject 水印：清空内容流
			
 
				+    - Image XObject 水印（全页背景图 + SMask 透明通道）：替换为全白像素
			
 
				+
			
 
				+    适用场景：pdf_type='txt' 的 PDF，去除后可直接传给渲染层（tobytes() → bytes）。
			
 
				+    对于大文件（如财报），建议先用 scan_pdf_watermark_xobjs() 快速判断再调用本函数。
			
 
				+
			
 
				+    Args:
			
 
				+        pdf_bytes: 原始 PDF 的字节内容。
			
 
				+
			
 
				+    Returns:
			
 
				+        去除水印后的 PDF bytes（garbage=4 压缩）；若未发现水印返回 None。
			
 
				+    """
			
 
				+    try:
			
 
				+        import fitz
			
 
				+    except ImportError:
			
 
				+        raise ImportError("请安装 PyMuPDF: pip install PyMuPDF")
			
 
				+
			
 
				+    from loguru import logger
			
 
				+
			
 
				+    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
			
 
				+    processed_xrefs: set[int] = set()
			
 
				+    total_removed = 0
			
 
				+
			
 
				+    for page in doc:
			
 
				+        # ── Form XObject 水印 ─────────────────────────────────────────
			
 
				+        for xref, name, _invoker, _unused in page.get_xobjects():
			
 
				+            if xref in processed_xrefs:
			
 
				+                continue
			
 
				+            try:
			
 
				+                obj_str = doc.xref_object(xref)
			
 
				+            except Exception:
			
 
				+                continue
			
 
				+            if _is_watermark_xobj(doc, xref, obj_str):
			
 
				+                try:
			
 
				+                    doc.update_stream(xref, b"")
			
 
				+                    processed_xrefs.add(xref)
			
 
				+                    total_removed += 1
			
 
				+                    logger.debug(f"  [Form XObject] 清空水印 xref={xref}, name={name}")
			
 
				+                except Exception as e:
			
 
				+                    logger.warning(f"  清空 Form XObject xref={xref} 失败: {e}")
			
 
				+
			
 
				+        # ── Image XObject 水印 ────────────────────────────────────────
			
 
				+        for img_tuple in page.get_images(full=True):
			
 
				+            img_xref = img_tuple[0]
			
 
				+            if img_xref in processed_xrefs:
			
 
				+                continue
			
 
				+            try:
			
 
				+                obj_str = doc.xref_object(img_xref)
			
 
				+            except Exception:
			
 
				+                continue
			
 
				+            if _is_watermark_image_xobj(doc, img_xref, obj_str):
			
 
				+                _blank_watermark_image(doc, img_xref)
			
 
				+                processed_xrefs.add(img_xref)
			
 
				+                total_removed += 1
			
 
				+                logger.debug(f"  [Image XObject] 替换水印图像 xref={img_xref}")
			
 
				+
			
 
				+    if total_removed == 0:
			
 
				+        doc.close()
			
 
				+        return None
			
 
				+
			
 
				+    result = doc.tobytes(garbage=4, deflate=True)
			
 
				+    doc.close()
			
 
				+    logger.info(f"✅ PDF 层级水印去除：共清除 {total_removed} 个水印 XObject")
			
 
				+    return result
			
--- a/ocr_utils/watermark/presets.py
+++ b/ocr_utils/watermark/presets.py
@@ -0,0 +1,197 @@
 
				+"""
			
 
				+银行流水等场景的水印去除预设（页级 / 单元格级）。
			
 
				+
			
 
				+对外 YAML 只需 method、enabled、contrast_enhancement 等少量键；
			
 
				+mask / hough / adaptive 细参由此模块提供，避免配置漂移。
			
 
				+"""
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import copy
			
 
				+from typing import Any, Dict, Literal, Optional
			
 
				+
			
 
# Granularity at which removal runs: a whole page or a single table cell.
Scope = Literal["page", "cell"]
# Removal strategies for which presets are built below.
Method = Literal["threshold", "masked", "masked_adaptive"]

# Detection parameters; _base_preset copies them into the "detect" section
# of every preset.
_DETECT_DEFAULT: Dict[str, Any] = {
    "ratio_threshold": 0.025,
    "midtone_low": 100,
    "midtone_high": 220,
    "check_diagonal": True,
    "diagonal_angle_range": (30, 60),
}

# Page-level mask-building parameters; copied into the "mask" section of
# the "masked" / "masked_adaptive" presets. The keys are consumed outside
# this module, so their exact semantics cannot be confirmed from here.
_MASK_PAGE: Dict[str, Any] = {
    "mask_mode": "light_on_white",
    "text_protect_gray_max": 130,
    "light_gray_low": 236,
    "light_gray_high": 253,
    "whiten_gray_low": 200,
    "direction_filter": "hough",
    "morph_close_kernel": 0,
    "morph_dilate_kernel": 0,
    "min_component_area": 200,
    "debug_block_maps": False,
    "debug_block_size": 48,
    "hough_midtone_low": 200,
    "hough_midtone_high": 254,
    "hough_canny_low": 30,
    "hough_canny_high": 100,
    "hough_threshold": 25,
    "hough_min_line_length": 35,
    "hough_max_line_gap": 18,
    "hough_line_thickness": 12,
    "hough_band_dilate_radius": 16,
    "hough_use_angle_statistics": True,
    "hough_angle_tolerance": 5.0,
    "hough_secondary_peak_ratio": 0.35,
    "hough_min_length_percentile": 25.0,
    "midtone_low": 95,
    "midtone_high": 235,
    "remove_horizontal_vertical": True,
    "diagonal_enhance": True,
    "diagonal_kernel_length": 25,
    "horizontal_kernel_length": 35,
    "vertical_kernel_length": 35,
    "morph_open_kernel": 2,
    # NOTE(review): "dmorph_close_kernel" differs from the
    # "morph_close_kernel" key above; confirm the consumer reads this
    # exact spelling and that it is not a typo.
    "dmorph_close_kernel": 3,
    "text_protect_percentile": 10.0,
    "background_threshold": 248,
    "seal_protect": True,
}

# Cell-level mask parameters: the page values with smaller area / Hough
# line settings and a lower text_protect_gray_max.
_MASK_CELL: Dict[str, Any] = {
    **_MASK_PAGE,
    "min_component_area": 60,
    "hough_min_line_length": 18,
    "hough_max_line_gap": 12,
    "hough_line_thickness": 8,
    "hough_band_dilate_radius": 10,
    "hough_threshold": 20,
    "text_protect_gray_max": 125,
}

# Page-level parameters copied into the "adaptive" section of the
# "masked_adaptive" preset.
_ADAPTIVE_PAGE: Dict[str, Any] = {
    "whiten_mode": "mask_fill",
    "text_percentile": 10.0,
    "watermark_percentile": 70.0,
    "background_percentile": 95.0,
    "background_threshold": 248,
    "wm_margin": 12,
    "text_protect_max": 120,
}

# Cell-level adaptive parameters: page values with a lower
# text_protect_max and wm_margin.
_ADAPTIVE_CELL: Dict[str, Any] = {
    **_ADAPTIVE_PAGE,
    "text_protect_max": 110,
    "wm_margin": 10,
}

# Contrast-enhancement defaults for page scope (enabled by default).
_CONTRAST_PAGE_DEFAULT: Dict[str, Any] = {
    "enabled": True,
    "method": "text_restore",
    "text_black_target": 85,
    "background_threshold": 248,
    "text_lo_percentile": 1.0,
    "text_hi_percentile": 99.0,
}

# Contrast-enhancement defaults for cell scope (disabled by default).
_CONTRAST_CELL_DEFAULT: Dict[str, Any] = {
    "enabled": False,
    "method": "text_restore",
    "text_black_target": 88,
    "background_threshold": 248,
    "text_lo_percentile": 1.0,
    "text_hi_percentile": 99.0,
}
			
 
				+
			
 
				+
			
 
				+def _base_preset(scope: Scope, method: Method) -> Dict[str, Any]:
			
 
				+    mask = _MASK_CELL if scope == "cell" else _MASK_PAGE
			
 
				+    adaptive = _ADAPTIVE_CELL if scope == "cell" else _ADAPTIVE_PAGE
			
 
				+    contrast = (
			
 
				+        copy.deepcopy(_CONTRAST_CELL_DEFAULT)
			
 
				+        if scope == "cell"
			
 
				+        else copy.deepcopy(_CONTRAST_PAGE_DEFAULT)
			
 
				+    )
			
 
				+    threshold = 175 if scope == "page" else 170
			
 
				+    cfg: Dict[str, Any] = {
			
 
				+        "enabled": True,
			
 
				+        "detect_before_remove": scope == "page",
			
 
				+        "detect": copy.deepcopy(_DETECT_DEFAULT),
			
 
				+        "method": method,
			
 
				+        "threshold": threshold,
			
 
				+        "morph_close_kernel": 0,
			
 
				+        "contrast_enhancement": contrast,
			
 
				+        "debug_options": {
			
 
				+            "enabled": False,
			
 
				+            "save_compare": True,
			
 
				+            "image_format": "png",
			
 
				+            "subdir": "watermark_removal",
			
 
				+        },
			
 
				+    }
			
 
				+    if method in ("masked", "masked_adaptive"):
			
 
				+        cfg["mask"] = copy.deepcopy(mask)
			
 
				+    if method == "masked_adaptive":
			
 
				+        cfg["adaptive"] = copy.deepcopy(adaptive)
			
 
				+    return cfg
			
 
				+
			
 
				+
			
 
				+PAGE_WATERMARK_PRESETS: Dict[str, Dict[str, Any]] = {
			
 
				+    "threshold": _base_preset("page", "threshold"),
			
 
				+    "masked": _base_preset("page", "masked"),
			
 
				+    "masked_adaptive": _base_preset("page", "masked_adaptive"),
			
 
				+}
			
 
				+
			
 
				+CELL_WATERMARK_PRESETS: Dict[str, Dict[str, Any]] = {
			
 
				+    "threshold": _base_preset("cell", "threshold"),
			
 
				+    "masked": _base_preset("cell", "masked"),
			
 
				+    "masked_adaptive": _base_preset("cell", "masked_adaptive"),
			
 
				+}
			
 
				+
			
 
				+
			
 
				+def get_preset(scope: Scope, method: str) -> Dict[str, Any]:
			
 
				+    method = method or "masked_adaptive"
			
 
				+    presets = CELL_WATERMARK_PRESETS if scope == "cell" else PAGE_WATERMARK_PRESETS
			
 
				+    if method not in presets:
			
 
				+        method = "masked_adaptive"
			
 
				+    return copy.deepcopy(presets[method])
			
 
				+
			
 
				+
			
 
				+def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
			
 
				+    out = copy.deepcopy(base)
			
 
				+    for k, v in override.items():
			
 
				+        if k in out and isinstance(out[k], dict) and isinstance(v, dict):
			
 
				+            out[k] = _deep_merge(out[k], v)
			
 
				+        else:
			
 
				+            out[k] = copy.deepcopy(v)
			
 
				+    return out
			
 
				+
			
 
				+
			
 
				+def merge_watermark_config(
			
 
				+    scope: Scope,
			
 
				+    user_cfg: Optional[Dict[str, Any]] = None,
			
 
				+    *,
			
 
				+    method: Optional[str] = None,
			
 
				+) -> Dict[str, Any]:
			
 
				+    """将用户 YAML 片段与 scope 预设合并；保留旧版 mask/adaptive 全量覆盖能力。"""
			
 
				+    user_cfg = user_cfg or {}
			
 
				+    m = method or user_cfg.get("method") or "masked_adaptive"
			
 
				+    merged = get_preset(scope, str(m))
			
 
				+
			
 
				+    for key in (
			
 
				+        "enabled",
			
 
				+        "detect_before_remove",
			
 
				+        "method",
			
 
				+        "threshold",
			
 
				+        "morph_close_kernel",
			
 
				+    ):
			
 
				+        if key in user_cfg:
			
 
				+            merged[key] = user_cfg[key]
			
 
				+
			
 
				+    for nested in ("detect", "mask", "adaptive", "contrast_enhancement", "debug_options"):
			
 
				+        if nested in user_cfg and isinstance(user_cfg[nested], dict):
			
 
				+            merged[nested] = _deep_merge(merged.get(nested) or {}, user_cfg[nested])
			
 
				+
			
 
				+    if method:
			
 
				+        merged["method"] = method
			
 
				+    return merged
			
--- a/ocr_utils/watermark/processor.py
+++ b/ocr_utils/watermark/processor.py
@@ -0,0 +1,153 @@
 
				+"""
			
 
				+水印处理门面：preset 解析、检测、去水印、对比度增强。
			
 
				+"""
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+from typing import Any, Dict, List, Optional, Tuple, Union
			
 
				+
			
 
				+import numpy as np
			
 
				+from loguru import logger
			
 
				+from PIL import Image
			
 
				+
			
 
				+from ocr_utils.watermark.algorithms import detect_watermark
			
 
				+from ocr_utils.watermark.presets import Scope, merge_watermark_config
			
 
				+from ocr_utils.watermark.removal import remove_watermark_from_image_rgb
			
 
				+
			
 
				+
			
 
				+class WatermarkProcessor:
			
 
				+    """页级 / 单元格级水印去除编排。"""
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        config: Dict[str, Any],
			
 
				+        *,
			
 
				+        scope: Scope = "page",
			
 
				+    ):
			
 
				+        self.scope = scope
			
 
				+        self.config = merge_watermark_config(scope, config)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_user_config(
			
 
				+        cls,
			
 
				+        user_cfg: Optional[Dict[str, Any]],
			
 
				+        *,
			
 
				+        scope: Scope = "page",
			
 
				+    ) -> "WatermarkProcessor":
			
 
				+        return cls(user_cfg or {}, scope=scope)
			
 
				+
			
 
				+    @property
			
 
				+    def enabled(self) -> bool:
			
 
				+        return bool(self.config.get("enabled", False))
			
 
				+
			
 
				+    @property
			
 
				+    def method(self) -> str:
			
 
				+        return str(self.config.get("method") or "masked_adaptive")
			
 
				+
			
 
				+    @property
			
 
				+    def threshold(self) -> int:
			
 
				+        return int(self.config.get("threshold", 175))
			
 
				+
			
 
				+    @property
			
 
				+    def morph_close_kernel(self) -> int:
			
 
				+        return int(self.config.get("morph_close_kernel", 0))
			
 
				+
			
 
				+    def contrast_config(self) -> Optional[Dict[str, Any]]:
			
 
				+        ce = self.config.get("contrast_enhancement")
			
 
				+        if not isinstance(ce, dict):
			
 
				+            return None
			
 
				+        if not ce.get("enabled", False):
			
 
				+            return None
			
 
				+        return dict(ce)
			
 
				+
			
 
				+    def should_apply(self, image: Union[np.ndarray, Image.Image]) -> bool:
			
 
				+        if not self.enabled:
			
 
				+            return False
			
 
				+        if not bool(self.config.get("detect_before_remove", True)):
			
 
				+            return True
			
 
				+
			
 
				+        detect_cfg = self.config.get("detect")
			
 
				+        if not isinstance(detect_cfg, dict):
			
 
				+            detect_cfg = {}
			
 
				+
			
 
				+        angle_range = detect_cfg.get("diagonal_angle_range", (30, 60))
			
 
				+        if isinstance(angle_range, list):
			
 
				+            angle_range = tuple(angle_range)
			
 
				+
			
 
				+        return detect_watermark(
			
 
				+            image,
			
 
				+            midtone_low=int(detect_cfg.get("midtone_low", 100)),
			
 
				+            midtone_high=int(detect_cfg.get("midtone_high", 220)),
			
 
				+            ratio_threshold=float(detect_cfg.get("ratio_threshold", 0.025)),
			
 
				+            check_diagonal=bool(detect_cfg.get("check_diagonal", True)),
			
 
				+            diagonal_angle_range=angle_range,
			
 
				+        )
			
 
				+
			
 
				+    def process(
			
 
				+        self,
			
 
				+        image: Union[np.ndarray, Image.Image],
			
 
				+        *,
			
 
				+        apply_removal: Optional[bool] = None,
			
 
				+        apply_contrast: Optional[bool] = None,
			
 
				+        contrast_override: Optional[Dict[str, Any]] = None,
			
 
				+        removal_debug: Optional[Dict[str, Any]] = None,
			
 
				+        force: bool = False,
			
 
				+    ) -> Tuple[np.ndarray, List[str]]:
			
 
				+        """
			
 
				+        去水印 + 可选对比度增强。
			
 
				+
			
 
				+        Returns:
			
 
				+            (BGR ndarray, preprocess_stages)
			
 
				+        """
			
 
				+        stages: List[str] = []
			
 
				+        if isinstance(image, Image.Image):
			
 
				+            img = np.array(image.convert("RGB"))
			
 
				+            img = img[:, :, ::-1].copy()  # RGB -> BGR
			
 
				+        else:
			
 
				+            img = np.array(image)
			
 
				+            if img.ndim == 2:
			
 
				+                img = np.stack([img, img, img], axis=-1)
			
 
				+
			
 
				+        do_remove = apply_removal if apply_removal is not None else self.enabled
			
 
				+        if do_remove and not force and not self.should_apply(img):
			
 
				+            do_remove = False
			
 
				+
			
 
				+        if contrast_override is not None:
			
 
				+            contrast_cfg = dict(contrast_override)
			
 
				+            if apply_contrast is not False and not contrast_cfg.get("enabled", True):
			
 
				+                contrast_cfg["enabled"] = True
			
 
				+        else:
			
 
				+            contrast_cfg = self.contrast_config()
			
 
				+        if apply_contrast is False:
			
 
				+            contrast_cfg = None
			
 
				+        elif apply_contrast is True and contrast_cfg is None:
			
 
				+            ce = self.config.get("contrast_enhancement") or {}
			
 
				+            if isinstance(ce, dict) and ce.get("method"):
			
 
				+                contrast_cfg = dict(ce)
			
 
				+                contrast_cfg["enabled"] = True
			
 
				+
			
 
				+        if not do_remove and not contrast_cfg:
			
 
				+            return img, stages
			
 
				+
			
 
				+        try:
			
 
				+            if do_remove:
			
 
				+                stages.append("wm")
			
 
				+            if contrast_cfg:
			
 
				+                stages.append("contrast")
			
 
				+
			
 
				+            cleaned = remove_watermark_from_image_rgb(
			
 
				+                img,
			
 
				+                threshold=self.threshold,
			
 
				+                morph_close_kernel=self.morph_close_kernel,
			
 
				+                return_pil=False,
			
 
				+                contrast_enhancement=contrast_cfg,
			
 
				+                apply_watermark_removal=do_remove,
			
 
				+                watermark_removal_cfg=self.config,
			
 
				+                removal_debug=removal_debug,
			
 
				+            )
			
 
				+            return np.asarray(cleaned), stages
			
 
				+        except Exception as e:
			
 
				+            logger.warning(f"WatermarkProcessor.process failed (scope={self.scope}): {e}")
			
 
				+            return img, stages
			
 
				+
			
 
				+    def get_full_config(self) -> Dict[str, Any]:
			
 
				+        return dict(self.config)
			
--- a/ocr_utils/watermark/removal.py
+++ b/ocr_utils/watermark/removal.py
@@ -0,0 +1,152 @@
 
				+"""水印处理：图像去水印入口（由 ocr_utils.watermark_utils 迁入）。"""
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import json
			
 
				+import re
			
 
				+from pathlib import Path
			
 
				+from typing import Any, Dict, Optional, Tuple, Union
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+from loguru import logger
			
 
				+from PIL import Image
			
 
				+
			
 
				+from ocr_utils.watermark.algorithms import (
			
 
				+    _image_to_gray_and_bgr,
			
 
				+    remove_watermark_masked_adaptive,
			
 
				+)
			
 
				+from ocr_utils.watermark.contrast import apply_contrast_enhancement_config
			
 
				+
			
 
				+def remove_watermark_from_image(
			
 
				+    image: Union[np.ndarray, Image.Image],
			
 
				+    threshold: int = 160,
			
 
				+    morph_close_kernel: int = 2,
			
 
				+    return_pil: Optional[bool] = None,
			
 
				+    watermark_removal_cfg: Optional[Dict[str, Any]] = None,
			
 
				+    removal_debug: Optional[Dict[str, Any]] = None,
			
 
				+) -> Union[np.ndarray, Image.Image]:
			
 
				+    """
			
 
				+    去除图像中的浅色斜向文字水印，返回灰度图。
			
 
				+
			
 
				+    method（watermark_removal_cfg）:
			
 
				+        threshold（默认）: gray > threshold → 255
			
 
				+        masked / masked_adaptive: 掩膜 + 掩膜内动态阈值
			
 
				+
			
 
				+    Args:
			
 
				+        image: 输入图像（PIL.Image 或 np.ndarray BGR/RGB/灰度）。
			
 
				+        threshold: 全局阈值或掩膜失败时的回退阈值。
			
 
				+        morph_close_kernel: 形态学闭运算核大小，0 跳过。
			
 
				+        watermark_removal_cfg: 完整配置（含 method / mask / adaptive）。
			
 
				+        removal_debug: 若传入 dict，写入掩膜与 T_wm 等调试字段。
			
 
				+
			
 
				+    Returns:
			
 
				+        去除水印后的灰度图：PIL.Image(mode='L') 或 np.ndarray(HxW, uint8)。
			
 
				+    """
			
 
				+    input_is_pil = isinstance(image, Image.Image)
			
 
				+    cfg = watermark_removal_cfg or {}
			
 
				+    method = str(cfg.get("method") or "threshold").lower().strip()
			
 
				+
			
 
				+    gray, bgr = _image_to_gray_and_bgr(image)
			
 
				+
			
 
				+    if method in ("masked", "masked_adaptive"):
			
 
				+        cleaned, dbg = remove_watermark_masked_adaptive(
			
 
				+            gray,
			
 
				+            bgr=bgr,
			
 
				+            mask_cfg=cfg.get("mask") if isinstance(cfg.get("mask"), dict) else None,
			
 
				+            adaptive_cfg=cfg.get("adaptive")
			
 
				+            if isinstance(cfg.get("adaptive"), dict)
			
 
				+            else None,
			
 
				+            threshold_fallback=threshold,
			
 
				+            morph_close_kernel=morph_close_kernel,
			
 
				+        )
			
 
				+        if removal_debug is not None:
			
 
				+            removal_debug.clear()
			
 
				+            removal_debug.update(dbg)
			
 
				+    else:
			
 
				+        cleaned = gray.copy()
			
 
				+        cleaned[gray > threshold] = 255
			
 
				+        if morph_close_kernel > 0:
			
 
				+            kernel = np.ones((morph_close_kernel, morph_close_kernel), np.uint8)
			
 
				+            cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)
			
 
				+        if removal_debug is not None:
			
 
				+            removal_debug.clear()
			
 
				+            removal_debug.update({"mode": "threshold", "threshold": threshold})
			
 
				+
			
 
				+    should_return_pil = input_is_pil if return_pil is None else return_pil
			
 
				+    return Image.fromarray(cleaned, mode='L') if should_return_pil else cleaned
			
 
				+
			
 
				+
			
 
				+def remove_watermark_from_image_rgb(
			
 
				+    image: Union[np.ndarray, Image.Image],
			
 
				+    threshold: int = 160,
			
 
				+    morph_close_kernel: int = 2,
			
 
				+    return_pil: Optional[bool] = None,
			
 
				+    contrast_enhancement: Optional[Dict[str, Any]] = None,
			
 
				+    apply_watermark_removal: bool = True,
			
 
				+    watermark_removal_cfg: Optional[Dict[str, Any]] = None,
			
 
				+    removal_debug: Optional[Dict[str, Any]] = None,
			
 
				+) -> Union[np.ndarray, Image.Image]:
			
 
				+    """
			
 
				+    去除水印并返回 RGB 三通道图像。
			
 
				+
			
 
				+    与 remove_watermark_from_image 逻辑相同，但输出为 RGB（三通道），
			
 
				+    方便直接传入布局检测、OCR 等需要彩色输入的下游模型。
			
 
				+
			
 
				+    Args:
			
 
				+        contrast_enhancement: 对比度增强配置（含 enabled / method 等），见 apply_contrast_enhancement_config
			
 
				+        apply_watermark_removal: False 时跳过阈值抹白，仅做对比度增强（若启用）
			
 
				+
			
 
				+    Args/Returns: 同 remove_watermark_from_image，但输出为 RGB/BGR 三通道。
			
 
				+    """
			
 
				+    input_is_pil = isinstance(image, Image.Image)
			
 
				+
			
 
				+    if apply_watermark_removal:
			
 
				+        gray_result = remove_watermark_from_image(
			
 
				+            image,
			
 
				+            threshold,
			
 
				+            morph_close_kernel,
			
 
				+            return_pil=False,
			
 
				+            watermark_removal_cfg=watermark_removal_cfg,
			
 
				+            removal_debug=removal_debug,
			
 
				+        )
			
 
				+    else:
			
 
				+        if isinstance(image, Image.Image):
			
 
				+            np_img = np.array(image.convert("RGB"))
			
 
				+            np_img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
			
 
				+        else:
			
 
				+            np_img = image.copy()
			
 
				+        gray_result = (
			
 
				+            cv2.cvtColor(np_img, cv2.COLOR_BGR2GRAY)
			
 
				+            if np_img.ndim == 3
			
 
				+            else np_img
			
 
				+        )
			
 
				+
			
 
				+    gray_result = apply_contrast_enhancement_config(gray_result, contrast_enhancement)
			
 
				+    rgb_np = cv2.cvtColor(gray_result, cv2.COLOR_GRAY2BGR)
			
 
				+
			
 
				+    should_return_pil = input_is_pil if return_pil is None else return_pil
			
 
				+    if should_return_pil:
			
 
				+        return Image.fromarray(cv2.cvtColor(rgb_np, cv2.COLOR_BGR2RGB))
			
 
				+    return rgb_np
			
 
				+
			
 
				+
			
 
				+def render_watermark_mask_overlay(
			
 
				+    image: np.ndarray,
			
 
				+    wm_mask: np.ndarray,
			
 
				+    *,
			
 
				+    color: Tuple[int, int, int] = (0, 0, 255),
			
 
				+    alpha: float = 0.45,
			
 
				+) -> np.ndarray:
			
 
				+    """在原图上叠加红色半透明水印掩膜，供调试图保存。"""
			
 
				+    if image.ndim == 2:
			
 
				+        base = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
			
 
				+    elif image.shape[2] == 3:
			
 
				+        base = image.copy()
			
 
				+        if image.max() <= 1:
			
 
				+            base = (image * 255).astype(np.uint8)
			
 
				+    else:
			
 
				+        base = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
			
 
				+
			
 
				+    overlay = base.copy()
			
 
				+    overlay[wm_mask] = color
			
 
				+    return cv2.addWeighted(base, 1.0 - alpha, overlay, alpha, 0)
			
--- a/ocr_utils/watermark_utils.py
+++ b/ocr_utils/watermark_utils.py
@@ -1,1712 +1,46 @@
 
				 """
			
 
				-水印处理工具模块
			
 
				+水印处理兼容入口（实现已迁至 ocr_utils.watermark 包）。
			
 
				 
			
 
				-统一管理所有水印检测与去除能力，供整个平台复用：
			
 
				+新代码请优先使用::
			
 
				 
			
 
				-- 图像级（扫描 PDF / 图片）：
			
 
				-    detect_watermark()                检测图像中的斜向文字水印
			
 
				-    build_watermark_mask()            构建斜向浅灰水印掩膜（方案 D）
			
 
				-    remove_watermark_masked_adaptive() 掩膜 + 动态阈值去水印
			
 
				-    remove_watermark_from_image()     去除水印，返回灰度图
			
 
				-    remove_watermark_from_image_rgb() 去除水印，返回 RGB 图（适合模型输入）
			
 
				-    enhance_document_contrast()       去水印后对比度/笔画深度恢复
			
 
				-    save_watermark_removal_debug()    保存去水印前后对比调试图
			
 
				+    from ocr_utils.watermark import WatermarkProcessor, detect_watermark, ...
			
 
				 
			
 
				-- PDF 层级（文字型 PDF，保留可搜索性）：
			
 
				-    scan_pdf_watermark_xobjs()        快速扫描 PDF 是否含水印 XObject（无副作用）
			
 
				-    remove_txt_pdf_watermark()        从内存 PDF bytes 去除水印，返回新 bytes 或 None
			
 
				+本模块保留与历史 import 路径的兼容。
			
 
				 """
			
 
				-from __future__ import annotations
			
 
				-
			
 
				-import json
			
 
				-import re
			
 
				-from pathlib import Path
			
 
				-from typing import Any, Dict, Optional, Tuple, Union
			
 
				-
			
 
				-import cv2
			
 
				-import numpy as np
			
 
				-from loguru import logger
			
 
				-from PIL import Image
			
 
				-
			
 
				-
			
 
				-# ─────────────────────────────────────────────────────────────────────────────
			
 
				-# 图像级水印检测与去除
			
 
				-# ─────────────────────────────────────────────────────────────────────────────
			
 
				-
			
 
				-def detect_watermark(
			
 
				-    image: Union[np.ndarray, Image.Image],
			
 
				-    midtone_low: int = 100,
			
 
				-    midtone_high: int = 220,
			
 
				-    ratio_threshold: float = 0.03,
			
 
				-    check_diagonal: bool = True,
			
 
				-    diagonal_angle_range: tuple = (30, 60),
			
 
				-) -> bool:
			
 
				-    """
			
 
				-    检测图像中是否存在浅色斜向文字水印（银行流水类文档水印检测）。
			
 
				-
			
 
				-    原理：
			
 
				-    1. 将图像转为灰度，提取「中间调」像素（midtone_low ~ midtone_high），
			
 
				-       这些像素既不是纯白背景，也不是深黑正文，是浅灰水印的典型范围。
			
 
				-    2. 若中间调像素占比超过 ratio_threshold，初步判定存在水印。
			
 
				-    3. 若 check_diagonal=True，进一步用 Hough 直线变换验证中间调区域
			
 
				-       是否呈现斜向（diagonal_angle_range 度）纹理，以排除灰色背景误报。
			
 
				-
			
 
				-    Args:
			
 
				-        image: 输入图像，支持 PIL.Image 或 np.ndarray（BGR/RGB/灰度）。
			
 
				-        midtone_low: 中间调下限（默认 100），低于此视为深色正文。
			
 
				-        midtone_high: 中间调上限（默认 220），高于此视为纯白背景。
			
 
				-        ratio_threshold: 中间调像素占全图比例阈值（默认 0.03 即 3%）。
			
 
				-        check_diagonal: 是否进行斜向纹理验证（默认 True）。
			
 
				-        diagonal_angle_range: 斜向角度范围（度），默认 (30, 60)，含 45° 斜水印。
			
 
				-
			
 
				-    Returns:
			
 
				-        True 表示检测到水印，False 表示未检测到。
			
 
				-    """
			
 
				-    if isinstance(image, Image.Image):
			
 
				-        pil_img = image.convert('RGB') if image.mode == 'RGBA' else image
			
 
				-        np_img = np.array(pil_img)
			
 
				-        gray = cv2.cvtColor(np_img, cv2.COLOR_RGB2GRAY) if np_img.ndim == 3 else np_img
			
 
				-    else:
			
 
				-        np_img = image
			
 
				-        gray = cv2.cvtColor(np_img, cv2.COLOR_BGR2GRAY) if np_img.ndim == 3 else np_img
			
 
				-
			
 
				-    midtone_mask = (gray > midtone_low) & (gray < midtone_high)
			
 
				-    ratio = midtone_mask.sum() / gray.size
			
 
				-
			
 
				-    if ratio < ratio_threshold:
			
 
				-        return False
			
 
				-
			
 
				-    if not check_diagonal:
			
 
				-        return True
			
 
				-
			
 
				-    midtone_uint8 = (midtone_mask.astype(np.uint8)) * 255
			
 
				-    edges = cv2.Canny(midtone_uint8, 50, 150, apertureSize=3)
			
 
				-    lines = cv2.HoughLines(edges, rho=1, theta=np.pi / 180, threshold=80)
			
 
				-
			
 
				-    if lines is None:
			
 
				-        return False
			
 
				-
			
 
				-    low_rad = np.deg2rad(diagonal_angle_range[0])
			
 
				-    high_rad = np.deg2rad(diagonal_angle_range[1])
			
 
				-    diagonal_count = 0
			
 
				-    for line in lines:
			
 
				-        theta = line[0][1]
			
 
				-        if low_rad <= theta <= high_rad or (np.pi - high_rad) <= theta <= (np.pi - low_rad):
			
 
				-            diagonal_count += 1
			
 
				-
			
 
				-    return True | False
			
 
				-
			
 
				-
			
 
				-def _local_std_map(gray: np.ndarray, window: int = 5) -> np.ndarray:
			
 
				-    """局部标准差图（返回值与输入同形状）。"""
			
 
				-    gray = np.asarray(gray, dtype=np.float32)
			
 
				-    size = max(3, int(window))
			
 
				-    kernel = np.ones((size, size), dtype=np.float32) / (size * size)
			
 
				-    mean = cv2.filter2D(gray, -1, kernel)
			
 
				-    sq_mean = cv2.filter2D(gray * gray, -1, kernel)
			
 
				-    var = sq_mean - mean * mean
			
 
				-    var = np.maximum(var, 0)
			
 
				-    return np.sqrt(var)
			
 
				-
			
 
				-
			
 
				-def _line_structuring_kernel(length: int, angle_deg: float) -> np.ndarray:
			
 
				-    """生成指定角度、长度的线形结构元（用于斜向水印形态学）。"""
			
 
				-    length = max(3, int(length))
			
 
				-    k = np.zeros((length, length), np.uint8)
			
 
				-    c = length // 2
			
 
				-    rad = np.deg2rad(angle_deg)
			
 
				-    dx = int(round(np.cos(rad) * (c - 1)))
			
 
				-    dy = int(round(np.sin(rad) * (c - 1)))
			
 
				-    cv2.line(k, (c - dx, c - dy), (c + dx, c + dy), 1, thickness=1)
			
 
				-    return k
			
 
				-
			
 
				-
			
 
				-def _line_angle_deg(x1: int, y1: int, x2: int, y2: int) -> float:
			
 
				-    """线段方向角 [0, 180)（无向）。"""
			
 
				-    ang = float(np.degrees(np.arctan2(y2 - y1, x2 - x1)))
			
 
				-    if ang < 0:
			
 
				-        ang += 180.0
			
 
				-    return ang
			
 
				-
			
 
				-
			
 
				-def _angle_in_diagonal_ranges(
			
 
				-    angle_deg: float,
			
 
				-    ranges: Tuple[Tuple[float, float], Tuple[float, float]] = ((35.0, 55.0), (125.0, 145.0)),
			
 
				-) -> bool:
			
 
				-    for lo, hi in ranges:
			
 
				-        if lo <= angle_deg <= hi:
			
 
				-            return True
			
 
				-    return False
			
 
				-
			
 
				-
			
 
				-def _angle_distance_deg(a: float, b: float) -> float:
			
 
				-    """无向角距离 [0, 90]。"""
			
 
				-    d = abs(float(a) - float(b)) % 180.0
			
 
				-    return min(d, 180.0 - d)
			
 
				-
			
 
				-
			
 
				-def _line_length(x1: int, y1: int, x2: int, y2: int) -> float:
			
 
				-    return float(np.hypot(x2 - x1, y2 - y1))
			
 
				-
			
 
				-
			
 
				-def _find_dominant_diagonal_angles(
			
 
				-    segments: list,
			
 
				-    *,
			
 
				-    angle_ranges: Tuple[Tuple[float, float], Tuple[float, float]] = ((25.0, 65.0), (115.0, 155.0)),
			
 
				-    smooth_sigma: float = 2.0,
			
 
				-    secondary_peak_ratio: float = 0.35,
			
 
				-) -> Tuple[list, np.ndarray]:
			
 
				-    """
			
 
				-    按线段长度加权统计角度直方图，取主峰（及次峰）作为本页水印固定方向。
			
 
				-
			
 
				-    Returns:
			
 
				-        dominant_angles: 1~2 个主导角度（度）
			
 
				-        hist_smooth: 长度 180 的平滑直方图
			
 
				-    """
			
 
				-    hist = np.zeros(180, dtype=np.float64)
			
 
				-    for x1, y1, x2, y2, ang, length in segments:
			
 
				-        if not _angle_in_diagonal_ranges(ang, angle_ranges):
			
 
				-            continue
			
 
				-        hist[int(ang) % 180] += length
			
 
				-
			
 
				-    if hist.sum() <= 0:
			
 
				-        return [], hist
			
 
				-
			
 
				-    ksize = max(3, int(smooth_sigma * 4) | 1)
			
 
				-    hist_smooth = cv2.GaussianBlur(
			
 
				-        hist.reshape(1, 180).astype(np.float32), (ksize, 1), smooth_sigma
			
 
				-    ).flatten().astype(np.float64)
			
 
				-
			
 
				-    peaks: list = []
			
 
				-    for lo, hi in angle_ranges:
			
 
				-        lo_i, hi_i = int(lo), int(hi)
			
 
				-        sub = hist_smooth[lo_i : hi_i + 1]
			
 
				-        if sub.size == 0 or sub.max() <= 0:
			
 
				-            continue
			
 
				-        peak_ang = lo_i + int(sub.argmax())
			
 
				-        peaks.append((peak_ang, float(sub.max())))
			
 
				-
			
 
				-    if not peaks:
			
 
				-        return [], hist_smooth
			
 
				-
			
 
				-    peaks.sort(key=lambda x: -x[1])
			
 
				-    dominant: list = [peaks[0][0]]
			
 
				-    for ang, val in peaks[1:]:
			
 
				-        if val >= peaks[0][1] * secondary_peak_ratio:
			
 
				-            if all(_angle_distance_deg(ang, d) > 15 for d in dominant):
			
 
				-                dominant.append(ang)
			
 
				-    return dominant, hist_smooth
			
 
				-
			
 
				-
			
 
				-def _render_angle_histogram(hist: np.ndarray, dominant_angles: list) -> np.ndarray:
			
 
				-    """角度直方图 debug 图（BGR）。"""
			
 
				-    h_img, w_img = 120, 360
			
 
				-    canvas = np.ones((h_img, w_img, 3), dtype=np.uint8) * 255
			
 
				-    if hist.max() <= 0:
			
 
				-        return canvas
			
 
				-    norm = (hist / hist.max() * (h_img - 20)).astype(np.int32)
			
 
				-    for i, h in enumerate(norm):
			
 
				-        x = int(i * (w_img - 1) / 179)
			
 
				-        cv2.line(canvas, (x, h_img - 10), (x, h_img - 10 - int(h)), (180, 180, 180), 1)
			
 
				-    for ang in dominant_angles:
			
 
				-        x = int(ang * (w_img - 1) / 179)
			
 
				-        cv2.line(canvas, (x, 0), (x, h_img - 1), (0, 0, 255), 2)
			
 
				-    cv2.putText(canvas, "angle (deg)", (w_img // 2 - 40, h_img - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)
			
 
				-    return canvas
			
 
				-
			
 
				-
			
 
				-def _build_diag_hough_region_mask(
			
 
				-    gray: np.ndarray,
			
 
				-    *,
			
 
				-    midtone_low: int = 200,
			
 
				-    midtone_high: int = 254,
			
 
				-    canny_low: int = 30,
			
 
				-    canny_high: int = 100,
			
 
				-    hough_threshold: int = 30,
			
 
				-    min_line_length: int = 40,
			
 
				-    max_line_gap: int = 15,
			
 
				-    angle_ranges: Tuple[Tuple[float, float], Tuple[float, float]] = ((25.0, 65.0), (115.0, 155.0)),
			
 
				-    angle_tolerance: float = 5.0,
			
 
				-    use_angle_statistics: bool = True,
			
 
				-    secondary_peak_ratio: float = 0.35,
			
 
				-    min_length_percentile: float = 25.0,
			
 
				-    line_thickness: int = 10,
			
 
				-    band_dilate_radius: int = 12,
			
 
				-) -> Tuple[np.ndarray, Dict[str, Any]]:
			
 
				-    """
			
 
				-    方案 C：Canny + HoughLinesP + 角度直方图统计主峰，仅保留与本页水印方向一致的线段。
			
 
				-    """
			
 
				-    gray_u8 = np.asarray(gray, dtype=np.uint8)
			
 
				-    band = ((gray_u8 >= midtone_low) & (gray_u8 < midtone_high)).astype(np.uint8) * 255
			
 
				-    edges = cv2.Canny(band, int(canny_low), int(canny_high), apertureSize=3)
			
 
				-
			
 
				-    lines_p = cv2.HoughLinesP(
			
 
				-        edges,
			
 
				-        rho=1,
			
 
				-        theta=np.pi / 180,
			
 
				-        threshold=int(hough_threshold),
			
 
				-        minLineLength=int(min_line_length),
			
 
				-        maxLineGap=int(max_line_gap),
			
 
				-    )
			
 
				-
			
 
				-    line_mask = np.zeros_like(gray_u8, dtype=np.uint8)
			
 
				-    lines_all_bgr = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR)
			
 
				-    lines_filt_bgr = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR)
			
 
				-    diag_candidates: list = []
			
 
				-    total_lines = 0
			
 
				-
			
 
				-    if lines_p is not None:
			
 
				-        for seg in lines_p:
			
 
				-            x1, y1, x2, y2 = [int(v) for v in seg[0]]
			
 
				-            total_lines += 1
			
 
				-            ang = _line_angle_deg(x1, y1, x2, y2)
			
 
				-            length = _line_length(x1, y1, x2, y2)
			
 
				-            if not _angle_in_diagonal_ranges(ang, angle_ranges):
			
 
				-                continue
			
 
				-            diag_candidates.append((x1, y1, x2, y2, ang, length))
			
 
				-            cv2.line(lines_all_bgr, (x1, y1), (x2, y2), (128, 128, 128), 1)
			
 
				-
			
 
				-    dominant_angles: list = []
			
 
				-    hist_smooth = np.zeros(180, dtype=np.float64)
			
 
				-    if use_angle_statistics and diag_candidates:
			
 
				-        dominant_angles, hist_smooth = _find_dominant_diagonal_angles(
			
 
				-            diag_candidates,
			
 
				-            angle_ranges=angle_ranges,
			
 
				-            secondary_peak_ratio=secondary_peak_ratio,
			
 
				-        )
			
 
				-
			
 
				-    def _angle_matches(ang: float) -> bool:
			
 
				-        if not use_angle_statistics or not dominant_angles:
			
 
				-            return True
			
 
				-        return any(_angle_distance_deg(ang, d) <= angle_tolerance for d in dominant_angles)
			
 
				-
			
 
				-    angle_matched = [
			
 
				-        s for s in diag_candidates if _angle_matches(s[4])
			
 
				-    ]
			
 
				-    if angle_matched and min_length_percentile > 0:
			
 
				-        lengths = np.array([s[5] for s in angle_matched], dtype=np.float32)
			
 
				-        len_th = float(np.percentile(lengths, min_length_percentile))
			
 
				-        angle_matched = [s for s in angle_matched if s[5] >= len_th]
			
 
				-
			
 
				-    matched_keys = {(s[0], s[1], s[2], s[3]) for s in angle_matched}
			
 
				-    kept_lines: list = []
			
 
				-    for x1, y1, x2, y2, ang, _length in angle_matched:
			
 
				-        kept_lines.append((x1, y1, x2, y2, ang))
			
 
				-        cv2.line(line_mask, (x1, y1), (x2, y2), 255, thickness=int(line_thickness))
			
 
				-        cv2.line(lines_filt_bgr, (x1, y1), (x2, y2), (0, 0, 255), 2)
			
 
				-    for x1, y1, x2, y2, _ang, _length in diag_candidates:
			
 
				-        if (x1, y1, x2, y2) not in matched_keys:
			
 
				-            cv2.line(lines_filt_bgr, (x1, y1), (x2, y2), (0, 180, 255), 1)
			
 
				-
			
 
				-    geom = line_mask > 0
			
 
				-    if band_dilate_radius > 0 and np.any(geom):
			
 
				-        k = cv2.getStructuringElement(
			
 
				-            cv2.MORPH_ELLIPSE, (band_dilate_radius * 2 + 1, band_dilate_radius * 2 + 1)
			
 
				-        )
			
 
				-        geom = cv2.dilate(line_mask, k) > 0
			
 
				-
			
 
				-    info: Dict[str, Any] = {
			
 
				-        "hough_total_lines": total_lines,
			
 
				-        "hough_diag_candidates": len(diag_candidates),
			
 
				-        "hough_kept_lines": len(kept_lines),
			
 
				-        "dominant_angles": dominant_angles,
			
 
				-        "angle_tolerance": angle_tolerance,
			
 
				-        "geom_mask_ratio": float(geom.sum() / gray_u8.size),
			
 
				-        "hough_lines_bgr": lines_filt_bgr,
			
 
				-        "hough_lines_all_bgr": lines_all_bgr,
			
 
				-        "angle_histogram_bgr": _render_angle_histogram(hist_smooth, dominant_angles),
			
 
				-    }
			
 
				-    return geom, info
			
 
				-
			
 
				-
			
 
				-def _compute_block_orientation_debug_maps(
			
 
				-    gray: np.ndarray,
			
 
				-    *,
			
 
				-    block_size: int = 48,
			
 
				-) -> Tuple[np.ndarray, np.ndarray]:
			
 
				-    """分块 diag/hv 弱边缘占比图（仅 debug 热力图，0~1 float）。"""
			
 
				-    gray_f = np.asarray(gray, dtype=np.float32)
			
 
				-    bs = max(4, int(block_size))
			
 
				-    h_blocks = gray_f.shape[0] // bs
			
 
				-    w_blocks = gray_f.shape[1] // bs
			
 
				-    if h_blocks == 0 or w_blocks == 0:
			
 
				-        z = np.zeros_like(gray_f, dtype=np.float32)
			
 
				-        return z, z
			
 
				-
			
 
				-    ph, pw = h_blocks * bs, w_blocks * bs
			
 
				-    gx = cv2.Sobel(gray_f, cv2.CV_32F, 1, 0, ksize=3)
			
 
				-    gy = cv2.Sobel(gray_f, cv2.CV_32F, 0, 1, ksize=3)
			
 
				-    mag = np.sqrt(gx * gx + gy * gy)
			
 
				-    ori = np.arctan2(gy, gx) * 180.0 / np.pi
			
 
				-
			
 
				-    diag = (
			
 
				-        ((ori > 25) & (ori < 65))
			
 
				-        | ((ori > 115) & (ori < 155))
			
 
				-        | ((ori > -155) & (ori < -115))
			
 
				-        | ((ori > -65) & (ori < -25))
			
 
				-    )
			
 
				-    hv = (
			
 
				-        ((ori > -20) & (ori < 20))
			
 
				-        | ((ori > 160) | (ori < -160))
			
 
				-        | ((ori > 70) & (ori < 110))
			
 
				-        | ((ori > -110) & (ori < -70))
			
 
				-    )
			
 
				-    weak = (mag > 1) & (mag < 15)
			
 
				-
			
 
				-    def _to_blocks(arr: np.ndarray) -> np.ndarray:
			
 
				-        return (
			
 
				-            arr[:ph, :pw]
			
 
				-            .reshape(h_blocks, bs, w_blocks, bs)
			
 
				-            .transpose(0, 2, 1, 3)
			
 
				-            .reshape(h_blocks, w_blocks, -1)
			
 
				-        )
			
 
				-
			
 
				-    b_diag = _to_blocks(diag)
			
 
				-    b_hv = _to_blocks(hv)
			
 
				-    b_weak = _to_blocks(weak)
			
 
				-    diag_weak = np.sum(b_diag & b_weak, axis=2)
			
 
				-    hv_weak = np.sum(b_hv & b_weak, axis=2)
			
 
				-    total_weak = np.sum(b_weak, axis=2)
			
 
				-    with np.errstate(divide="ignore", invalid="ignore"):
			
 
				-        diag_ratio = np.where(total_weak > 0, diag_weak / total_weak, 0.0).astype(np.float32)
			
 
				-        hv_ratio = np.where(total_weak > 0, hv_weak / total_weak, 0.0).astype(np.float32)
			
 
				-
			
 
				-    diag_up = np.repeat(np.repeat(diag_ratio, bs, axis=0), bs, axis=1)
			
 
				-    hv_up = np.repeat(np.repeat(hv_ratio, bs, axis=0), bs, axis=1)
			
 
				-    diag_full = np.zeros_like(gray_f, dtype=np.float32)
			
 
				-    hv_full = np.zeros_like(gray_f, dtype=np.float32)
			
 
				-    diag_full[:ph, :pw] = diag_up
			
 
				-    hv_full[:ph, :pw] = hv_up
			
 
				-    return diag_full, hv_full
			
 
				-
			
 
				-
			
 
				-def render_ratio_heatmap(ratio_map: np.ndarray) -> np.ndarray:
			
 
				-    """将 0~1 浮点占比图转为 BGR 热力图。"""
			
 
				-    r = np.clip(np.asarray(ratio_map, dtype=np.float32), 0.0, 1.0)
			
 
				-    u8 = (r * 255).astype(np.uint8)
			
 
				-    return cv2.applyColorMap(u8, cv2.COLORMAP_JET)
			
 
				-
			
 
				-
			
 
				-def save_watermark_mask_debug_layers(
			
 
				-    image: np.ndarray,
			
 
				-    output_dir: Union[str, Path],
			
 
				-    stem: str,
			
 
				-    debug: Dict[str, Any],
			
 
				-    *,
			
 
				-    image_format: str = "png",
			
 
				-) -> Dict[str, str]:
			
 
				-    """保存分层 debug 图（方案 D）。"""
			
 
				-    out_dir = Path(output_dir)
			
 
				-    out_dir.mkdir(parents=True, exist_ok=True)
			
 
				-    fmt = (image_format or "png").lstrip(".")
			
 
				-    paths: Dict[str, str] = {}
			
 
				-
			
 
				-    def _save_overlay(name: str, mask: Optional[np.ndarray], color=(0, 0, 255)) -> None:
			
 
				-        if mask is None or not np.any(mask):
			
 
				-            return
			
 
				-        ov = render_watermark_mask_overlay(image, mask, color=color)
			
 
				-        p = out_dir / f"{stem}_{name}.{fmt}"
			
 
				-        cv2.imwrite(str(p), cv2.cvtColor(ov, cv2.COLOR_RGB2BGR) if ov.shape[2] == 3 else ov)
			
 
				-        paths[name] = str(p)
			
 
				-
			
 
				-    _save_overlay("wm_candidate_overlay", debug.get("wm_candidate"))
			
 
				-    _save_overlay("geom_region_overlay", debug.get("geom_region"), color=(0, 180, 255))
			
 
				-    _save_overlay("geom_candidate_overlay", debug.get("geom_candidate"), color=(0, 255, 0))
			
 
				-    _save_overlay("wm_mask_overlay", debug.get("wm_mask"), color=(255, 0, 0))
			
 
				-
			
 
				-    hough_bgr = debug.get("hough_lines_bgr")
			
 
				-    if hough_bgr is not None:
			
 
				-        p = out_dir / f"{stem}_hough_lines.{fmt}"
			
 
				-        cv2.imwrite(str(p), hough_bgr)
			
 
				-        paths["hough_lines"] = str(p)
			
 
				-
			
 
				-    hough_all = debug.get("hough_lines_all_bgr")
			
 
				-    if hough_all is not None:
			
 
				-        p = out_dir / f"{stem}_hough_lines_all.{fmt}"
			
 
				-        cv2.imwrite(str(p), hough_all)
			
 
				-        paths["hough_lines_all"] = str(p)
			
 
				-
			
 
				-    angle_hist = debug.get("angle_histogram_bgr")
			
 
				-    if angle_hist is not None:
			
 
				-        p = out_dir / f"{stem}_angle_histogram.{fmt}"
			
 
				-        cv2.imwrite(str(p), angle_hist)
			
 
				-        paths["angle_histogram"] = str(p)
			
 
				-
			
 
				-    diag_hm = debug.get("diag_ratio_heatmap")
			
 
				-    if diag_hm is not None:
			
 
				-        p = out_dir / f"{stem}_diag_ratio_heatmap.{fmt}"
			
 
				-        cv2.imwrite(str(p), diag_hm)
			
 
				-        paths["diag_ratio_heatmap"] = str(p)
			
 
				-
			
 
				-    hv_hm = debug.get("hv_ratio_heatmap")
			
 
				-    if hv_hm is not None:
			
 
				-        p = out_dir / f"{stem}_hv_ratio_heatmap.{fmt}"
			
 
				-        cv2.imwrite(str(p), hv_hm)
			
 
				-        paths["hv_ratio_heatmap"] = str(p)
			
 
				-
			
 
				-    return paths
			
 
				-
			
 
				-
			
 
				-def _build_diag_region_mask(
			
 
				-    gray: np.ndarray,
			
 
				-    *,
			
 
				-    block_size: int = 48,
			
 
				-    diag_ratio_thresh: float = 0.20,
			
 
				-    light_gray_thresh: int = 238,
			
 
				-    light_ratio_thresh: float = 0.10,
			
 
				-    min_edge_count: int = 10,
			
 
				-    dilate_radius: int = 3,
			
 
				-) -> np.ndarray:
			
 
				-    """
			
 
				-    分块梯度方向检测：返回对角线方向纹理占优的区域掩膜。
			
 
				-
			
 
				-    原理：水印是45°斜向字符，其梯度主方向在30-60°和120-150°。
			
 
				-    分块统计该方向弱边缘占比，高频块标记为水印候选区域。
			
 
				-
			
 
				-    Returns:
			
 
				-        bool ndarray, 与 gray 同形状，True=疑似斜向水印区域。
			
 
				-    """
			
 
				-    gray_f = np.asarray(gray, dtype=np.float32)
			
 
				-    img_h, img_w = gray_f.shape
			
 
				-    bs = max(4, int(block_size))
			
 
				-
			
 
				-    # Sobel 梯度
			
 
				-    gx = cv2.Sobel(gray_f, cv2.CV_32F, 1, 0, ksize=3)
			
 
				-    gy = cv2.Sobel(gray_f, cv2.CV_32F, 0, 1, ksize=3)
			
 
				-    mag = np.sqrt(gx * gx + gy * gy)
			
 
				-    ori = np.arctan2(gy, gx) * 180.0 / np.pi
			
 
				-
			
 
				-    # 对角线方向 (±45° 附近，即梯度 30-65° / 115-155°)
			
 
				-    diag = (
			
 
				-        ((ori > 25) & (ori < 65))
			
 
				-        | ((ori > 115) & (ori < 155))
			
 
				-        | ((ori > -155) & (ori < -115))
			
 
				-        | ((ori > -65) & (ori < -25))
			
 
				-    )
			
 
				-
			
 
				-    h_blocks = img_h // bs
			
 
				-    w_blocks = img_w // bs
			
 
				-    if h_blocks == 0 or w_blocks == 0:
			
 
				-        return np.zeros_like(gray, dtype=bool)
			
 
				-
			
 
				-    ph, pw = h_blocks * bs, w_blocks * bs
			
 
				-
			
 
				-    # 分块统计
			
 
				-    def _to_blocks(arr: np.ndarray) -> np.ndarray:
			
 
				-        return arr[:ph, :pw].reshape(h_blocks, bs, w_blocks, bs).transpose(0, 2, 1, 3).reshape(h_blocks, w_blocks, -1)
			
 
				-
			
 
				-    block_mag = _to_blocks(mag)
			
 
				-    block_diag = _to_blocks(diag)
			
 
				-    block_gray = _to_blocks(gray_f)
			
 
				-
			
 
				-    weak = (block_mag > 1) & (block_mag < 15)
			
 
				-    diag_weak = np.sum(block_diag & weak, axis=2)
			
 
				-    total_weak = np.sum(weak, axis=2)
			
 
				-
			
 
				-    with np.errstate(divide="ignore", invalid="ignore"):
			
 
				-        diag_ratio = np.where(total_weak > 0, diag_weak / total_weak, 0.0)
			
 
				-    light_ratio = np.mean(block_gray >= light_gray_thresh, axis=2)
			
 
				-
			
 
				-    wm_blocks = (
			
 
				-        (diag_ratio > diag_ratio_thresh)
			
 
				-        & (light_ratio > light_ratio_thresh)
			
 
				-        & (total_weak > min_edge_count)
			
 
				-    )
			
 
				-
			
 
				-    # 展开为像素掩膜
			
 
				-    wm_block_mask = np.repeat(np.repeat(wm_blocks, bs, axis=0), bs, axis=1)
			
 
				-    full_mask = np.zeros(gray_f.shape, dtype=bool)
			
 
				-    full_mask[:ph, :pw] = wm_block_mask
			
 
				-
			
 
				-    if dilate_radius > 0:
			
 
				-        k = cv2.getStructuringElement(
			
 
				-            cv2.MORPH_ELLIPSE, (dilate_radius * 2 + 1, dilate_radius * 2 + 1)
			
 
				-        )
			
 
				-        full_mask = cv2.dilate(full_mask.astype(np.uint8), k) > 0
			
 
				-
			
 
				-    return full_mask
			
 
				-
			
 
				-
			
 
				-def _build_seal_protect_mask(
			
 
				-    bgr: np.ndarray,
			
 
				-    *,
			
 
				-    hue_high: int = 15,
			
 
				-    sat_min: int = 40,
			
 
				-    value_min: int = 30,
			
 
				-) -> np.ndarray:
			
 
				-    """红色/公章区域保护掩膜（True=保护，不置白）。"""
			
 
				-    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
			
 
				-    lower1 = np.array([0, sat_min, value_min], dtype=np.uint8)
			
 
				-    upper1 = np.array([hue_high, 255, 255], dtype=np.uint8)
			
 
				-    lower2 = np.array([170, sat_min, value_min], dtype=np.uint8)
			
 
				-    upper2 = np.array([180, 255, 255], dtype=np.uint8)
			
 
				-    m1 = cv2.inRange(hsv, lower1, upper1)
			
 
				-    m2 = cv2.inRange(hsv, lower2, upper2)
			
 
				-    m2 = cv2.inRange(hsv, lower2, upper2)
			
 
				-    return (m1 > 0) | (m2 > 0)
			
 
				-
			
 
				-
			
 
				-def _build_text_edge_protect(
			
 
				-    gray: np.ndarray,
			
 
				-    *,
			
 
				-    edge_window: int = 5,
			
 
				-    edge_std_thresh: float = 6.0,
			
 
				-    dilate_radius: int = 1,
			
 
				-) -> np.ndarray:
			
 
				-    """基于局部方差的笔画边缘保护掩膜（True=保护，不置白）。"""
			
 
				-    local_std = _local_std_map(gray, window=edge_window)
			
 
				-    edge_mask = local_std >= edge_std_thresh
			
 
				-    if dilate_radius > 0:
			
 
				-        k = cv2.getStructuringElement(
			
 
				-            cv2.MORPH_ELLIPSE, (dilate_radius * 2 + 1, dilate_radius * 2 + 1)
			
 
				-        )
			
 
				-        edge_mask = cv2.dilate(edge_mask.astype(np.uint8), k) > 0
			
 
				-    return edge_mask.astype(bool)
			
 
				-
			
 
				-
			
 
				-def _build_watermark_mask_light_on_white(
			
 
				-    gray: np.ndarray,
			
 
				-    *,
			
 
				-    bgr: Optional[np.ndarray] = None,
			
 
				-    light_gray_low: int = 236,
			
 
				-    light_gray_high: int = 253,
			
 
				-    whiten_gray_low: int = 200,
			
 
				-    text_protect_gray_max: int = 130,
			
 
				-    text_protect_percentile: Optional[float] = None,
			
 
				-    background_threshold: int = 248,
			
 
				-    morph_close_kernel: int = 0,
			
 
				-    morph_close_iter: int = 1,
			
 
				-    morph_dilate_kernel: int = 0,
			
 
				-    morph_dilate_iter: int = 1,
			
 
				-    min_component_area: int = 200,
			
 
				-    low_variance_thresh: float = 0.0,
			
 
				-    edge_window: int = 5,
			
 
				-    direction_filter: str = "hough",
			
 
				-    debug_block_maps: bool = True,
			
 
				-    debug_block_size: int = 48,
			
 
				-    hough_midtone_low: int = 200,
			
 
				-    hough_midtone_high: int = 254,
			
 
				-    hough_canny_low: int = 30,
			
 
				-    hough_canny_high: int = 100,
			
 
				-    hough_threshold: int = 25,
			
 
				-    hough_min_line_length: int = 35,
			
 
				-    hough_max_line_gap: int = 18,
			
 
				-    hough_line_thickness: int = 12,
			
 
				-    hough_band_dilate_radius: int = 14,
			
 
				-    hough_angle_tolerance: float = 5.0,
			
 
				-    hough_use_angle_statistics: bool = True,
			
 
				-    hough_secondary_peak_ratio: float = 0.35,
			
 
				-    hough_min_length_percentile: float = 25.0,
			
 
				-    diag_block_size: int = 0,
			
 
				-    diag_ratio_thresh: float = 0.20,
			
 
				-    diag_light_ratio_thresh: float = 0.10,
			
 
				-    diag_min_edge_count: int = 10,
			
 
				-    diag_dilate_radius: int = 3,
			
 
				-    seal_protect: bool = True,
			
 
				-    seal_hue_high: int = 15,
			
 
				-    seal_sat_min: int = 40,
			
 
				-) -> Tuple[np.ndarray, Dict[str, Any]]:
			
 
				-    """
			
 
				-    白底流水水印掩膜（方案 C + E）。
			
 
				-
			
 
				-    1. Hough 斜向线段 → geom_region（几何限定区域）
			
 
				-    2. wm_candidate = 浅色带且非正文保护
			
 
				-    3. wm_mask = geom_region（置白区域由几何约束；实际白化时再 g>=light_gray_low）
			
 
				-    4. debug 输出 candidate / geom / 交集 / 热力图
			
 
				-    """
			
 
				-    gray_arr = np.asarray(gray)
			
 
				-    bg_th = int(background_threshold)
			
 
				-    low = int(light_gray_low)
			
 
				-    high = int(light_gray_high)
			
 
				-
			
 
				-    if text_protect_gray_max > 0:
			
 
				-        t_protect = float(text_protect_gray_max)
			
 
				-    else:
			
 
				-        dark = gray_arr[gray_arr < min(130, bg_th)]
			
 
				-        if dark.size > 0 and text_protect_percentile is not None:
			
 
				-            t_protect = float(np.percentile(dark, text_protect_percentile))
			
 
				-        else:
			
 
				-            t_protect = 120.0
			
 
				-    text_protect = gray_arr <= t_protect
			
 
				-    low = max(low, int(t_protect) + 25)
			
 
				-
			
 
				-    wm_candidate = (gray_arr >= low) & (gray_arr < high) & (~text_protect)
			
 
				-
			
 
				-    direction = (direction_filter or "hough").lower().strip()
			
 
				-    hough_info: Dict[str, Any] = {}
			
 
				-    geom_region = np.zeros_like(gray_arr, dtype=bool)
			
 
				-
			
 
				-    if direction == "hough":
			
 
				-        geom_region, hough_info = _build_diag_hough_region_mask(
			
 
				-            gray_arr,
			
 
				-            midtone_low=hough_midtone_low,
			
 
				-            midtone_high=hough_midtone_high,
			
 
				-            canny_low=hough_canny_low,
			
 
				-            canny_high=hough_canny_high,
			
 
				-            hough_threshold=hough_threshold,
			
 
				-            min_line_length=hough_min_line_length,
			
 
				-            max_line_gap=hough_max_line_gap,
			
 
				-            angle_tolerance=hough_angle_tolerance,
			
 
				-            use_angle_statistics=hough_use_angle_statistics,
			
 
				-            secondary_peak_ratio=hough_secondary_peak_ratio,
			
 
				-            min_length_percentile=hough_min_length_percentile,
			
 
				-            line_thickness=hough_line_thickness,
			
 
				-            band_dilate_radius=hough_band_dilate_radius,
			
 
				-        )
			
 
				-    elif diag_block_size > 0:
			
 
				-        geom_region = _build_diag_region_mask(
			
 
				-            gray_arr,
			
 
				-            block_size=diag_block_size,
			
 
				-            diag_ratio_thresh=diag_ratio_thresh,
			
 
				-            light_gray_thresh=low,
			
 
				-            light_ratio_thresh=diag_light_ratio_thresh,
			
 
				-            min_edge_count=diag_min_edge_count,
			
 
				-            dilate_radius=diag_dilate_radius,
			
 
				-        )
			
 
				-
			
 
				-    geom_candidate = geom_region & wm_candidate
			
 
				-    wm_mask = geom_region.copy()
			
 
				-
			
 
				-    if min_component_area > 0 and np.any(wm_mask):
			
 
				-        n_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
			
 
				-            wm_mask.astype(np.uint8), connectivity=8
			
 
				-        )
			
 
				-        filtered = np.zeros_like(wm_mask)
			
 
				-        for i in range(1, n_labels):
			
 
				-            if stats[i, cv2.CC_STAT_AREA] >= min_component_area:
			
 
				-                filtered[labels == i] = True
			
 
				-        if np.any(filtered):
			
 
				-            wm_mask = filtered
			
 
				-        elif np.any(geom_region):
			
 
				-            wm_mask = geom_region
			
 
				-
			
 
				-    seal_mask = np.zeros_like(wm_mask, dtype=bool)
			
 
				-    if seal_protect and bgr is not None and bgr.ndim == 3:
			
 
				-        seal_mask = _build_seal_protect_mask(
			
 
				-            bgr, hue_high=seal_hue_high, sat_min=seal_sat_min
			
 
				-        )
			
 
				-        wm_mask &= ~seal_mask
			
 
				-
			
 
				-    midtone = (gray_arr >= low) & (gray_arr < high)
			
 
				-    debug: Dict[str, Any] = {
			
 
				-        "mask_mode": "light_on_white",
			
 
				-        "direction_filter": direction,
			
 
				-        "light_gray_low": low,
			
 
				-        "light_gray_high": high,
			
 
				-        "midtone_ratio": float(midtone.sum() / gray_arr.size),
			
 
				-        "wm_candidate_ratio": float(wm_candidate.sum() / gray_arr.size),
			
 
				-        "geom_mask_ratio": float(geom_region.sum() / gray_arr.size),
			
 
				-        "geom_candidate_ratio": float(geom_candidate.sum() / gray_arr.size),
			
 
				-        "wm_mask_ratio": float(wm_mask.sum() / gray_arr.size),
			
 
				-        "T_protect": t_protect,
			
 
				-        "text_protect_gray_max": text_protect_gray_max,
			
 
				-        "text_protect": text_protect,
			
 
				-        "seal_protect": seal_mask,
			
 
				-        "wm_candidate": wm_candidate,
			
 
				-        "geom_region": geom_region,
			
 
				-        "geom_candidate": geom_candidate,
			
 
				-        "diag_region": geom_region,
			
 
				-        "wm_mask": wm_mask,
			
 
				-        "whiten_gray_low": int(whiten_gray_low),
			
 
				-        "hough_lines_bgr": hough_info.get("hough_lines_bgr"),
			
 
				-        "hough_lines_all_bgr": hough_info.get("hough_lines_all_bgr"),
			
 
				-        "angle_histogram_bgr": hough_info.get("angle_histogram_bgr"),
			
 
				-        "dominant_angles": hough_info.get("dominant_angles", []),
			
 
				-        "hough_kept_lines": hough_info.get("hough_kept_lines", 0),
			
 
				-        "hough_diag_candidates": hough_info.get("hough_diag_candidates", 0),
			
 
				-        "hough_total_lines": hough_info.get("hough_total_lines", 0),
			
 
				-    }
			
 
				-
			
 
				-    if debug_block_maps:
			
 
				-        bs = debug_block_size if debug_block_size > 0 else 48
			
 
				-        diag_map, hv_map = _compute_block_orientation_debug_maps(gray_arr, block_size=bs)
			
 
				-        debug["diag_ratio_heatmap"] = render_ratio_heatmap(diag_map)
			
 
				-        debug["hv_ratio_heatmap"] = render_ratio_heatmap(hv_map)
			
 
				-
			
 
				-    return wm_mask, debug
			
 
				-
			
 
				-
			
 
				-def build_watermark_mask(
			
 
				-    gray: np.ndarray,
			
 
				-    *,
			
 
				-    bgr: Optional[np.ndarray] = None,
			
 
				-    mask_mode: str = "diagonal_midtone",
			
 
				-    light_gray_low: int = 236,
			
 
				-    light_gray_high: int = 253,
			
 
				-    whiten_gray_low: int = 200,
			
 
				-    text_protect_gray_max: int = 130,
			
 
				-    morph_close_kernel: int = 0,
			
 
				-    morph_close_iter: int = 1,
			
 
				-    morph_dilate_kernel: int = 0,
			
 
				-    morph_dilate_iter: int = 1,
			
 
				-    low_variance_thresh: float = 0.0,
			
 
				-    edge_window: int = 5,
			
 
				-    direction_filter: str = "hough",
			
 
				-    debug_block_maps: bool = True,
			
 
				-    debug_block_size: int = 48,
			
 
				-    hough_midtone_low: int = 200,
			
 
				-    hough_midtone_high: int = 254,
			
 
				-    hough_canny_low: int = 30,
			
 
				-    hough_canny_high: int = 100,
			
 
				-    hough_threshold: int = 25,
			
 
				-    hough_min_line_length: int = 35,
			
 
				-    hough_max_line_gap: int = 18,
			
 
				-    hough_line_thickness: int = 12,
			
 
				-    hough_band_dilate_radius: int = 14,
			
 
				-    hough_angle_tolerance: float = 5.0,
			
 
				-    hough_use_angle_statistics: bool = True,
			
 
				-    hough_secondary_peak_ratio: float = 0.35,
			
 
				-    hough_min_length_percentile: float = 25.0,
			
 
				-    diag_block_size: int = 0,
			
 
				-    diag_ratio_thresh: float = 0.20,
			
 
				-    diag_light_ratio_thresh: float = 0.10,
			
 
				-    diag_min_edge_count: int = 10,
			
 
				-    diag_dilate_radius: int = 3,
			
 
				-    # diagonal_midtone 参数
			
 
				-    midtone_low: int = 100,
			
 
				-    midtone_high: int = 220,
			
 
				-    remove_horizontal_vertical: bool = True,
			
 
				-    diagonal_enhance: bool = True,
			
 
				-    diagonal_kernel_length: int = 25,
			
 
				-    horizontal_kernel_length: int = 35,
			
 
				-    vertical_kernel_length: int = 35,
			
 
				-    morph_open_kernel: int = 2,
			
 
				-    dmorph_close_kernel: int = 3,
			
 
				-    min_component_area: int = 200,
			
 
				-    text_protect_percentile: float = 10.0,
			
 
				-    background_threshold: int = 248,
			
 
				-    seal_protect: bool = True,
			
 
				-    seal_hue_high: int = 15,
			
 
				-    seal_sat_min: int = 40,
			
 
				-) -> Tuple[np.ndarray, Dict[str, Any]]:
			
 
				-    """
			
 
				-    构建水印掩膜 wm_mask（True=疑似水印像素）。
			
 
				-
			
 
				-    mask_mode:
			
 
				-        light_on_white — Hough 斜向几何带 + 浅色白化（方案 C/E）
			
 
				-        diagonal_midtone — 中间调 + 斜向形态学（旧逻辑）
			
 
				-    """
			
 
				-    gray = np.asarray(gray)
			
 
				-    if gray.ndim != 2:
			
 
				-        raise ValueError("build_watermark_mask expects single-channel grayscale")
			
 
				-
			
 
				-    mode = (mask_mode or "light_on_white").lower().strip()
			
 
				-    if mode == "light_on_white":
			
 
				-        return _build_watermark_mask_light_on_white(
			
 
				-            gray,
			
 
				-            bgr=bgr,
			
 
				-            light_gray_low=light_gray_low,
			
 
				-            light_gray_high=light_gray_high,
			
 
				-            whiten_gray_low=whiten_gray_low,
			
 
				-            text_protect_gray_max=text_protect_gray_max,
			
 
				-            text_protect_percentile=text_protect_percentile,
			
 
				-            background_threshold=background_threshold,
			
 
				-            morph_close_kernel=morph_close_kernel,
			
 
				-            morph_close_iter=morph_close_iter,
			
 
				-            morph_dilate_kernel=morph_dilate_kernel,
			
 
				-            morph_dilate_iter=morph_dilate_iter,
			
 
				-            low_variance_thresh=low_variance_thresh,
			
 
				-            edge_window=edge_window,
			
 
				-            min_component_area=min_component_area,
			
 
				-            direction_filter=direction_filter,
			
 
				-            debug_block_maps=debug_block_maps,
			
 
				-            debug_block_size=debug_block_size,
			
 
				-            hough_midtone_low=hough_midtone_low,
			
 
				-            hough_midtone_high=hough_midtone_high,
			
 
				-            hough_canny_low=hough_canny_low,
			
 
				-            hough_canny_high=hough_canny_high,
			
 
				-            hough_threshold=hough_threshold,
			
 
				-            hough_min_line_length=hough_min_line_length,
			
 
				-            hough_max_line_gap=hough_max_line_gap,
			
 
				-            hough_line_thickness=hough_line_thickness,
			
 
				-            hough_band_dilate_radius=hough_band_dilate_radius,
			
 
				-            hough_angle_tolerance=hough_angle_tolerance,
			
 
				-            hough_use_angle_statistics=hough_use_angle_statistics,
			
 
				-            hough_secondary_peak_ratio=hough_secondary_peak_ratio,
			
 
				-            hough_min_length_percentile=hough_min_length_percentile,
			
 
				-            diag_block_size=diag_block_size,
			
 
				-            diag_ratio_thresh=diag_ratio_thresh,
			
 
				-            diag_light_ratio_thresh=diag_light_ratio_thresh,
			
 
				-            diag_min_edge_count=diag_min_edge_count,
			
 
				-            diag_dilate_radius=diag_dilate_radius,
			
 
				-            seal_protect=seal_protect,
			
 
				-            seal_hue_high=seal_hue_high,
			
 
				-            seal_sat_min=seal_sat_min,
			
 
				-        )
			
 
				-
			
 
				-    midtone = (gray > midtone_low) & (gray < midtone_high)
			
 
				-    mid_u8 = (midtone.astype(np.uint8)) * 255
			
 
				-
			
 
				-    horiz = np.zeros_like(midtone, dtype=bool)
			
 
				-    vert = np.zeros_like(midtone, dtype=bool)
			
 
				-    if remove_horizontal_vertical:
			
 
				-        kh = cv2.getStructuringElement(
			
 
				-            cv2.MORPH_RECT, (max(3, horizontal_kernel_length), 1)
			
 
				-        )
			
 
				-        kv = cv2.getStructuringElement(
			
 
				-            cv2.MORPH_RECT, (1, max(3, vertical_kernel_length))
			
 
				-        )
			
 
				-        horiz = cv2.morphologyEx(mid_u8, cv2.MORPH_OPEN, kh) > 0
			
 
				-        vert = cv2.morphologyEx(mid_u8, cv2.MORPH_OPEN, kv) > 0
			
 
				-
			
 
				-    # 中间调去掉明显横竖线（保留斜向水印）
			
 
				-    candidate = midtone & ~(horiz | vert)
			
 
				-
			
 
				-    if diagonal_enhance:
			
 
				-        k45 = _line_structuring_kernel(diagonal_kernel_length, 45)
			
 
				-        k135 = _line_structuring_kernel(diagonal_kernel_length, 135)
			
 
				-        d45 = cv2.morphologyEx(mid_u8, cv2.MORPH_OPEN, k45) > 0
			
 
				-        d135 = cv2.morphologyEx(mid_u8, cv2.MORPH_OPEN, k135) > 0
			
 
				-        direction = d45 | d135
			
 
				-        dilate_k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
			
 
				-        near_diag = cv2.dilate(direction.astype(np.uint8), dilate_k) > 0
			
 
				-        # 斜向结构足够时收窄到斜向附近；否则保留「中间调减横竖」结果
			
 
				-        if near_diag.sum() > gray.size * 0.001:
			
 
				-            candidate = candidate & near_diag
			
 
				-
			
 
				-    cand_u8 = (candidate.astype(np.uint8)) * 255
			
 
				-    if morph_open_kernel > 0:
			
 
				-        k_open = cv2.getStructuringElement(
			
 
				-            cv2.MORPH_ELLIPSE, (morph_open_kernel, morph_open_kernel)
			
 
				-        )
			
 
				-        cand_u8 = cv2.morphologyEx(cand_u8, cv2.MORPH_OPEN, k_open)
			
 
				-    if dmorph_close_kernel > 0:
			
 
				-        k_close = cv2.getStructuringElement(
			
 
				-            cv2.MORPH_ELLIPSE, (dmorph_close_kernel, dmorph_close_kernel)
			
 
				-        )
			
 
				-        cand_u8 = cv2.morphologyEx(cand_u8, cv2.MORPH_CLOSE, k_close)
			
 
				-
			
 
				-    wm_mask = cand_u8 > 0
			
 
				-
			
 
				-    if min_component_area > 0:
			
 
				-        n_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
			
 
				-            wm_mask.astype(np.uint8), connectivity=8
			
 
				-        )
			
 
				-        filtered = np.zeros_like(wm_mask)
			
 
				-        for i in range(1, n_labels):
			
 
				-            if stats[i, cv2.CC_STAT_AREA] >= min_component_area:
			
 
				-                filtered[labels == i] = True
			
 
				-        wm_mask = filtered
			
 
				-
			
 
				-    non_bg = gray[gray < background_threshold]
			
 
				-    if non_bg.size > 0:
			
 
				-        t_protect = float(np.percentile(non_bg, text_protect_percentile))
			
 
				-    else:
			
 
				-        t_protect = 85.0
			
 
				-    t_protect = max(t_protect, float(midtone_low))
			
 
				-    text_protect = gray <= t_protect
			
 
				-
			
 
				-    midtone_ratio = float(midtone.sum() / gray.size)
			
 
				-    wm_ratio = float(wm_mask.sum() / gray.size)
			
 
				-
			
 
				-    # 掩膜过小：回退为「中间调减横竖」或整块中间调（满版斜纹水印常见）
			
 
				-    min_wm_ratio = max(0.005, midtone_ratio * 0.12)
			
 
				-    if wm_ratio < min_wm_ratio:
			
 
				-        relaxed = midtone & ~(horiz | vert) & (~text_protect)
			
 
				-        if relaxed.sum() / gray.size < min_wm_ratio:
			
 
				-            relaxed = midtone & (~text_protect)
			
 
				-        wm_mask = relaxed
			
 
				-        wm_ratio = float(wm_mask.sum() / gray.size)
			
 
				-
			
 
				-    seal_mask = np.zeros_like(wm_mask, dtype=bool)
			
 
				-    if seal_protect and bgr is not None and bgr.ndim == 3:
			
 
				-        seal_mask = _build_seal_protect_mask(
			
 
				-            bgr, hue_high=seal_hue_high, sat_min=seal_sat_min
			
 
				-        )
			
 
				-
			
 
				-    debug: Dict[str, Any] = {
			
 
				-        "mask_mode": "diagonal_midtone",
			
 
				-        "midtone_ratio": midtone_ratio,
			
 
				-        "wm_mask_ratio": wm_ratio,
			
 
				-        "T_protect": t_protect,
			
 
				-        "text_protect": text_protect,
			
 
				-        "seal_protect": seal_mask,
			
 
				-        "midtone_mask": midtone,
			
 
				-        "wm_mask": wm_mask,
			
 
				-    }
			
 
				-    return wm_mask, debug
			
 
				-
			
 
				-
			
 
				-def remove_watermark_masked_adaptive(
			
 
				-    gray: np.ndarray,
			
 
				-    *,
			
 
				-    bgr: Optional[np.ndarray] = None,
			
 
				-    mask_cfg: Optional[Dict[str, Any]] = None,
			
 
				-    adaptive_cfg: Optional[Dict[str, Any]] = None,
			
 
				-    threshold_fallback: int = 175,
			
 
				-    morph_close_kernel: int = 0,
			
 
				-) -> Tuple[np.ndarray, Dict[str, Any]]:
			
 
				-    """
			
 
				-    掩膜内置白（whiten_mode=mask_fill）或掩膜内动态阈值（threshold_in_mask）。
			
 
				-
			
 
				-    掩膜为空时回退全局 threshold_fallback。
			
 
				-    """
			
 
				-    gray = np.asarray(gray).copy()
			
 
				-    mcfg: Dict[str, Any] = {
			
 
				-        "mask_mode": "light_on_white",
			
 
				-        "light_gray_low": 236,
			
 
				-        "light_gray_high": 253,
			
 
				-        "whiten_gray_low": 200,
			
 
				-        "text_protect_gray_max": 130,
			
 
				-        "morph_close_kernel": 0,
			
 
				-        "morph_close_iter": 1,
			
 
				-        "morph_dilate_kernel": 0,
			
 
				-        "morph_dilate_iter": 1,
			
 
				-        "low_variance_thresh": 0.0,
			
 
				-        "edge_window": 5,
			
 
				-        "min_component_area": 200,
			
 
				-        "direction_filter": "hough",
			
 
				-        "debug_block_maps": True,
			
 
				-        "debug_block_size": 48,
			
 
				-        "hough_midtone_low": 200,
			
 
				-        "hough_midtone_high": 254,
			
 
				-        "hough_canny_low": 30,
			
 
				-        "hough_canny_high": 100,
			
 
				-        "hough_threshold": 25,
			
 
				-        "hough_min_line_length": 35,
			
 
				-        "hough_max_line_gap": 18,
			
 
				-        "hough_line_thickness": 12,
			
 
				-        "hough_band_dilate_radius": 14,
			
 
				-        "hough_angle_tolerance": 5.0,
			
 
				-        "hough_use_angle_statistics": True,
			
 
				-        "hough_secondary_peak_ratio": 0.35,
			
 
				-        "hough_min_length_percentile": 25.0,
			
 
				-        "diag_block_size": 0,
			
 
				-        "diag_ratio_thresh": 0.20,
			
 
				-        "diag_light_ratio_thresh": 0.10,
			
 
				-        "diag_min_edge_count": 10,
			
 
				-        "diag_dilate_radius": 3,
			
 
				-        "midtone_low": 100,
			
 
				-        "midtone_high": 220,
			
 
				-        "remove_horizontal_vertical": True,
			
 
				-        "diagonal_enhance": True,
			
 
				-        "diagonal_kernel_length": 25,
			
 
				-        "horizontal_kernel_length": 35,
			
 
				-        "vertical_kernel_length": 35,
			
 
				-        "morph_open_kernel": 2,
			
 
				-        "dmorph_close_kernel": 3,
			
 
				-        "text_protect_percentile": 10.0,
			
 
				-        "background_threshold": 248,
			
 
				-        "seal_protect": True,
			
 
				-        "seal_hue_high": 15,
			
 
				-        "seal_sat_min": 40,
			
 
				-    }
			
 
				-    mcfg.update(mask_cfg or {})
			
 
				-    mask_mode = str(mcfg.get("mask_mode", "light_on_white")).lower().strip()
			
 
				-
			
 
				-    # light_on_white 默认 mask_fill
			
 
				-    acfg: Dict[str, Any] = {
			
 
				-        "whiten_mode": None,
			
 
				-        "text_percentile": 10.0,
			
 
				-        "watermark_percentile": 88.0,
			
 
				-        "background_percentile": 95.0,
			
 
				-        "background_threshold": 248,
			
 
				-        "wm_margin": 12,
			
 
				-        "text_protect_max": 120,
			
 
				-    }
			
 
				-    acfg.update(adaptive_cfg or {})
			
 
				-    whiten_mode = acfg.get("whiten_mode")
			
 
				-    if not whiten_mode:
			
 
				-        whiten_mode = (
			
 
				-            "mask_fill"
			
 
				-            if mask_mode == "light_on_white"
			
 
				-            else "threshold_in_mask"
			
 
				-        )
			
 
				-    whiten_mode = str(whiten_mode).lower().strip()
			
 
				-
			
 
				-    wm_mask, debug = build_watermark_mask(gray, bgr=bgr, **mcfg)
			
 
				-
			
 
				-    if not np.any(wm_mask):
			
 
				-        cleaned = gray.copy()
			
 
				-        cleaned[gray > threshold_fallback] = 255
			
 
				-        debug["mode"] = "fallback_threshold"
			
 
				-        debug["threshold_fallback"] = threshold_fallback
			
 
				-        if morph_close_kernel > 0:
			
 
				-            kernel = np.ones((morph_close_kernel, morph_close_kernel), np.uint8)
			
 
				-            cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)
			
 
				-        return cleaned, debug
			
 
				-
			
 
				-    bg_th = int(acfg["background_threshold"])
			
 
				-    bg_pixels = gray[gray >= bg_th]
			
 
				-    if bg_pixels.size > 0:
			
 
				-        b_level = float(np.percentile(bg_pixels, acfg["background_percentile"]))
			
 
				-    else:
			
 
				-        b_level = 250.0
			
 
				-
			
 
				-    if mask_mode == "light_on_white":
			
 
				-        t_protect = float(debug.get("T_protect", 150.0))
			
 
				-    else:
			
 
				-        non_bg = gray[gray < bg_th]
			
 
				-        if non_bg.size > 0:
			
 
				-            t_protect = float(np.percentile(non_bg, acfg["text_percentile"]))
			
 
				-        else:
			
 
				-            t_protect = float(debug.get("T_protect", 85.0))
			
 
				-        t_protect = min(t_protect, float(acfg["text_protect_max"]))
			
 
				-        t_protect = max(t_protect, float(mcfg.get("midtone_low", 100)))
			
 
				-
			
 
				-    text_protect = debug["text_protect"]
			
 
				-    seal_protect = debug["seal_protect"]
			
 
				-    t_wm: Optional[float] = None
			
 
				-
			
 
				-    if whiten_mode == "mask_fill":
			
 
				-        # 几何带内：g>=whiten_gray_low 置白；g<=130 正文硬保护（方案 E）
			
 
				-        wm_gray_low = float(
			
 
				-            mcfg.get("whiten_gray_low", debug.get("whiten_gray_low", 200))
			
 
				-        )
			
 
				-        to_white = (
			
 
				-            wm_mask
			
 
				-            & (gray >= wm_gray_low)
			
 
				-            & (gray < int(mcfg.get("light_gray_high", 254)))
			
 
				-            & (~text_protect)
			
 
				-            & (~seal_protect)
			
 
				-        )
			
 
				-    else:
			
 
				-        mask_vals = gray[wm_mask]
			
 
				-        if mask_vals.size > 0:
			
 
				-            t_wm = float(np.percentile(mask_vals, acfg["watermark_percentile"]))
			
 
				-        else:
			
 
				-            t_wm = t_protect + 0.45 * (b_level - t_protect)
			
 
				-        margin = float(acfg["wm_margin"])
			
 
				-        t_wm = max(t_wm, t_protect + margin)
			
 
				-        t_wm = min(t_wm, b_level - 3.0)
			
 
				-        t_wm = min(t_wm, float(mcfg.get("midtone_high", 220)) - 5.0)
			
 
				-        to_white = wm_mask & (gray >= t_wm) & (~text_protect) & (~seal_protect)
			
 
				-
			
 
				-    cleaned = gray.copy()
			
 
				-    cleaned[to_white] = 255
			
 
				-
			
 
				-    if morph_close_kernel > 0:
			
 
				-        kernel = np.ones((morph_close_kernel, morph_close_kernel), np.uint8)
			
 
				-        cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)
			
 
				-
			
 
				-    debug.update(
			
 
				-        {
			
 
				-            "mode": "masked_adaptive",
			
 
				-            "mask_mode": mask_mode,
			
 
				-            "whiten_mode": whiten_mode,
			
 
				-            "T_wm": t_wm,
			
 
				-            "T_protect": t_protect,
			
 
				-            "B_level": b_level,
			
 
				-            "white_pixel_ratio": float(to_white.sum() / gray.size),
			
 
				-            "threshold_fallback": threshold_fallback,
			
 
				-        }
			
 
				-    )
			
 
				-    return cleaned, debug
			
 
				-
			
 
				-
			
 
				-def _image_to_gray_and_bgr(
			
 
				-    image: Union[np.ndarray, Image.Image],
			
 
				-) -> Tuple[np.ndarray, Optional[np.ndarray]]:
			
 
				-    """统一为灰度 + 可选 BGR（用于掩膜公章保护）。"""
			
 
				-    if isinstance(image, Image.Image):
			
 
				-        pil_img = image.convert("RGB") if image.mode == "RGBA" else image
			
 
				-        np_img = np.array(pil_img)
			
 
				-        np_img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
			
 
				-    else:
			
 
				-        np_img = image.copy()
			
 
				-
			
 
				-    if np_img.ndim == 3:
			
 
				-        bgr = np_img
			
 
				-        gray = cv2.cvtColor(np_img, cv2.COLOR_BGR2GRAY)
			
 
				-    else:
			
 
				-        bgr = None
			
 
				-        gray = np_img
			
 
				-    return gray, bgr
			
 
				-
			
 
				-
			
 
				-def _enhance_text_restore(
			
 
				-    gray: np.ndarray,
			
 
				-    *,
			
 
				-    background_threshold: int = 248,
			
 
				-    text_lo_percentile: float = 1.0,
			
 
				-    text_hi_percentile: float = 99.0,
			
 
				-    text_black_target: int = 85,
			
 
				-) -> np.ndarray:
			
 
				-    """
			
 
				-    仅对非背景像素做动态范围压缩，将最深笔画拉向 text_black_target（默认 ~85，接近扫描件原图）。
			
 
				-
			
 
				-    背景（>= background_threshold）保持白色，避免整图 gamma 导致背景发灰。
			
 
				-    """
			
 
				-    result = gray.copy()
			
 
				-    bg_th = int(np.clip(background_threshold, 200, 255))
			
 
				-    text_mask = gray < bg_th
			
 
				-    if not np.any(text_mask):
			
 
				-        return result
			
 
				-
			
 
				-    vals = gray[text_mask].astype(np.float32)
			
 
				-    lo = float(np.percentile(vals, text_lo_percentile))
			
 
				-    hi = float(np.percentile(vals, text_hi_percentile))
			
 
				-    target = int(np.clip(text_black_target, 10, 200))
			
 
				-    if hi <= lo + 1.0:
			
 
				-        return result
			
 
				-
			
 
				-    stretched = (vals - lo) * target / (hi - lo)
			
 
				-    result[text_mask] = np.clip(stretched, 0, 255).astype(np.uint8)
			
 
				-    return result
			
 
				-
			
 
				-
			
 
				-def enhance_document_contrast(
			
 
				-    gray: np.ndarray,
			
 
				-    method: str = "text_restore",
			
 
				-    *,
			
 
				-    clip_limit: float = 2.0,
			
 
				-    tile_grid_size: int = 8,
			
 
				-    gamma: float = 0.85,
			
 
				-    black_percentile: float = 2.0,
			
 
				-    white_percentile: float = 98.0,
			
 
				-    background_threshold: int = 248,
			
 
				-    text_lo_percentile: float = 1.0,
			
 
				-    text_hi_percentile: float = 99.0,
			
 
				-    text_black_target: int = 85,
			
 
				-) -> np.ndarray:
			
 
				-    """
			
 
				-    文档灰度图对比度增强（常用于去水印后恢复笔画深度）。
			
 
				-
			
 
				-    Args:
			
 
				-        gray: 单通道 uint8 灰度图
			
 
				-        method: text_restore | clahe | gamma | linear
			
 
				-        clip_limit: CLAHE 对比度限制
			
 
				-        tile_grid_size: CLAHE 分块大小
			
 
				-        gamma: gamma 校正指数，<1 加深文字（去水印后发浅时适用）
			
 
				-        black_percentile: linear 拉伸下分位（映射到 0）
			
 
				-        white_percentile: linear 拉伸上分位（映射到 255）
			
 
				-        background_threshold: text_restore 背景阈值（>= 视为白底不处理）
			
 
				-        text_lo_percentile: text_restore 笔画下分位
			
 
				-        text_hi_percentile: text_restore 笔画上分位（映射到 text_black_target）
			
 
				-        text_black_target: text_restore 最深笔画目标灰度（越小越深，建议 75~95）
			
 
				-
			
 
				-    Returns:
			
 
				-        增强后的灰度图
			
 
				-    """
			
 
				-    if gray is None or gray.size == 0:
			
 
				-        return gray
			
 
				-    if gray.ndim != 2:
			
 
				-        raise ValueError("enhance_document_contrast expects single-channel grayscale image")
			
 
				-
			
 
				-    method = (method or "text_restore").lower().strip()
			
 
				-
			
 
				-    if method == "text_restore":
			
 
				-        return _enhance_text_restore(
			
 
				-            gray,
			
 
				-            background_threshold=background_threshold,
			
 
				-            text_lo_percentile=text_lo_percentile,
			
 
				-            text_hi_percentile=text_hi_percentile,
			
 
				-            text_black_target=text_black_target,
			
 
				-        )
			
 
				-
			
 
				-    if method == "gamma":
			
 
				-        gamma = max(0.1, min(float(gamma), 3.0))
			
 
				-        inv_gamma = 1.0 / gamma
			
 
				-        table = np.array(
			
 
				-            [((i / 255.0) ** inv_gamma) * 255 for i in range(256)],
			
 
				-            dtype=np.uint8,
			
 
				-        )
			
 
				-        return cv2.LUT(gray, table)
			
 
				-
			
 
				-    if method == "linear":
			
 
				-        p_low = float(np.percentile(gray, black_percentile))
			
 
				-        p_high = float(np.percentile(gray, white_percentile))
			
 
				-        if p_high <= p_low + 1.0:
			
 
				-            return gray
			
 
				-        stretched = (gray.astype(np.float32) - p_low) * 255.0 / (p_high - p_low)
			
 
				-        return np.clip(stretched, 0, 255).astype(np.uint8)
			
 
				-
			
 
				-    # 默认 CLAHE：局部对比度，适合扫描件
			
 
				-    tile = max(2, int(tile_grid_size))
			
 
				-    clahe = cv2.createCLAHE(
			
 
				-        clipLimit=max(0.1, float(clip_limit)),
			
 
				-        tileGridSize=(tile, tile),
			
 
				-    )
			
 
				-    return clahe.apply(gray)
			
 
				-
			
 
				-
			
 
				-def apply_contrast_enhancement_config(
			
 
				-    gray: np.ndarray,
			
 
				-    contrast_cfg: Optional[Dict[str, Any]],
			
 
				-) -> np.ndarray:
			
 
				-    """按配置字典应用对比度增强；未启用时原样返回。"""
			
 
				-    if not contrast_cfg or not contrast_cfg.get("enabled", False):
			
 
				-        return gray
			
 
				-    return enhance_document_contrast(
			
 
				-        gray,
			
 
				-        method=contrast_cfg.get("method", "text_restore"),
			
 
				-        clip_limit=contrast_cfg.get("clip_limit", 2.0),
			
 
				-        tile_grid_size=contrast_cfg.get("tile_grid_size", 8),
			
 
				-        gamma=contrast_cfg.get("gamma", 0.85),
			
 
				-        black_percentile=contrast_cfg.get("black_percentile", 2.0),
			
 
				-        white_percentile=contrast_cfg.get("white_percentile", 98.0),
			
 
				-        background_threshold=contrast_cfg.get("background_threshold", 248),
			
 
				-        text_lo_percentile=contrast_cfg.get("text_lo_percentile", 1.0),
			
 
				-        text_hi_percentile=contrast_cfg.get("text_hi_percentile", 99.0),
			
 
				-        text_black_target=contrast_cfg.get("text_black_target", 75),
			
 
				-    )
			
 
				-
			
 
				-
			
 
				-def remove_watermark_from_image(
			
 
				-    image: Union[np.ndarray, Image.Image],
			
 
				-    threshold: int = 160,
			
 
				-    morph_close_kernel: int = 2,
			
 
				-    return_pil: Optional[bool] = None,
			
 
				-    watermark_removal_cfg: Optional[Dict[str, Any]] = None,
			
 
				-    removal_debug: Optional[Dict[str, Any]] = None,
			
 
				-) -> Union[np.ndarray, Image.Image]:
			
 
				-    """
			
 
				-    去除图像中的浅色斜向文字水印，返回灰度图。
			
 
				-
			
 
				-    method（watermark_removal_cfg）:
			
 
				-        threshold（默认）: gray > threshold → 255
			
 
				-        masked / masked_adaptive: 掩膜 + 掩膜内动态阈值
			
 
				-
			
 
				-    Args:
			
 
				-        image: 输入图像（PIL.Image 或 np.ndarray BGR/RGB/灰度）。
			
 
				-        threshold: 全局阈值或掩膜失败时的回退阈值。
			
 
				-        morph_close_kernel: 形态学闭运算核大小，0 跳过。
			
 
				-        watermark_removal_cfg: 完整配置（含 method / mask / adaptive）。
			
 
				-        removal_debug: 若传入 dict，写入掩膜与 T_wm 等调试字段。
			
 
				-
			
 
				-    Returns:
			
 
				-        去除水印后的灰度图：PIL.Image(mode='L') 或 np.ndarray(HxW, uint8)。
			
 
				-    """
			
 
				-    input_is_pil = isinstance(image, Image.Image)
			
 
				-    cfg = watermark_removal_cfg or {}
			
 
				-    method = str(cfg.get("method") or "threshold").lower().strip()
			
 
				-
			
 
				-    gray, bgr = _image_to_gray_and_bgr(image)
			
 
				-
			
 
				-    if method in ("masked", "masked_adaptive"):
			
 
				-        cleaned, dbg = remove_watermark_masked_adaptive(
			
 
				-            gray,
			
 
				-            bgr=bgr,
			
 
				-            mask_cfg=cfg.get("mask") if isinstance(cfg.get("mask"), dict) else None,
			
 
				-            adaptive_cfg=cfg.get("adaptive")
			
 
				-            if isinstance(cfg.get("adaptive"), dict)
			
 
				-            else None,
			
 
				-            threshold_fallback=threshold,
			
 
				-            morph_close_kernel=morph_close_kernel,
			
 
				-        )
			
 
				-        if removal_debug is not None:
			
 
				-            removal_debug.clear()
			
 
				-            removal_debug.update(dbg)
			
 
				-    else:
			
 
				-        cleaned = gray.copy()
			
 
				-        cleaned[gray > threshold] = 255
			
 
				-        if morph_close_kernel > 0:
			
 
				-            kernel = np.ones((morph_close_kernel, morph_close_kernel), np.uint8)
			
 
				-            cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)
			
 
				-        if removal_debug is not None:
			
 
				-            removal_debug.clear()
			
 
				-            removal_debug.update({"mode": "threshold", "threshold": threshold})
			
 
				-
			
 
				-    should_return_pil = input_is_pil if return_pil is None else return_pil
			
 
				-    return Image.fromarray(cleaned, mode='L') if should_return_pil else cleaned
			
 
				-
			
 
				-
			
 
				-def remove_watermark_from_image_rgb(
			
 
				-    image: Union[np.ndarray, Image.Image],
			
 
				-    threshold: int = 160,
			
 
				-    morph_close_kernel: int = 2,
			
 
				-    return_pil: Optional[bool] = None,
			
 
				-    contrast_enhancement: Optional[Dict[str, Any]] = None,
			
 
				-    apply_watermark_removal: bool = True,
			
 
				-    watermark_removal_cfg: Optional[Dict[str, Any]] = None,
			
 
				-    removal_debug: Optional[Dict[str, Any]] = None,
			
 
				-) -> Union[np.ndarray, Image.Image]:
			
 
				-    """
			
 
				-    去除水印并返回 RGB 三通道图像。
			
 
				-
			
 
				-    与 remove_watermark_from_image 逻辑相同，但输出为 RGB（三通道），
			
 
				-    方便直接传入布局检测、OCR 等需要彩色输入的下游模型。
			
 
				-
			
 
				-    Args:
			
 
				-        contrast_enhancement: 对比度增强配置（含 enabled / method 等），见 apply_contrast_enhancement_config
			
 
				-        apply_watermark_removal: False 时跳过阈值抹白，仅做对比度增强（若启用）
			
 
				-
			
 
				-    Args/Returns: 同 remove_watermark_from_image，但输出为 RGB/BGR 三通道。
			
 
				-    """
			
 
				-    input_is_pil = isinstance(image, Image.Image)
			
 
				-
			
 
				-    if apply_watermark_removal:
			
 
				-        gray_result = remove_watermark_from_image(
			
 
				-            image,
			
 
				-            threshold,
			
 
				-            morph_close_kernel,
			
 
				-            return_pil=False,
			
 
				-            watermark_removal_cfg=watermark_removal_cfg,
			
 
				-            removal_debug=removal_debug,
			
 
				-        )
			
 
				-    else:
			
 
				-        if isinstance(image, Image.Image):
			
 
				-            np_img = np.array(image.convert("RGB"))
			
 
				-            np_img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
			
 
				-        else:
			
 
				-            np_img = image.copy()
			
 
				-        gray_result = (
			
 
				-            cv2.cvtColor(np_img, cv2.COLOR_BGR2GRAY)
			
 
				-            if np_img.ndim == 3
			
 
				-            else np_img
			
 
				-        )
			
 
				-
			
 
				-    gray_result = apply_contrast_enhancement_config(gray_result, contrast_enhancement)
			
 
				-    rgb_np = cv2.cvtColor(gray_result, cv2.COLOR_GRAY2BGR)
			
 
				-
			
 
				-    should_return_pil = input_is_pil if return_pil is None else return_pil
			
 
				-    if should_return_pil:
			
 
				-        return Image.fromarray(cv2.cvtColor(rgb_np, cv2.COLOR_BGR2RGB))
			
 
				-    return rgb_np
			
 
				-
			
 
				-
			
 
				-def render_watermark_mask_overlay(
			
 
				-    image: np.ndarray,
			
 
				-    wm_mask: np.ndarray,
			
 
				-    *,
			
 
				-    color: Tuple[int, int, int] = (0, 0, 255),
			
 
				-    alpha: float = 0.45,
			
 
				-) -> np.ndarray:
			
 
				-    """在原图上叠加红色半透明水印掩膜，供调试图保存。"""
			
 
				-    if image.ndim == 2:
			
 
				-        base = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
			
 
				-    elif image.shape[2] == 3:
			
 
				-        base = image.copy()
			
 
				-        if image.max() <= 1:
			
 
				-            base = (image * 255).astype(np.uint8)
			
 
				-    else:
			
 
				-        base = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
			
 
				-
			
 
				-    overlay = base.copy()
			
 
				-    overlay[wm_mask] = color
			
 
				-    return cv2.addWeighted(base, 1.0 - alpha, overlay, alpha, 0)
			
 
				-
			
 
				-
			
 
				-def _image_to_bgr_for_debug(img: np.ndarray) -> np.ndarray:
			
 
				-    """将 ndarray 转为 BGR，供 cv2.imwrite 使用。"""
			
 
				-    if img.ndim == 2:
			
 
				-        return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
			
 
				-    out = img.copy()
			
 
				-    if out.shape[2] == 3:
			
 
				-        return cv2.cvtColor(out, cv2.COLOR_RGB2BGR)
			
 
				-    return out
			
 
				-
			
 
				-
			
 
				-def save_watermark_removal_debug(
			
 
				-    before: Union[np.ndarray, Image.Image],
			
 
				-    after: Union[np.ndarray, Image.Image],
			
 
				-    output_dir: Union[str, Path],
			
 
				-    page_name: str,
			
 
				-    *,
			
 
				-    processing_params: Optional[Dict[str, Any]] = None,
			
 
				-    image_format: str = "png",
			
 
				-    save_compare: bool = True,
			
 
				-    subdir: str = "watermark_removal",
			
 
				-    mask_overlay: Optional[np.ndarray] = None,
			
 
				-) -> Dict[str, str]:
			
 
				-    """
			
 
				-    保存去水印调试图（before / after / compare / meta.json）。
			
 
				-
			
 
				-    与 universal_doc_parser 的 module debug 目录结构一致：
			
 
				-    ``{output_dir}/debug/{subdir}/``
			
 
				-
			
 
				-    Args:
			
 
				-        before: 处理前图像（RGB/BGR/灰度）
			
 
				-        after: 处理后图像
			
 
				-        output_dir: 输出根目录（通常为 pipeline 或工具的输出目录）
			
 
				-        page_name: 文件名前缀（如 ``doc_page_002``）
			
 
				-        processing_params: 写入 meta.json 的参数（threshold、contrast_enhancement 等）
			
 
				-        image_format: 图片格式，png/jpg
			
 
				-        save_compare: 是否保存左右拼接对比图
			
 
				-        subdir: debug 根目录下的子目录名（默认 watermark_removal）
			
 
				-
			
 
				-    Returns:
			
 
				-        已保存文件路径字典（before/after/compare/meta，未保存的键省略）
			
 
				-    """
			
 
				-    if isinstance(before, Image.Image):
			
 
				-        before = np.array(before)
			
 
				-    if isinstance(after, Image.Image):
			
 
				-        after = np.array(after)
			
 
				-
			
 
				-    from ocr_utils.module_debug_viz import resolve_module_debug_dir
			
 
				-
			
 
				-    debug_dir = resolve_module_debug_dir(output_dir, subdir)
			
 
				-
			
 
				-    fmt = (image_format or "png").lstrip(".")
			
 
				-    before_bgr = _image_to_bgr_for_debug(before)
			
 
				-    after_bgr = _image_to_bgr_for_debug(after)
			
 
				-
			
 
				-    paths: Dict[str, str] = {}
			
 
				-    before_path = debug_dir / f"{page_name}_watermark_before.{fmt}"
			
 
				-    after_path = debug_dir / f"{page_name}_watermark_after.{fmt}"
			
 
				-    cv2.imwrite(str(before_path), before_bgr)
			
 
				-    cv2.imwrite(str(after_path), after_bgr)
			
 
				-    paths["before"] = str(before_path)
			
 
				-    paths["after"] = str(after_path)
			
 
				-
			
 
				-    if save_compare:
			
 
				-        h = max(before_bgr.shape[0], after_bgr.shape[0])
			
 
				-        if before_bgr.shape[0] != h:
			
 
				-            before_bgr = cv2.resize(before_bgr, (before_bgr.shape[1], h))
			
 
				-        if after_bgr.shape[0] != h:
			
 
				-            after_bgr = cv2.resize(after_bgr, (after_bgr.shape[1], h))
			
 
				-        compare = np.hstack([before_bgr, after_bgr])
			
 
				-        compare_path = debug_dir / f"{page_name}_watermark_compare.{fmt}"
			
 
				-        cv2.imwrite(str(compare_path), compare)
			
 
				-        paths["compare"] = str(compare_path)
			
 
				-        logger.info(f"Saved watermark compare: {compare_path}")
			
 
				-
			
 
				-    if mask_overlay is not None:
			
 
				-        mask_bgr = _image_to_bgr_for_debug(mask_overlay)
			
 
				-        mask_path = debug_dir / f"{page_name}_watermark_mask.{fmt}"
			
 
				-        cv2.imwrite(str(mask_path), mask_bgr)
			
 
				-        paths["mask"] = str(mask_path)
			
 
				-
			
 
				-    meta: Dict[str, Any] = {"page_name": page_name}
			
 
				-    if processing_params:
			
 
				-        _skip_meta = (
			
 
				-            "midtone_mask",
			
 
				-            "wm_mask",
			
 
				-            "wm_candidate",
			
 
				-            "geom_region",
			
 
				-            "geom_candidate",
			
 
				-            "diag_region",
			
 
				-            "text_protect",
			
 
				-            "seal_protect",
			
 
				-            "hough_lines_bgr",
			
 
				-            "diag_ratio_heatmap",
			
 
				-            "hv_ratio_heatmap",
			
 
				-        )
			
 
				-        meta_params = {
			
 
				-            k: v
			
 
				-            for k, v in processing_params.items()
			
 
				-            if k not in _skip_meta
			
 
				-        }
			
 
				-        meta.update(meta_params)
			
 
				-    else:
			
 
				-        meta.update({})
			
 
				-    meta["before"] = paths["before"]
			
 
				-    meta["after"] = paths["after"]
			
 
				-    if "compare" in paths:
			
 
				-        meta["compare"] = paths["compare"]
			
 
				-
			
 
				-    meta_path = debug_dir / f"{page_name}_watermark_meta.json"
			
 
				-    meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
			
 
				-    paths["meta"] = str(meta_path)
			
 
				-
			
 
				-    logger.info(f"Saved watermark debug: {before_path}, {after_path}")
			
 
				-    return paths
			
 
				-
			
 
				-
			
 
				-# ─────────────────────────────────────────────────────────────────────────────
			
 
				-# PDF 层级水印去除（文字型 PDF，保留可搜索性）
			
 
				-# ─────────────────────────────────────────────────────────────────────────────
			
 
				-
			
 
				-def _is_watermark_xobj(doc, xref: int, obj_str: str) -> bool:
			
 
				-    """
			
 
				-    判断一个 Form XObject 是否为水印。
			
 
				-
			
 
				-    启发式规则（满足其一即视为水印）：
			
 
				-    1. 含旋转变换矩阵（cm 指令 sin/cos 分量非零），无论是否有 /Group
			
 
				-    2. 有透明度组（/Group）且内容流包含透明度操作符（ca/CA）
			
 
				-    3. 有透明度组且内容流体积 > 2KB（大量重复绘图 = 平铺水印）
			
 
				-    """
			
 
				-    if "/Form" not in obj_str:
			
 
				-        return False
			
 
				-
			
 
				-    try:
			
 
				-        stream = doc.xref_stream(xref)
			
 
				-        if not stream:
			
 
				-            return False
			
 
				-        stream_text = stream.decode("latin-1", errors="ignore")
			
 
				-    except Exception:
			
 
				-        return False
			
 
				-
			
 
				-    has_group = "/Group" in obj_str
			
 
				-
			
 
				-    cm_pattern = re.compile(
			
 
				-        r"([-\d.]+)\s+([-\d.]+)\s+([-\d.]+)\s+([-\d.]+)\s+[-\d.]+\s+[-\d.]+\s+cm"
			
 
				-    )
			
 
				-    for m in cm_pattern.finditer(stream_text):
			
 
				-        a, b, c, d = float(m.group(1)), float(m.group(2)), float(m.group(3)), float(m.group(4))
			
 
				-        if abs(b) > 0.1 or abs(c) > 0.1:
			
 
				-            return True
			
 
				-
			
 
				-    if not has_group:
			
 
				-        return False
			
 
				-
			
 
				-    if re.search(r'\b(ca|CA)\s+[0-9.]+', stream_text) or re.search(r'[0-9.]+\s+(ca|CA)\b', stream_text):
			
 
				-        return True
			
 
				-
			
 
				-    if len(stream_text) > 2048:
			
 
				-        return True
			
 
				-
			
 
				-    return False
			
 
				-
			
 
				-
			
 
				-def _is_watermark_image_xobj(doc, xref: int, obj_str: str) -> bool:
			
 
				-    """
			
 
				-    判断一个 Image XObject 是否为水印背景图。
			
 
				-
			
 
				-    判断规则（全部满足）：
			
 
				-    1. /Subtype /Image
			
 
				-    2. 有 /SMask（半透明）
			
 
				-    3. 宽 >= 600 且 高 >= 800（全页尺寸，排除小图标）
			
 
				-    4. 解码后像素均值 >= 240（近乎全白，水印文字稀疏）
			
 
				-    """
			
 
				-    if "/Image" not in obj_str or "/SMask" not in obj_str:
			
 
				-        return False
			
 
				-
			
 
				-    w_m = re.search(r'/Width\s+(\d+)', obj_str)
			
 
				-    h_m = re.search(r'/Height\s+(\d+)', obj_str)
			
 
				-    if not w_m or not h_m:
			
 
				-        return False
			
 
				-    if int(w_m.group(1)) < 600 or int(h_m.group(1)) < 800:
			
 
				-        return False
			
 
				-
			
 
				-    try:
			
 
				-        from io import BytesIO
			
 
				-        img_info = doc.extract_image(xref)
			
 
				-        pil_img = Image.open(BytesIO(img_info["image"])).convert("L")
			
 
				-        return float(np.array(pil_img).mean()) >= 240.0
			
 
				-    except Exception:
			
 
				-        return False
			
 
				-
			
 
				-
			
 
				-def _blank_watermark_image(doc, img_xref: int) -> None:
			
 
				-    """
			
 
				-    将水印 Image XObject 的 RGB 流和 SMask 替换为全白/全不透明。
			
 
				-
			
 
				-    关键点：必须先移除 /DecodeParms（Predictor 11），再调用 update_stream。
			
 
				-    否则渲染器在 FlateDecode 之后还会尝试 Predictor 解码，失败后回退原始数据，
			
 
				-    水印依然可见。
			
 
				-    """
			
 
				-    obj_str = doc.xref_object(img_xref)
			
 
				-
			
 
				-    w_m = re.search(r'/Width\s+(\d+)', obj_str)
			
 
				-    h_m = re.search(r'/Height\s+(\d+)', obj_str)
			
 
				-    w = int(w_m.group(1)) if w_m else 1
			
 
				-    h = int(h_m.group(1)) if h_m else 1
			
 
				-    cs_m = re.search(r'/ColorSpace\s+/Device(RGB|Gray|CMYK)', obj_str)
			
 
				-    channels = {'RGB': 3, 'CMYK': 4}.get(cs_m.group(1) if cs_m else '', 1)
			
 
				-
			
 
				-    doc.xref_set_key(img_xref, "DecodeParms", "null")
			
 
				-    doc.update_stream(img_xref, bytes([255]) * (w * h * channels))
			
 
				-
			
 
				-    smask_m = re.search(r'/SMask\s+(\d+)\s+0\s+R', obj_str)
			
 
				-    if smask_m:
			
 
				-        smask_xref = int(smask_m.group(1))
			
 
				-        smask_obj = doc.xref_object(smask_xref)
			
 
				-        sw = int(m.group(1)) if (m := re.search(r'/Width\s+(\d+)', smask_obj)) else w
			
 
				-        sh = int(m.group(1)) if (m := re.search(r'/Height\s+(\d+)', smask_obj)) else h
			
 
				-        doc.xref_set_key(smask_xref, "DecodeParms", "null")
			
 
				-        doc.update_stream(smask_xref, bytes([255]) * (sw * sh))
			
 
				-
			
 
				-
			
 
				-def scan_pdf_watermark_xobjs(pdf_bytes: bytes, sample_pages: int = 3) -> bool:
			
 
				-    """
			
 
				-    快速扫描 PDF 前 N 页，判断是否含水印 XObject。
			
 
				-
			
 
				-    无副作用（只读），用于在执行去水印前快速判断，避免对无水印的大文件
			
 
				-    执行全量扫描和序列化，显著降低财报等大文件的处理开销。
			
 
				-
			
 
				-    Args:
			
 
				-        pdf_bytes: PDF 文件的原始字节。
			
 
				-        sample_pages: 扫描页数上限，默认 3（银行流水通常前几页有水印）。
			
 
				-
			
 
				-    Returns:
			
 
				-        True 表示发现水印 XObject，False 表示未发现。
			
 
				-    """
			
 
				-    try:
			
 
				-        import fitz
			
 
				-    except ImportError:
			
 
				-        return False
			
 
				-
			
 
				-    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
			
 
				-    pages_to_check = min(sample_pages, len(doc))
			
 
				-    try:
			
 
				-        for i in range(pages_to_check):
			
 
				-            page = doc[i]
			
 
				-            for xref, *_ in page.get_xobjects():
			
 
				-                try:
			
 
				-                    obj_str = doc.xref_object(xref)
			
 
				-                except Exception:
			
 
				-                    continue
			
 
				-                if _is_watermark_xobj(doc, xref, obj_str):
			
 
				-                    return True
			
 
				-            for img_tuple in page.get_images(full=True):
			
 
				-                try:
			
 
				-                    obj_str = doc.xref_object(img_tuple[0])
			
 
				-                except Exception:
			
 
				-                    continue
			
 
				-                if _is_watermark_image_xobj(doc, img_tuple[0], obj_str):
			
 
				-                    return True
			
 
				-    finally:
			
 
				-        doc.close()
			
 
				-    return False
			
 
				-
			
 
				-
			
 
				-def remove_txt_pdf_watermark(pdf_bytes: bytes) -> Optional[bytes]:
			
 
				-    """
			
 
				-    对文字型 PDF 执行原生水印去除，完全在内存中完成，不写临时文件。
			
 
				-
			
 
				-    支持两种水印形式：
			
 
				-    - Form XObject 水印：清空内容流
			
 
				-    - Image XObject 水印（全页背景图 + SMask 透明通道）：替换为全白像素
			
 
				-
			
 
				-    适用场景：pdf_type='txt' 的 PDF，去除后可直接传给渲染层（tobytes() → bytes）。
			
 
				-    对于大文件（如财报），建议先用 scan_pdf_watermark_xobjs() 快速判断再调用本函数。
			
 
				-
			
 
				-    Args:
			
 
				-        pdf_bytes: 原始 PDF 的字节内容。
			
 
				-
			
 
				-    Returns:
			
 
				-        去除水印后的 PDF bytes（garbage=4 压缩）；若未发现水印返回 None。
			
 
				-    """
			
 
				-    try:
			
 
				-        import fitz
			
 
				-    except ImportError:
			
 
				-        raise ImportError("请安装 PyMuPDF: pip install PyMuPDF")
			
 
				-
			
 
				-    from loguru import logger
			
 
				-
			
 
				-    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
			
 
				-    processed_xrefs: set[int] = set()
			
 
				-    total_removed = 0
			
 
				-
			
 
				-    for page in doc:
			
 
				-        # ── Form XObject 水印 ─────────────────────────────────────────
			
 
				-        for xref, name, _invoker, _unused in page.get_xobjects():
			
 
				-            if xref in processed_xrefs:
			
 
				-                continue
			
 
				-            try:
			
 
				-                obj_str = doc.xref_object(xref)
			
 
				-            except Exception:
			
 
				-                continue
			
 
				-            if _is_watermark_xobj(doc, xref, obj_str):
			
 
				-                try:
			
 
				-                    doc.update_stream(xref, b"")
			
 
				-                    processed_xrefs.add(xref)
			
 
				-                    total_removed += 1
			
 
				-                    logger.debug(f"  [Form XObject] 清空水印 xref={xref}, name={name}")
			
 
				-                except Exception as e:
			
 
				-                    logger.warning(f"  清空 Form XObject xref={xref} 失败: {e}")
			
 
				-
			
 
				-        # ── Image XObject 水印 ────────────────────────────────────────
			
 
				-        for img_tuple in page.get_images(full=True):
			
 
				-            img_xref = img_tuple[0]
			
 
				-            if img_xref in processed_xrefs:
			
 
				-                continue
			
 
				-            try:
			
 
				-                obj_str = doc.xref_object(img_xref)
			
 
				-            except Exception:
			
 
				-                continue
			
 
				-            if _is_watermark_image_xobj(doc, img_xref, obj_str):
			
 
				-                _blank_watermark_image(doc, img_xref)
			
 
				-                processed_xrefs.add(img_xref)
			
 
				-                total_removed += 1
			
 
				-                logger.debug(f"  [Image XObject] 替换水印图像 xref={img_xref}")
			
 
				-
			
 
				-    if total_removed == 0:
			
 
				-        doc.close()
			
 
				-        return None
			
 
				-
			
 
				-    result = doc.tobytes(garbage=4, deflate=True)
			
 
				-    doc.close()
			
 
				-    logger.info(f"✅ PDF 层级水印去除：共清除 {total_removed} 个水印 XObject")
			
 
				-    return result
			
 
				+from ocr_utils.watermark.algorithms import (
			
 
				+    build_watermark_mask,
			
 
				+    detect_watermark,
			
 
				+    remove_watermark_masked_adaptive,
			
 
				+    render_ratio_heatmap,
			
 
				+    save_watermark_mask_debug_layers,
			
 
				+)
			
 
				+from ocr_utils.watermark.contrast import (
			
 
				+    apply_contrast_enhancement_config,
			
 
				+    enhance_document_contrast,
			
 
				+)
			
 
				+from ocr_utils.watermark.debug import save_watermark_removal_debug
			
 
				+from ocr_utils.watermark.pdf import (
			
 
				+    remove_txt_pdf_watermark,
			
 
				+    scan_pdf_watermark_xobjs,
			
 
				+)
			
 
				+from ocr_utils.watermark.removal import (
			
 
				+    remove_watermark_from_image,
			
 
				+    remove_watermark_from_image_rgb,
			
 
				+    render_watermark_mask_overlay,
			
 
				+)
			
 
				+
			
 
				+__all__ = [
			
 
				+    "apply_contrast_enhancement_config",
			
 
				+    "build_watermark_mask",
			
 
				+    "detect_watermark",
			
 
				+    "enhance_document_contrast",
			
 
				+    "remove_txt_pdf_watermark",
			
 
				+    "remove_watermark_from_image",
			
 
				+    "remove_watermark_from_image_rgb",
			
 
				+    "remove_watermark_masked_adaptive",
			
 
				+    "render_ratio_heatmap",
			
 
				+    "render_watermark_mask_overlay",
			
 
				+    "save_watermark_mask_debug_layers",
			
 
				+    "save_watermark_removal_debug",
			
 
				+    "scan_pdf_watermark_xobjs",
			
 
				+]