瀏覽代碼

feat(重构实验模块与单元格预处理功能): 删除cell_sweep.py,新增cell_preprocess_lab.py和cell_sweep.py,整合单元格裁剪图预处理与参数扫描功能,优化水印去除、对比度增强及放大处理,提升OCR处理的准确性与灵活性。同时新增实验模块集合与README文档,明确各子模块功能与用法。

zhch158_admin 2 天之前
父節點
當前提交
9780104eed

+ 0 - 554
ocr_tools/cell_preprocess_lab/cell_sweep.py

@@ -1,554 +0,0 @@
-#!/usr/bin/env python3
-"""
-单元格裁剪图预处理参数扫描:去水印 / threshold / contrast / upscale / det 阈值 / OCR 模式。
-
-默认从 **原图**(`*_raw.png`)出发,与 pipeline 二次 OCR 一致,避免对已预处理 debug 图二次去水印。
-
-用法:
-    python cell_sweep.py cell219_empty_empty_raw.png -o ./out -t "ATM存折取款"
-    python cell_sweep.py /path/to/tablecell_ocr/ -o ./out
-    python cell_sweep.py cell.png --quick --no-save-images
-    OCR_DET_MODEL_PATH=... OCR_REC_MODEL_PATH=... python cell_sweep.py cell.png
-"""
-from __future__ import annotations
-
-import argparse
-import json
-import os
-import sys
-from itertools import product
-from pathlib import Path
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
-
-import cv2
-import numpy as np
-
-_repo_root = Path(__file__).resolve().parents[2]
-if str(_repo_root) not in sys.path:
-    sys.path.insert(0, str(_repo_root))
-
-from ocr_utils.watermark import WatermarkProcessor, merge_watermark_config
-from ocr_utils.watermark.contrast import apply_contrast_enhancement_config
-
-_IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff", ".webp"}
-_DEFAULT_MODEL_DIR = Path(
-    "/Users/zhch158/models/modelscope_cache/models/OpenDataLab/"
-    "PDF-Extract-Kit-1___0/models/OCR/paddleocr_torch"
-)
-
-
-def _parse_csv_ints(s: str) -> List[Optional[int]]:
-    out: List[Optional[int]] = []
-    for part in s.split(","):
-        part = part.strip()
-        if not part or part.lower() in ("none", "d", "default"):
-            out.append(None)
-        else:
-            out.append(int(part))
-    return out
-
-
-def _parse_csv_floats(s: str) -> List[float]:
-    return [float(x.strip()) for x in s.split(",") if x.strip()]
-
-
-def _parse_csv_bools(s: str) -> List[bool]:
-    out: List[bool] = []
-    for part in s.split(","):
-        p = part.strip().lower()
-        if p in ("1", "true", "yes", "on"):
-            out.append(True)
-        elif p in ("0", "false", "no", "off"):
-            out.append(False)
-        else:
-            raise ValueError(f"无效的 bool 值: {part!r}")
-    return out
-
-
-def _default_model_dir() -> Path:
-    det = os.environ.get("OCR_DET_MODEL_PATH")
-    if det:
-        return Path(det).parent
-    return _DEFAULT_MODEL_DIR
-
-
-def _upscale(img: np.ndarray, min_side: int) -> np.ndarray:
-    h, w = img.shape[:2]
-    if h >= min_side and w >= min_side:
-        return img
-    s = max(min_side / max(h, 1), min_side / max(w, 1), 1.0)
-    return cv2.resize(img, None, fx=s, fy=s, interpolation=cv2.INTER_CUBIC)
-
-
-def _preprocess(
-    raw: np.ndarray,
-    *,
-    method: str,
-    thresh: Optional[int],
-    contrast: bool,
-    upscale: int,
-    text_black_target: int,
-) -> np.ndarray:
-    user: Dict[str, Any] = {"enabled": True, "method": method}
-    if method == "threshold" and thresh is not None:
-        user["threshold"] = thresh
-    cfg = merge_watermark_config("cell", user)
-    img, _ = WatermarkProcessor(cfg, scope="cell").process(raw, force=True)
-    if contrast:
-        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        ce = dict(cfg.get("contrast_enhancement") or {})
-        ce["enabled"] = True
-        ce["text_black_target"] = text_black_target
-        gray = apply_contrast_enhancement_config(gray, ce)
-        img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
-    return _upscale(img, upscale)
-
-
-def _parse_rec_pair(rec_part: Any) -> Tuple[str, float]:
-    """从 OCR 返回的 (text, score) 或嵌套结构中解析识别结果。"""
-    if rec_part is None:
-        return "", 0.0
-    if isinstance(rec_part, (list, tuple)) and len(rec_part) >= 2:
-        if isinstance(rec_part[0], (list, tuple, dict)):
-            return "", 0.0
-        txt = str(rec_part[0] or "").strip()
-        try:
-            sc = float(rec_part[1] or 0.0)
-        except (TypeError, ValueError):
-            sc = 0.0
-        return txt, sc if txt else 0.0
-    if isinstance(rec_part, (list, tuple)) and len(rec_part) == 1:
-        txt = str(rec_part[0] or "").strip()
-        return txt, 0.0
-    return "", 0.0
-
-
-def _aggregate_rec_score(boxes: List[Dict[str, Any]]) -> float:
-    """按字符数加权平均识别分(与 pipeline aggregate_line_ocr 一致)。"""
-    total_len = sum(len(b.get("text") or "") for b in boxes)
-    if total_len <= 0:
-        return 0.0
-    weighted = sum(
-        len(b.get("text") or "") * float(b.get("score") or 0.0) for b in boxes
-    )
-    return weighted / total_len
-
-
-def _ocr(engine: Any, img: np.ndarray, *, det: bool, rec: bool) -> Dict[str, Any]:
-    empty: Dict[str, Any] = {
-        "text": "",
-        "score": 0.0,
-        "boxes": [],
-        "det": det,
-        "rec": rec,
-        "n_boxes": 0,
-    }
-    try:
-        res = engine.ocr(img, det=det, rec=rec)
-        items = res[0] if res and res[0] is not None else []
-        boxes_out: List[Dict[str, Any]] = []
-
-        if det:
-            for item in items:
-                if not item or len(item) < 2:
-                    continue
-                text, score = _parse_rec_pair(item[1])
-                bbox = item[0]
-                if hasattr(bbox, "tolist"):
-                    bbox = bbox.tolist()
-                entry: Dict[str, Any] = {
-                    "text": text,
-                    "score": round(score, 6),
-                }
-                if bbox is not None:
-                    entry["det_bbox"] = bbox
-                boxes_out.append(entry)
-        else:
-            for item in items:
-                text, score = _parse_rec_pair(item)
-                if not text and isinstance(item, (list, tuple)) and len(item) >= 1:
-                    text, score = _parse_rec_pair(item[0])
-                boxes_out.append({"text": text, "score": round(score, 6)})
-
-        text = "".join(b["text"] for b in boxes_out if b.get("text")).strip()
-        agg_score = _aggregate_rec_score(boxes_out)
-        return {
-            "text": text,
-            "score": round(agg_score, 6),
-            "boxes": boxes_out,
-            "det": det,
-            "rec": rec,
-            "n_boxes": len(boxes_out),
-        }
-    except Exception as e:
-        out = dict(empty)
-        out["error"] = str(e)
-        return out
-
-
-def _make_engine(det_thresh: float, model_dir: Path) -> Any:
-    from ocr_tools.pytorch_models.pytorch_paddle import PytorchPaddleOCR
-
-    det_path = os.environ.get("OCR_DET_MODEL_PATH") or str(
-        model_dir / "ch_PP-OCRv5_det_infer.pth"
-    )
-    rec_path = os.environ.get("OCR_REC_MODEL_PATH") or str(
-        model_dir / "ch_PP-OCRv4_rec_server_doc_infer.pth"
-    )
-    return PytorchPaddleOCR(
-        lang="ch",
-        det_model_path=det_path,
-        rec_model_path=rec_path,
-        det_db_box_thresh=det_thresh,
-    )
-
-
-def resolve_input_image(path: Path, *, prefer_raw: bool) -> Path:
-    """优先使用与 pipeline debug 配套的 *_raw.png。"""
-    if not prefer_raw or path.stem.endswith("_raw"):
-        return path
-    raw_path = path.parent / f"{path.stem}_raw{path.suffix}"
-    if raw_path.is_file():
-        print(f"  使用原图: {raw_path.name}(跳过 {path.name})")
-        return raw_path
-    return path
-
-
-def collect_inputs(path: Path, *, prefer_raw: bool) -> List[Path]:
-    if path.is_file():
-        if path.suffix.lower() not in _IMAGE_SUFFIXES:
-            raise ValueError(f"不支持的图像格式: {path}")
-        return [resolve_input_image(path, prefer_raw=prefer_raw)]
-
-    if not path.is_dir():
-        raise FileNotFoundError(path)
-
-    all_images = sorted(
-        p
-        for p in path.iterdir()
-        if p.is_file() and p.suffix.lower() in _IMAGE_SUFFIXES
-    )
-    if not all_images:
-        raise FileNotFoundError(f"目录内无图像: {path}")
-
-    if prefer_raw:
-        raws = [p for p in all_images if p.stem.endswith("_raw")]
-        if raws:
-            return raws
-
-    chosen: List[Path] = []
-    for p in all_images:
-        if p.stem.endswith("_raw"):
-            continue
-        raw_sibling = p.parent / f"{p.stem}_raw{p.suffix}"
-        if prefer_raw and raw_sibling.is_file():
-            continue
-        chosen.append(p)
-    return chosen or all_images
-
-
-def _match_hit(text: str, target: Optional[str]) -> Optional[str]:
-    if not text:
-        return None
-    if not target:
-        return "nonempty"
-    if target in text:
-        return "full"
-    if len(target) >= 6 and target.isdigit() and len(text) >= 6 and text.isdigit():
-        return "partial"
-    return None
-
-
-def run_sweep(
-    input_path: Path,
-    out_dir: Path,
-    *,
-    prefer_raw: bool,
-    target: Optional[str],
-    model_dir: Path,
-    methods: Sequence[str],
-    thresholds: Sequence[Optional[int]],
-    contrasts: Sequence[bool],
-    upscales: Sequence[int],
-    det_threshs: Sequence[float],
-    text_black_target: int,
-    save_images: bool,
-    run_baseline: bool,
-    baseline_upscale: int,
-) -> Dict[str, Any]:
-    resolved = resolve_input_image(input_path, prefer_raw=prefer_raw)
-    raw = cv2.imread(str(resolved))
-    if raw is None:
-        raise RuntimeError(f"无法读取图像: {resolved}")
-
-    stem = resolved.stem.removesuffix("_raw") if resolved.stem.endswith("_raw") else resolved.stem
-    cell_out = out_dir / stem
-    cell_out.mkdir(parents=True, exist_ok=True)
-
-    ocr_modes: List[Tuple[str, bool, bool]] = [
-        ("det_rec", True, True),
-        ("whole_rec", False, True),
-    ]
-
-    results: List[Dict[str, Any]] = []
-    hits: List[Dict[str, Any]] = []
-    engines: Dict[float, Any] = {}
-    total = 0
-
-    for method, thresh, contrast, upscale, det_th in product(
-        methods, thresholds, contrasts, upscales, det_threshs
-    ):
-        if method != "threshold" and thresh is not None:
-            continue
-        if det_th not in engines:
-            print(f"  [{stem}] 加载 OCR det_db_box_thresh={det_th} ...")
-            engines[det_th] = _make_engine(det_th, model_dir)
-
-        img = _preprocess(
-            raw,
-            method=method,
-            thresh=thresh,
-            contrast=contrast,
-            upscale=upscale,
-            text_black_target=text_black_target,
-        )
-        tag = f"{method}_t{thresh or 'd'}_c{int(contrast)}_u{upscale}_det{det_th}"
-        if save_images:
-            cv2.imwrite(str(cell_out / f"{tag}.png"), img)
-
-        for mode_name, det, rec in ocr_modes:
-            total += 1
-            ocr = _ocr(engines[det_th], img, det=det, rec=rec)
-            row: Dict[str, Any] = {
-                "tag": tag,
-                "method": method,
-                "threshold": thresh,
-                "contrast": contrast,
-                "upscale": upscale,
-                "det_db_box_thresh": det_th,
-                "ocr_mode": mode_name,
-                **ocr,
-            }
-            results.append(row)
-            m = _match_hit(row.get("text", ""), target)
-            if m:
-                row["match"] = m
-                hits.append(row)
-                print(
-                    f"  HIT [{m}] {mode_name} {tag} "
-                    f"score={row.get('score')} -> {row.get('text')!r}"
-                )
-
-    if run_baseline:
-        for det_th in det_threshs:
-            if det_th not in engines:
-                engines[det_th] = _make_engine(det_th, model_dir)
-            base_img = _upscale(raw, baseline_upscale)
-            if save_images:
-                cv2.imwrite(str(cell_out / f"baseline_upscale{baseline_upscale}.png"), base_img)
-            for mode_name, det, rec in ocr_modes:
-                ocr = _ocr(engines[det_th], base_img, det=det, rec=rec)
-                row = {
-                    "tag": f"baseline_upscale{baseline_upscale}",
-                    "det_db_box_thresh": det_th,
-                    "ocr_mode": mode_name,
-                    **ocr,
-                }
-                results.append(row)
-                m = _match_hit(row.get("text", ""), target)
-                if m:
-                    row["match"] = m
-                    hits.append(row)
-
-    report = {
-        "input": str(resolved),
-        "input_requested": str(input_path),
-        "output_dir": str(cell_out),
-        "target": target,
-        "total_trials": total,
-        "hits": hits,
-        "all_results": results,
-    }
-    report_path = cell_out / "sweep_report.json"
-    report_path.write_text(
-        json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8"
-    )
-    return report
-
-
-def _build_arg_parser() -> argparse.ArgumentParser:
-    p = argparse.ArgumentParser(
-        description="单元格图预处理 + OCR 参数网格扫描(对齐 pipeline 格级二次 OCR)",
-    )
-    p.add_argument(
-        "input",
-        type=Path,
-        help="单元格裁剪图路径,或 tablecell_ocr 目录(批量扫描)",
-    )
-    p.add_argument(
-        "-o",
-        "--output",
-        type=Path,
-        default=None,
-        help="输出目录,默认 <input_dir|input_parent>/sweep_out/<stem>",
-    )
-    p.add_argument(
-        "-t",
-        "--target",
-        default=None,
-        help="期望 OCR 文本;用于标记 HIT(子串匹配)。省略则任意非空为 HIT",
-    )
-    p.add_argument(
-        "--model-dir",
-        type=Path,
-        default=None,
-        help="PaddleOCR torch 模型目录(含 det/rec .pth),也可用 OCR_*_MODEL_PATH",
-    )
-    p.add_argument(
-        "--no-prefer-raw",
-        action="store_true",
-        help="不自动选用同名的 *_raw.png",
-    )
-    p.add_argument(
-        "--quick",
-        action="store_true",
-        help="缩小网格(threshold 170,175 × upscale 128,192 × det 0.3,0.5)",
-    )
-    p.add_argument(
-        "--methods",
-        default="threshold,masked_adaptive",
-        help="去水印方式,逗号分隔",
-    )
-    p.add_argument(
-        "--thresholds",
-        default="155,165,170,175,180,none",
-        help="threshold 法的阈值;none=预设默认",
-    )
-    p.add_argument(
-        "--contrasts",
-        default="false,true",
-        help="是否 contrast,逗号分隔 false,true",
-    )
-    p.add_argument(
-        "--upscales",
-        default="64,96,128,192",
-        help="最短边放大目标,逗号分隔整数",
-    )
-    p.add_argument(
-        "--det-threshs",
-        default="0.2,0.3,0.4,0.5",
-        help="det_db_box_thresh,逗号分隔",
-    )
-    p.add_argument(
-        "--text-black-target",
-        type=int,
-        default=88,
-        help="contrast text_restore 目标黑度",
-    )
-    p.add_argument(
-        "--no-save-images",
-        action="store_true",
-        help="不写出中间预处理 png(仅报告)",
-    )
-    p.add_argument(
-        "--no-baseline",
-        action="store_true",
-        help="跳过「仅放大、不去水印」对照组",
-    )
-    p.add_argument(
-        "--baseline-upscale",
-        type=int,
-        default=128,
-        help="baseline 对照组的最短边放大",
-    )
-    return p
-
-
-def main(argv: Optional[Sequence[str]] = None) -> None:
-    args = _build_arg_parser().parse_args(argv)
-    inputs = collect_inputs(args.input, prefer_raw=not args.no_prefer_raw)
-    if not inputs:
-        raise SystemExit("未找到可扫描的图像")
-
-    if args.output is not None:
-        out_root = args.output
-    elif args.input.is_file():
-        out_root = args.input.parent / "sweep_out"
-    else:
-        out_root = args.input / "sweep_out"
-    out_root.mkdir(parents=True, exist_ok=True)
-
-    model_dir = args.model_dir or _default_model_dir()
-    methods = [m.strip() for m in args.methods.split(",") if m.strip()]
-
-    if args.quick:
-        thresholds = [170, 175]
-        upscales = [128, 192]
-        det_threshs = [0.3, 0.5]
-        contrasts = [False, True]
-    else:
-        thresholds = _parse_csv_ints(args.thresholds)
-        upscales = [int(x) for x in args.upscales.split(",") if x.strip()]
-        det_threshs = _parse_csv_floats(args.det_threshs)
-        contrasts = _parse_csv_bools(args.contrasts)
-
-    print(f"扫描 {len(inputs)} 张图 -> {out_root}")
-    print(f"  methods={methods} thresholds={thresholds} upscales={upscales}")
-    if args.target:
-        print(f"  target={args.target!r}")
-
-    summary: List[Dict[str, Any]] = []
-    for img_path in inputs:
-        print(f"\n=== {img_path.name} ===")
-        report = run_sweep(
-            img_path,
-            out_root,
-            prefer_raw=not args.no_prefer_raw,
-            target=args.target,
-            model_dir=model_dir,
-            methods=methods,
-            thresholds=thresholds,
-            contrasts=contrasts,
-            upscales=upscales,
-            det_threshs=det_threshs,
-            text_black_target=args.text_black_target,
-            save_images=not args.no_save_images,
-            run_baseline=not args.no_baseline,
-            baseline_upscale=args.baseline_upscale,
-        )
-        summary.append(
-            {
-                "input": report["input"],
-                "hits": len(report["hits"]),
-                "report": str(Path(report["output_dir"]) / "sweep_report.json"),
-            }
-        )
-
-    index_path = out_root / "sweep_index.json"
-    index_path.write_text(
-        json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8"
-    )
-    print(f"\n全部完成,索引: {index_path}")
-    for s in summary:
-        print(f"  {s['input']}: {s['hits']} hits -> {s['report']}")
-
-
-if __name__ == "__main__":
-    if len(sys.argv) == 1:
-        print("ℹ️  未提供命令行参数,使用默认配置运行...")
-        default_config = {
-            "input": "/Users/zhch158/workspace/data/流水分析/彭_广东兴宁农村商业银行/bank_statement_yusys_local/debug/table_recognition_wired/tablecell_ocr/彭_广东兴宁农村商业银行_page_002_0/cell219_empty_empty_raw.png",
-            "output": "./output/彭_广东兴宁农村商业银行/cell219_sweep",
-            "target": "ATM存折取款",
-        }
-        sys.argv = [sys.argv[0], default_config["input"]]
-        for key, value in default_config.items():
-            if key == "input":
-                continue
-            flag = f"--{key.replace('_', '-')}"
-            if isinstance(value, bool) and value:
-                sys.argv.append(flag)
-            elif not isinstance(value, bool):
-                sys.argv.extend([flag, str(value)])
-
-    sys.exit(main())

+ 465 - 0
ocr_tools/lab/README.md

@@ -0,0 +1,465 @@
+# 🧪 实验 Lab
+
+水印去除/对比度增强/单元格预处理实验模块,统一归集到 `ocr_tools/lab/` 目录下。
+包含 LaMa GAN inpainting、掩膜参数扫描、对比度增强扫描、单元格预处理 OCR 扫描。
+
+## Models
+- [LaMa-Inpainting](https://github.com/advimman/lama.git)
+- [BrushNet](https://github.com/TencentARC/BrushNet)
+
+## 模型下载地址:https://drive.google.com/drive/folders/1B2x7eQDgecTL0oh3LSIBDGj0fTxs6Ips?usp=sharing
+
+---
+
+## LaMa 本地权重调用方式(不下载)
+
+当前脚本已支持直接读取本地权重文件,不再依赖运行时下载。
+
+- 默认权重路径:`/Users/zhch158/models/big-lama/models/best.ckpt`
+- 默认会自动推断配置文件:`/Users/zhch158/models/big-lama/config.yaml`
+
+### 运行方式
+
+先激活环境:
+
+```bash
+conda activate mineru
+```
+
+执行评估(使用默认本地权重):
+
+```bash
+cd ocr_platform/ocr_tools/lab/gan_experiments_lab
+python evaluate.py
+```
+
+或使用非交互方式:
+
+```bash
+conda run -n mineru python ocr_tools/lab/gan_experiments_lab/evaluate.py
+```
+
+### 可选参数
+
+- `--lama-ckpt`:指定 `best.ckpt` 路径
+- `--lama-config`:指定 `config.yaml` 路径(默认按 ckpt 自动推断)
+- `--lama-repo`:指定 LaMa 仓库路径(用于导入 `saicinpainting`;默认自动探测)
+
+示例:
+
+```bash
+conda run -n mineru python ocr_tools/lab/gan_experiments_lab/evaluate.py \
+  --lama-ckpt /Users/zhch158/models/big-lama/models/best.ckpt \
+  --lama-config /Users/zhch158/models/big-lama/config.yaml \
+  --lama-repo /path/to/lama
+
+conda run -n mineru python ocr_tools/lab/gan_experiments_lab/evaluate.py \
+  --lama-repo /Users/zhch158/workspace/repository.git/lama
+```
+
+---
+
+## 水印去除原理
+
+LaMa、BrushNet 等 GAN inpainting 模型需要一张 **水印位置掩码图(watermark mask)**
+作为输入,模型在 mask 标出的区域进行修复(inpainting)。
+
+对于银行流水等文档,流程为:
+
+```
+原图 → 水印检测 (build_watermark_mask) → 掩码图 → LaMa/BrushNet 修复 → 去水印结果
+```
+
+### 关键难点
+
+- **浅色斜向水印**与正文笔划在灰度上重叠,传统图像学方法难以精确分离
+- 掩码过大 → 伤到正文;掩码过小 → 水印去不干净
+- 水印盖在正文上时,GAN 修复可能改变底层数字/文字
+
+---
+
+## 水印掩膜实验(watermark_lab)
+
+针对银行流水等场景的水印 mask 生成参数扫描,支持三种策略:
+
+| 策略 | 说明 | 适用场景 |
+|---|---|---|
+| `light_on_white` | Hough 斜向检测 + 浅色带提取 | 斜向文字水印(默认) |
+| `diagonal_midtone` | 中间调 + 斜向形态学 | 通用水印 |
+| `fused` | 以上两种 + 背景差异残差 OR 融合 | 多类型水印混合 |
+
+### 快速扫描
+
+```bash
+cd ocr_platform
+conda run -n mineru python ocr_tools/lab/watermark_lab/watermark_sweep.py \
+  ocr_tools/lab/gan_experiments_lab/test_images/input/彭_广东兴宁农村商业银行_page_002.png \
+  --quick
+```
+
+### 批量扫描
+
+```bash
+conda run -n mineru python ocr_tools/lab/watermark_lab/watermark_sweep.py \
+  ocr_tools/lab/gan_experiments_lab/test_images/input/ \
+  --lama-ckpt /Users/zhch158/models/big-lama/models/best.ckpt
+```
+
+### 输出产物
+
+```
+output/<图片名>/
+├── sweep_report.json             # 扫描结果汇总(含各策略 ratio、排序)
+├── sweep_summary.csv              # CSV 汇总表
+├── light_on_white_*.png           # light_on_white 策略各参数 overlay 图
+├── fused_*.png                    # 融合策略各参数 overlay 图
+├── diagonal_midtone_*.png         # diagonal_midtone 策略 overlay 图
+└── *_inpainted.png                # (可选)LaMa 修复结果图
+```
+
+### 实验结果(银行流水样例)
+
+**测试图**:`彭_广东兴宁农村商业银行_page_002.png`(带斜向半透明水印)
+
+| 策略 | direction_filter | mask 覆盖率 | 说明 |
+|---|---|---|---|
+| `light_on_white` | `hough` | **10.8%** | Hough 斜向检测决定几何区域,是主力策略 |
+| `light_on_white` | `none` | **0%** | 无几何约束时 mask 为空 |
+| `diagonal_midtone` | — | **3.2%** | 中间调+斜向形态学,补充覆盖 |
+| `background_diff` | — | **0.5~0.7%** | 中值滤波背景差异残差,贡献极小 |
+| **fused** | `hough` | **11.3~11.5%** | 三策略 OR 融合,比纯 light_on_white 多 ~0.5% |
+
+**结论**:对于银行流水斜向水印,`light_on_white + direction_filter=hough` 是主力策略。
+`fused` 融合在基础之上加入 `diagonal_midtone` 的 3.2% 补充,使整体 mask 增加到 11.3~11.5%。
+但水印与正文重叠区域仍无法通过纯图像学方法精确分离。
+
+---
+
+## 对比度增强实验(contrast_sweep)
+
+**核心思路**:不去水印,直接对原图做多种对比度增强,让水印在视觉上"淡化",
+正文保持清晰,使后续 OCR 不受水印干扰。
+
+支持四种增强方法:
+
+| 方法 | 说明 | 关键参数 |
+|---|---|---|
+| `text_restore` | 仅非背景像素动态范围压缩 | `text_black_target`(40~120), `background_threshold`(235~252) |
+| `clahe` | 自适应直方图均衡(局部) | `clip_limit`(0.5~8.0), `tile_grid_size`(4~32) |
+| `gamma` | Gamma 校正(<1 加深文字) | `gamma`(0.3~0.9) |
+| `linear` | 分位线性拉伸 | `black_percentile`(1~8), `white_percentile`(92~98) |
+
+### 快速扫描
+
+```bash
+cd ocr_platform
+conda run -n mineru python ocr_tools/lab/watermark_lab/contrast_sweep.py \
+  ocr_tools/lab/gan_experiments_lab/test_images/input/彭_广东兴宁农村商业银行_page_002.png \
+  --quick
+```
+
+### 全量扫描 + OCR 验证
+
+```bash
+conda run -n mineru python ocr_tools/lab/watermark_lab/contrast_sweep.py \
+  ocr_tools/lab/gan_experiments_lab/test_images/input/ \
+  --ocr --model-dir /path/to/paddleocr_models
+```
+
+### 输出产物
+
+```
+output/<图片名>/
+├── contrast_report.json           # 扫描结果汇总(含 fade/sharpness/combined 评分)
+├── contrast_summary.csv            # CSV 汇总表
+├── quad_compare.png               # 四宫格对比图(原图 + 各方法最优)
+├── clahe_cl*.png                  # CLAHE 各参数增强结果
+├── text_restore_t*.png            # text_restore 各参数结果
+├── gamma_g*.png                   # Gamma 各参数结果
+└── linear_b*.png                  # Linear 各参数结果
+```
+
+### 评分指标
+
+| 指标 | 含义 | 越大越好 |
+|---|---|---|
+| `fade_score` | 水印淡化程度:增强前后背景残差方差变化率 | 正数=水印变淡 |
+| `sharpness_score` | 文字清晰度:局部标准差均值 | ✓ |
+| `combined_score` | 综合分 = fade×0.5 + sharpness(归一化)×0.5 | ✓ |
+
+### 实验结果(银行流水样例)
+
+**测试图**:`彭_广东兴宁农村商业银行_page_002.png`(带斜向半透明水印,20 组参数)
+
+| 排名 | 方法 | 参数 | fade | sharpness | combined |
+|---|---|---|---|---|---|
+| **1** | **clahe** | clip=1.0, tile=16 | 0.009 | 13.42 | **0.504** |
+| 2 | clahe | clip=0.5, tile=8 | 0.009 | 13.40 | 0.504 |
+| 3 | gamma | g=0.85 | -0.253 | 20.49 | 0.499 |
+| 4 | linear | bp=2.0, wp=95.0 | -0.163 | 17.06 | 0.426 |
+| 5 | text_restore | t=85, bg=240 | -0.769 | 17.09 | 0.116 |
+
+**关键发现**:
+
+1. **CLAHE(clip_limit=1.0, tile_grid_size=16)是最优方案**,水印被局部均衡化"摊平"几乎消失,文字保持清晰
+2. `text_restore` 对银行流水有害(fade 为负):它把水印像素当成"要恢复的文字"一并压深,水印反而更显眼
+3. `gamma=0.85` 作为备选简单有效,整体对比度提升但水印淡化程度不如 CLAHE
+4. CLAHE 仅需 **2.7ms**,比 text_restore 快 4 倍
+
+**推荐生产配置**:
+
+```python
+from ocr_utils.watermark.contrast import enhance_document_contrast
+
+enhanced = enhance_document_contrast(
+    gray,
+    method="clahe",
+    clip_limit=1.0,      # 不要过大,否则水印也被增强
+    tile_grid_size=16,   # 大块更平滑,水印淡化更均匀
+)
+```
+
+**结论:对银行流水类文档,CLAHE 对比度增强替代去水印流程更干净、更快、更简单。**
+
+---
+
+## 目录结构
+
+```
+ocr_tools/lab/
+│
+├── README.md                     # 本文件
+│
+├── gan_experiments_lab/          # LaMa GAN inpainting 评估
+│   ├── evaluate.py               #   去水印评估(baseline vs GAN)
+│   ├── lama_inpaint.py           #   LaMa 推理封装
+│   ├── watermark_synthesis.py    #   水印合成脚本
+│   ├── test_images/              #   测试图片
+│   │   ├── input/                #     原图(带水印)
+│   │   └── clean/                #     无印参考图(用于 PSNR/SSIM)
+│   └── output/                   #   评估输出
+│       ├── compare/              #     三联对比图
+│       ├── inpainted/            #     修复结果
+│       ├── mask_debug/           #     掩膜可视化
+│       └── metrics/              #     评估指标 JSON
+│
+├── watermark_lab/                # 水印实验(掩膜扫描 + 对比度扫描)
+│   ├── fused_mask.py             #   三策略融合水印掩膜
+│   ├── watermark_sweep.py        #   掩膜参数网格扫描
+│   ├── contrast_sweep.py         #   对比度增强参数网格扫描
+│   └── output/                   #   实验输出
+│
+└── cell_preprocess_lab/          # 单元格预处理实验
+    ├── cell_preprocess_lab.py    #   单元格预处理主脚本
+    ├── cell_sweep.py             #   单元格预处理 + OCR 参数扫描
+    └── output/                   #   实验输出
+```
+
+**生产配置对齐**:`bank_statement_yusys_local.yaml` 的 `table_recognition.second_pass_ocr.cell_preprocess` 已与 cell_sweep 最优参数对齐(Pass1:`threshold=150` + CLAHE `cl=1.0, tile=8` + `upscale_min_side=128`;Pass2 `enhance_retry`:CLAHE `tile=4`)。
+
+---
+
+# 多种提高清晰度方法说明
+## CLAHE(自适应直方图均衡)
+
+### 原理
+
+CLAHE 是 AHE(自适应直方图均衡)的改进版。核心思想是:
+
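+1. **分块**:把图像切成若干小块,分块粒度由 `tile_grid_size` 控制(本文按"`tile_grid_size × tile_grid_size` 像素的小块"来理解;注意:若该参数被原样传给 OpenCV `cv2.createCLAHE(tileGridSize=...)`,它表示每行/每列的分块**数量**而非像素边长,数值越大分块反而越小——请对照 `contrast.py` 的实现确认)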
+2. **局部均衡**:每个小块内独立做直方图均衡化——让局部灰度分布更均匀
+3. **对比度限制**:直方图统计时,"削平"过高的柱子(超过 `clip_limit` 的部分被裁掉并均匀分给所有灰度级),防止局部噪声被过度放大
+4. **双线性插值**:块边界处像素用相邻四个块的映射函数做双线性插值,消除块效应
+
+**为什么对水印有效**:水印是覆盖在全图上的均匀半透明层,在局部窗口(例如 16×16 像素)内,它表现为"整体灰度偏高一点点"而非"纹理"。CLAHE 的局部均衡化把每个窗口的灰度动态范围拉满后,水印的这种"整体偏移"就被摊平了,而文字作为局部高频信息被保留甚至增强。
+
+### 参数
+
+| 参数 | 含义 | 取值范围 | 效果 |
+|---|---|---|---|
+| `clip_limit` | 对比度剪切阈值 | 0.5~8.0(默认2.0) | **越小**→输出越均匀、噪声抑制越强、水印淡化越好;**越大**→局部对比度越强、细节突出但可能引入噪声 |
+| `tile_grid_size` | 分块大小(像素) | 4~32(默认8) | **越小**→更"局部"、细小文字更清晰;**越大**→更"全局"、更平滑、水印淡化更均匀 |
+
+在你实验的最优参数 `clip_limit=1.0, tile=16` 中:低 `clip` 强力抑制了水印纹理被放大的趋势,大 `tile` 让过渡更平滑。
+
+---
+
+## Gamma 校正
+
+### 原理
+
+对图像做幂律变换:
+
+```
+output = 255 × (input / 255)^(1/γ)
+```
+
+这是一个**像素级别的非线性映射**,查表(LUT)实现,速度快。
+
+```
+gamma < 1  →  指数 1/γ > 1,中间调整体压暗,变浅的笔划被加深 → 去水印后恢复用(浅色水印也会随之变深)
+gamma > 1  →  指数 1/γ < 1,中间调整体提亮,深色笔划变浅
+
+灰度映射曲线(gamma < 1):
+  255|              ╱
+     |           ╱
+     |        ╱╱
+  0  |______╱
+     0    128    255
+```
+
+你们代码里的实现:
+
+```94:101:ocr_tools/lab/../../ocr_utils/watermark/contrast.py
+    if method == "gamma":
+        gamma = max(0.1, min(float(gamma), 3.0))
+        inv_gamma = 1.0 / gamma
+        table = np.array(
+            [((i / 255.0) ** inv_gamma) * 255 for i in range(256)],
+            dtype=np.uint8,
+        )
+        return cv2.LUT(gray, table)
+```
+
+### 参数
+
+| 参数 | 含义 | 你用过的范围 | 效果 |
+|---|---|---|---|
+| `gamma` | 校正指数 | 0.3~0.9(默认0.85) | **<1**→压暗中间调(文字加深,浅色水印也变深);**>1**→提亮中间调(深色变浅) |
+
+你的实验里 `gamma=0.85` 排第三(combined=0.499):γ<1 会压暗中间调,文字更黑使 sharpness 高,但浅色水印也随之变深,所以 fade 为负(水印反而更明显了)。
+
+---
+
+## Linear(分位线性拉伸)
+
+### 原理
+
+统计全图灰度的 `black_percentile` 分位值(如 2% 分位值=40)和 `white_percentile` 分位值(如 98% 分位值=245),然后把 `[p_low, p_high]` 这个区间线性映射到 `[0, 255]`:
+
+```
+output = (input - p_low) × 255 / (p_high - p_low)
+```
+
+代码:
+
+```103:109:ocr_tools/lab/../../ocr_utils/watermark/contrast.py
+    if method == "linear":
+        p_low = float(np.percentile(gray, black_percentile))
+        p_high = float(np.percentile(gray, white_percentile))
+        if p_high <= p_low + 1.0:
+            return gray
+        stretched = (gray.astype(np.float32) - p_low) * 255.0 / (p_high - p_low)
+        return np.clip(stretched, 0, 255).astype(np.uint8)
+```
+
+### 参数
+
+| 参数 | 含义 | 取值范围 | 效果 |
+|---|---|---|---|
+| `black_percentile` | 黑色锚点分位 | 1%~8%(默认2%) | **越大**→`p_low` 越高、更多暗部被裁成纯黑(0)、整体更暗 |
+| `white_percentile` | 白色锚点分位 | 92%~98%(默认98%) | **越小**→`p_high` 越低、更多亮部被裁成纯白(255)、整体更亮 |
+
+**缺点**:这是全局操作,水印和正文同时被线性拉伸,水印不会单独被淡化。你的实验结果也印证了这点(fade 为负)。
+
+---
+
+## Text Restore(去水印后专用修复)
+
+### 原理
+
+专门为**去水印后文字变浅**的场景设计。核心逻辑:
+
+1. **背景保护**:`gray >= background_threshold`(默认 248)的白色背景像素**不动**,避免白底变灰
+2. **文字区域**:`gray < background_threshold` 的非背景像素做动态范围压缩
+3. **压缩公式**:取文字区域像素的 `[text_lo%, text_hi%]` 分位区间,线性映射到 `[0, text_black_target]`
+
+```40:42:ocr_tools/lab/../../ocr_utils/watermark/contrast.py
+    stretched = (vals - lo) * target / (hi - lo)
+    result[text_mask] = np.clip(stretched, 0, 255).astype(np.uint8)
+    return result
+```
+
+**对银行流水为什么有害**(fade=-0.769):水印的灰色像素(灰度 100~220)也落在 `gray < background_threshold` 范围内,被当成"需要恢复的文字"一起往深色(target=40~85)拉伸,结果水印反而变得更黑了。
+
+### 参数
+
+| 参数 | 含义 | 取值范围 | 效果 |
+|---|---|---|---|
+| `text_black_target` | 文字区上分位映射到的目标灰度(压缩后文字区的灰度上限,下分位映射到 0) | 10~200(默认85) | **越小**→文字越黑、水印也跟着变黑 |
+| `background_threshold` | 背景判定阈值 | 200~255(默认248) | 大于等于此值的像素不动;**越低**→更多像素参与压缩 |
+| `text_lo_percentile` | 文字区下分位 | 1%~5%(默认1%) | 排除极暗的异常像素 |
+| `text_hi_percentile` | 文字区上分位 | 95%~99%(默认99%) | 排除极亮的异常像素 |
+
+---
+
+## 三个评测指标
+
+### fade_score(水印淡化程度)
+
+```150:168:ocr_tools/lab/watermark_lab/contrast_sweep.py
+def _compute_watermark_fade_score(
+    original: np.ndarray, enhanced: np.ndarray, window: int = 31
+) -> float:
+    o_f = original.astype(np.float32)
+    e_f = enhanced.astype(np.float32)
+    k = max(3, window) | 1
+
+    o_bg = cv2.medianBlur(o_f.astype(np.uint8), k).astype(np.float32)
+    e_bg = cv2.medianBlur(e_f.astype(np.uint8), k).astype(np.float32)
+
+    o_res = cv2.absdiff(o_f, o_bg)
+    e_res = cv2.absdiff(e_f, e_bg)
+
+    return float(1.0 - np.var(e_res) / max(np.var(o_res), 1.0))
+```
+
+**计算原理**:
+
+1. 用 31×31 大核中值滤波估计**背景**(抹掉文字,保留水印纹理和整体亮度)
+2. 计算残差 `|原图 - 背景|`:残差大 = 水印纹理明显(波动剧烈)
+3. 计算增强前后残差的方差比
+
+```
+fade = 1 - var(增强后的残差) / var(原始残差)
+```
+
+- **正值**(接近 1)→ 增强后残差方差变小很多 → 水印纹理被明显削弱 → ✅
+- **接近 0** → 增强前后差不多
+- **负值** → 增强后残差反而更剧烈 → 水印被加强了 → ❌
+
+CLAHE 得到 0.009 的正值,说明水印纹理被轻微减弱。text_restore 得到 -0.769,说明它反而把水印纹理放大了。
+
+### sharpness_score(文字清晰度)
+
+```171:179:ocr_tools/lab/watermark_lab/contrast_sweep.py
+def _compute_text_sharpness_score(
+    enhanced: np.ndarray, win: int = 3
+) -> float:
+    """局部标准差均值,越大 = 文字越清晰。"""
+    e_f = enhanced.astype(np.float32)
+    kernel = np.ones((win, win), np.float32) / (win * win)
+    mean = cv2.filter2D(e_f, -1, kernel)
+    sq_mean = cv2.filter2D(e_f * e_f, -1, kernel)
+    var = np.maximum(sq_mean - mean * mean, 0)
+    return float(np.sqrt(var).mean())
+```
+
+**计算原理**:用 3×3 窗口计算每个像素的局部标准差(= 局部方差开根号),再取全图均值。
+
+- **高值**→ 像素之间的差异大 → 文字和背景分得开 → 文字清晰 ✅
+- **低值**→ 图像比较"平"→ 文字可能模糊
+
+CLAHE=13.42 的 sharpness 虽低于 gamma,但仍在正常范围内,不影响 OCR 识别。注意这个值不能直接跨方法比较——gamma=20.49 是因为压暗中间调让文字区像素差变大,而非 OCR 识别率更高。
+
+### combined_score(综合得分)
+
+```python
+combined = fade * 0.5 + (sharpness / max_sharpness) * 0.5
+```
+
+- `fade`:原始值(可能为负)
+- `sharpness`:归一化到 [0, 1](除以本轮扫描中最大的 sharpness)
+- 各占 50% 权重
+
+**设计意图**:在水印淡化和文字清晰度之间找平衡。实际使用中,如果你更关注水印去除效果,可以调整权重偏向 fade。

+ 7 - 0
ocr_tools/lab/__init__.py

@@ -0,0 +1,7 @@
+"""实验模块集合。
+
+子模块:
+- gan_experiments_lab: LaMa GAN inpainting 评估
+- watermark_lab:         水印掩膜 + 对比度增强扫描
+- cell_preprocess_lab:   单元格预处理 + OCR 参数扫描
+"""

+ 1 - 1
ocr_tools/cell_preprocess_lab/cell_preprocess_lab.py → ocr_tools/lab/cell_preprocess_lab/cell_preprocess_lab.py

@@ -25,7 +25,7 @@ import cv2
 import numpy as np
 import yaml
 
-_repo_root = Path(__file__).resolve().parents[2]
+_repo_root = Path(__file__).resolve().parents[3]
 _parser_root = _repo_root / "ocr_tools" / "universal_doc_parser"
 for _p in (_repo_root, _parser_root):
     if str(_p) not in sys.path:

+ 971 - 0
ocr_tools/lab/cell_preprocess_lab/cell_sweep.py

@@ -0,0 +1,971 @@
+#!/usr/bin/env python3
+"""
+单元格裁剪图预处理参数扫描:去水印 / contrast(clahe/gamma/linear/text_restore)/ upscale / det 阈值 / OCR 模式。
+
+支持 contrast 在放大前/后执行两种顺序对比。
+
+默认从 **原图**(`*_raw.png`)出发,与 pipeline 二次 OCR 一致,避免对已预处理 debug 图二次去水印。
+
+用法:
+    python cell_sweep.py cell219_empty_empty_raw.png -o ./out -t "ATM存折取款"
+    python cell_sweep.py /path/to/tablecell_ocr/ -o ./out
+    python cell_sweep.py cell.png --quick --no-save-images
+    python cell_sweep.py cell.png --contrast-orders before_upscale,after_upscale
+    OCR_DET_MODEL_PATH=... OCR_REC_MODEL_PATH=... python cell_sweep.py cell.png
+
+
+    # 统计出的最优参数 tag: threshold_t150_cl_1.0_8_ob_u128_det0.5
+    # 对目录下所有 *_raw.png 验证适配性
+    python cell_sweep.py /path/to/tablecell_ocr/ -o ./out --best-only
+
+    # 自定义最优参数
+    python cell_sweep.py /path/to/tablecell_ocr/ -o ./out --best-only \
+        --best-config threshold_t150_cl_1.0_8_ob_u128_det0.5
+
+    # 指定目标文字,自动统计 HIT 命中率
+    python cell_sweep.py /path/to/tablecell_ocr/ -o ./out --best-only -t "交易类型"
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+from itertools import product
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Sequence, Tuple
+
+import cv2
+import numpy as np
+
+# Put the directory three levels above this file's folder on sys.path so the
+# ocr_utils / ocr_tools imports below resolve when the file is run as a script.
+# For ocr_tools/lab/cell_preprocess_lab/cell_sweep.py, parents[3] is the
+# directory that contains ocr_tools/.
+_repo_root = Path(__file__).resolve().parents[3]
+if str(_repo_root) not in sys.path:
+    sys.path.insert(0, str(_repo_root))
+
+from ocr_utils.watermark import WatermarkProcessor, merge_watermark_config
+from ocr_utils.watermark.contrast import enhance_document_contrast
+
+# Lower-case file extensions (with the leading dot) accepted as input images.
+_IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff", ".webp"}
+# Fallback model directory returned by _default_model_dir() when the
+# OCR_DET_MODEL_PATH environment variable is not set.
+# NOTE(review): this is a machine-specific absolute path; on other machines
+# pass --model-dir or set OCR_DET_MODEL_PATH / OCR_REC_MODEL_PATH.
+_DEFAULT_MODEL_DIR = Path(
+    "/Users/zhch158/models/modelscope_cache/models/OpenDataLab/"
+    "PDF-Extract-Kit-1___0/models/OCR/paddleocr_torch"
+)
+
+
+def _parse_csv_ints(s: str) -> List[Optional[int]]:
+    """Parse a comma-separated string into a list of optional integers.
+
+    Empty items and the tokens ``none`` / ``d`` / ``default`` (matched
+    case-insensitively) become ``None``; every other item goes through
+    ``int`` and therefore raises ``ValueError`` when it is not an integer.
+    """
+    default_tokens = ("none", "d", "default")
+    tokens = (piece.strip() for piece in s.split(","))
+    return [
+        None if (not tok or tok.lower() in default_tokens) else int(tok)
+        for tok in tokens
+    ]
+
+
+def _parse_csv_floats(s: str) -> List[float]:
+    """Parse a comma-separated string into floats, skipping blank items."""
+    values: List[float] = []
+    for piece in s.split(","):
+        piece = piece.strip()
+        if piece:
+            values.append(float(piece))
+    return values
+
+
+def _parse_csv_bools(s: str) -> List[bool]:
+    """Parse a comma-separated string of boolean words into a list of bools.
+
+    Accepts ``1/true/yes/on`` and ``0/false/no/off`` (case-insensitive,
+    surrounding whitespace ignored). Any other item, including an empty one,
+    raises ``ValueError``.
+    """
+    truthy = ("1", "true", "yes", "on")
+    falsy = ("0", "false", "no", "off")
+    flags: List[bool] = []
+    for part in s.split(","):
+        token = part.strip().lower()
+        if token in truthy:
+            flags.append(True)
+            continue
+        if token in falsy:
+            flags.append(False)
+            continue
+        raise ValueError(f"无效的 bool 值: {part!r}")
+    return flags
+
+
+def _default_model_dir() -> Path:
+    """Return the model directory to use when ``--model-dir`` is not given.
+
+    If ``OCR_DET_MODEL_PATH`` is set to a non-empty value, its parent
+    directory is returned; otherwise the module-level ``_DEFAULT_MODEL_DIR``.
+    """
+    det_model = os.environ.get("OCR_DET_MODEL_PATH")
+    return Path(det_model).parent if det_model else _DEFAULT_MODEL_DIR
+
+
+def _upscale(img: np.ndarray, min_side: int) -> np.ndarray:
+    """Enlarge ``img`` so that both sides are at least ``min_side`` pixels.
+
+    The input object is returned unchanged when both sides already reach
+    ``min_side``. Otherwise both axes are scaled by the same factor (never
+    below 1.0) using bicubic interpolation.
+    """
+    height, width = img.shape[:2]
+    if min(height, width) >= min_side:
+        return img
+    # max(..., 1) guards against a zero-sized side.
+    scale = max(min_side / max(height, 1), min_side / max(width, 1), 1.0)
+    return cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+
+
+# ── 对比度增强方法(clahe / gamma / linear / text_restore / none)──
+
+
+def _apply_contrast(
+    gray: np.ndarray,
+    *,
+    method: str,
+    clip_limit: float = 1.0,
+    tile_grid_size: int = 8,
+    gamma: float = 0.85,
+    black_percentile: float = 2.0,
+    white_percentile: float = 98.0,
+    text_black_target: int = 85,
+    background_threshold: int = 248,
+) -> np.ndarray:
+    """Apply one contrast-enhancement method to a grayscale image.
+
+    Only the keyword arguments relevant to ``method`` are forwarded to
+    ``enhance_document_contrast``. ``method="none"`` and any unrecognised
+    method return ``gray`` unchanged.
+    """
+    per_method_kwargs: Dict[str, Dict[str, Any]] = {
+        "text_restore": {
+            "text_black_target": text_black_target,
+            "background_threshold": background_threshold,
+        },
+        "clahe": {"clip_limit": clip_limit, "tile_grid_size": tile_grid_size},
+        "gamma": {"gamma": gamma},
+        "linear": {
+            "black_percentile": black_percentile,
+            "white_percentile": white_percentile,
+        },
+    }
+    extra = per_method_kwargs.get(method)
+    if extra is None:
+        # "none" or an unknown method: pass the image through untouched.
+        return gray
+    return enhance_document_contrast(gray, method=method, **extra)
+
+
+def _contrast_tag(cfg: Dict[str, Any]) -> str:
+    """Return a short tag that identifies a contrast configuration.
+
+    The tag is embedded in result tags and output image file names, so it
+    must differ for every distinct configuration in the sweep grid.
+
+    Formats:
+        none          -> ``c0``
+        text_restore  -> ``tr_<text_black_target>_<background_threshold>``
+        clahe         -> ``cl_<clip_limit>_<tile_grid_size>``
+        gamma         -> ``gm_<gamma>``
+        linear        -> ``ln_<black_percentile>_<white_percentile>``
+        anything else -> the method name itself
+    """
+    m = cfg.get("method", "none")
+    if m == "none":
+        return "c0"
+    if m == "text_restore":
+        # Include background_threshold: the grid varies it, and without it
+        # entries that differ only in that value would share one tag and
+        # overwrite each other's saved image.
+        return (
+            f"tr_{cfg.get('text_black_target', 85)}"
+            f"_{cfg.get('background_threshold', 248)}"
+        )
+    if m == "clahe":
+        return f"cl_{cfg.get('clip_limit', 1.0)}_{cfg.get('tile_grid_size', 8)}"
+    if m == "gamma":
+        return f"gm_{cfg.get('gamma', 0.85)}"
+    if m == "linear":
+        return f"ln_{cfg.get('black_percentile', 2.0)}_{cfg.get('white_percentile', 98.0)}"
+    return m
+
+
+def _build_contrast_grid(quick: bool = False) -> List[Dict[str, Any]]:
+    """Build the list of contrast configurations to sweep.
+
+    Every element is a dict with at least a ``"method"`` key. The list starts
+    with the ``none`` control entry, followed by the text_restore combinations
+    and then the clahe combinations. ``quick=True`` uses smaller value lists.
+    """
+    if quick:
+        black_targets, bg_thresholds = [60, 85], [240, 248]
+        clip_limits = [1.0, 2.0]
+    else:
+        black_targets, bg_thresholds = [60, 85, 100, 120], [240, 248, 252]
+        clip_limits = [0.5, 1.0, 2.0, 3.0, 5.0]
+    tile_sizes = [4, 8]
+
+    # Control group: no contrast enhancement.
+    control: List[Dict[str, Any]] = [{"method": "none"}]
+
+    text_restore = [
+        {"method": "text_restore", "text_black_target": target, "background_threshold": bg_th}
+        for target, bg_th in product(black_targets, bg_thresholds)
+    ]
+    clahe = [
+        {"method": "clahe", "clip_limit": clip, "tile_grid_size": tile}
+        for clip, tile in product(clip_limits, tile_sizes)
+    ]
+
+    # The gamma and linear sweeps are currently disabled. Values used before:
+    #   gamma : quick [0.5, 0.85]; full [0.4, 0.55, 0.7, 0.85]
+    #   linear: black_percentile quick [2.0, 5.0], full [2.0, 5.0, 8.0];
+    #           white_percentile [95.0, 98.0]
+    return control + text_restore + clahe
+
+
+def _preprocess(
+    raw: np.ndarray,
+    *,
+    method: str,
+    thresh: Optional[int],
+    contrast_cfg: Dict[str, Any],
+    upscale: int,
+    contrast_order: str = "before_upscale",
+) -> np.ndarray:
+    """Run the cell preprocessing pipeline and return the processed image.
+
+    Steps: watermark removal, then contrast enhancement and upscaling in the
+    order selected by ``contrast_order`` (``"before_upscale"`` or
+    ``"after_upscale"``). ``method="none"`` skips watermark removal and works
+    on a copy of ``raw``. Contrast is skipped when ``contrast_cfg`` has
+    method ``"none"`` or when ``contrast_order`` is neither known value.
+    """
+
+    def _contrast_pass(bgr: np.ndarray) -> np.ndarray:
+        # Contrast runs on grayscale; convert back to 3 channels afterwards.
+        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
+        gray = _apply_contrast(gray, **contrast_cfg)
+        return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
+
+    if method != "none":
+        user: Dict[str, Any] = {"enabled": True, "method": method}
+        # An explicit threshold only applies to the "threshold" method.
+        if method == "threshold" and thresh is not None:
+            user["threshold"] = thresh
+        cfg = merge_watermark_config("cell", user)
+        img, _ = WatermarkProcessor(cfg, scope="cell").process(raw, force=True)
+    else:
+        img = raw.copy()
+
+    wants_contrast = contrast_cfg.get("method", "none") != "none"
+
+    if wants_contrast and contrast_order == "before_upscale":
+        img = _contrast_pass(img)
+
+    img = _upscale(img, upscale)
+
+    if wants_contrast and contrast_order == "after_upscale":
+        img = _contrast_pass(img)
+
+    return img
+
+
+def _parse_rec_pair(rec_part: Any) -> Tuple[str, float]:
+    """Extract ``(text, score)`` from one recognition entry.
+
+    A list/tuple with two or more elements is read as ``(text, score, ...)``,
+    unless its first element is itself a list/tuple/dict. A one-element
+    list/tuple yields its text with score 0.0. Anything else yields
+    ``("", 0.0)``. The score is forced to 0.0 when the text is empty or the
+    score cannot be converted to float.
+    """
+    if not isinstance(rec_part, (list, tuple)):
+        return "", 0.0
+
+    size = len(rec_part)
+    if size == 1:
+        return str(rec_part[0] or "").strip(), 0.0
+    if size < 2:
+        return "", 0.0
+
+    head = rec_part[0]
+    if isinstance(head, (list, tuple, dict)):
+        # Nested structure: this is not a plain (text, score) pair.
+        return "", 0.0
+
+    text = str(head or "").strip()
+    try:
+        score = float(rec_part[1] or 0.0)
+    except (TypeError, ValueError):
+        score = 0.0
+    if not text:
+        score = 0.0
+    return text, score
+
+
+def _aggregate_rec_score(boxes: List[Dict[str, Any]]) -> float:
+    """Return the recognition score averaged over boxes, weighted by text length.
+
+    Boxes with missing or empty text contribute nothing; 0.0 is returned when
+    no box has any text.
+    """
+    char_count = 0
+    for box in boxes:
+        char_count += len(box.get("text") or "")
+    if char_count <= 0:
+        return 0.0
+
+    weighted_sum = 0
+    for box in boxes:
+        weighted_sum += len(box.get("text") or "") * float(box.get("score") or 0.0)
+    return weighted_sum / char_count
+
+
+def _ocr(engine: Any, img: np.ndarray, *, det: bool, rec: bool) -> Dict[str, Any]:
+    """Run ``engine.ocr`` on one image and normalise the result into a dict.
+
+    Returns a dict with the keys ``text`` (all box texts concatenated),
+    ``score`` (length-weighted mean from ``_aggregate_rec_score``), ``boxes``
+    (per-box dicts), ``det``, ``rec`` and ``n_boxes``. If anything inside the
+    ``try`` raises, the empty template is returned with an extra ``error`` key
+    holding the exception message, so one failed trial does not abort a sweep.
+    """
+    # Template returned (plus "error") when the OCR call or parsing fails.
+    empty: Dict[str, Any] = {
+        "text": "",
+        "score": 0.0,
+        "boxes": [],
+        "det": det,
+        "rec": rec,
+        "n_boxes": 0,
+    }
+    try:
+        res = engine.ocr(img, det=det, rec=rec)
+        # Only the first element of the result is used; a None first element
+        # is treated as "no items".
+        items = res[0] if res and res[0] is not None else []
+        boxes_out: List[Dict[str, Any]] = []
+
+        if det:
+            # Detection mode: item[0] is read as the box, item[1] as the
+            # recognition pair. Shorter or empty items are skipped.
+            for item in items:
+                if not item or len(item) < 2:
+                    continue
+                text, score = _parse_rec_pair(item[1])
+                bbox = item[0]
+                # Array-like boxes are converted to lists so the report can be
+                # serialised to JSON.
+                if hasattr(bbox, "tolist"):
+                    bbox = bbox.tolist()
+                entry: Dict[str, Any] = {
+                    "text": text,
+                    "score": round(score, 6),
+                }
+                if bbox is not None:
+                    entry["det_bbox"] = bbox
+                boxes_out.append(entry)
+        else:
+            # Recognition-only mode: each item is tried as a (text, score)
+            # pair; if that gives no text, its first element is tried instead.
+            for item in items:
+                text, score = _parse_rec_pair(item)
+                if not text and isinstance(item, (list, tuple)) and len(item) >= 1:
+                    text, score = _parse_rec_pair(item[0])
+                boxes_out.append({"text": text, "score": round(score, 6)})
+
+        text = "".join(b["text"] for b in boxes_out if b.get("text")).strip()
+        agg_score = _aggregate_rec_score(boxes_out)
+        return {
+            "text": text,
+            "score": round(agg_score, 6),
+            "boxes": boxes_out,
+            "det": det,
+            "rec": rec,
+            "n_boxes": len(boxes_out),
+        }
+    except Exception as e:
+        # Deliberate best-effort: record the failure in the row instead of
+        # raising.
+        out = dict(empty)
+        out["error"] = str(e)
+        return out
+
+
+def _make_engine(det_thresh: float, model_dir: Path) -> Any:
+    """Create a ``PytorchPaddleOCR`` engine for the given detection box threshold.
+
+    Each weight path comes from its environment variable
+    (``OCR_DET_MODEL_PATH`` / ``OCR_REC_MODEL_PATH``) when that is set to a
+    non-empty value, otherwise from the default file name under ``model_dir``.
+    """
+    # Imported here rather than at module import time.
+    from ocr_tools.pytorch_models.pytorch_paddle import PytorchPaddleOCR
+
+    def _weights(env_var: str, filename: str) -> str:
+        return os.environ.get(env_var) or str(model_dir / filename)
+
+    det_path = _weights("OCR_DET_MODEL_PATH", "ch_PP-OCRv5_det_infer.pth")
+    rec_path = _weights("OCR_REC_MODEL_PATH", "ch_PP-OCRv4_rec_server_doc_infer.pth")
+    return PytorchPaddleOCR(
+        lang="ch",
+        det_model_path=det_path,
+        rec_model_path=rec_path,
+        det_db_box_thresh=det_thresh,
+    )
+
+
+def resolve_input_image(path: Path, *, prefer_raw: bool) -> Path:
+    """Return the ``<stem>_raw<suffix>`` sibling of ``path`` when preferred and present.
+
+    ``path`` itself is returned when ``prefer_raw`` is false, when its stem
+    already ends in ``_raw``, or when no such sibling file exists.
+    """
+    if prefer_raw and not path.stem.endswith("_raw"):
+        candidate = path.parent / f"{path.stem}_raw{path.suffix}"
+        if candidate.is_file():
+            print(f"  使用原图: {candidate.name}(跳过 {path.name})")
+            return candidate
+    return path
+
+
+def collect_inputs(path: Path, *, prefer_raw: bool) -> List[Path]:
+    """Expand a file or directory argument into the list of images to process.
+
+    A file must have a supported image suffix and is passed through
+    ``resolve_input_image``. A directory is scanned non-recursively. With
+    ``prefer_raw`` and at least one ``*_raw`` image present, only the
+    ``*_raw`` images are returned; otherwise the non-raw images are returned,
+    falling back to every image when that selection is empty.
+
+    Raises:
+        ValueError: ``path`` is a file with an unsupported suffix.
+        FileNotFoundError: ``path`` is neither file nor directory, or the
+            directory contains no image.
+    """
+    if path.is_file():
+        if path.suffix.lower() in _IMAGE_SUFFIXES:
+            return [resolve_input_image(path, prefer_raw=prefer_raw)]
+        raise ValueError(f"不支持的图像格式: {path}")
+
+    if not path.is_dir():
+        raise FileNotFoundError(path)
+
+    candidates = [
+        entry
+        for entry in path.iterdir()
+        if entry.is_file() and entry.suffix.lower() in _IMAGE_SUFFIXES
+    ]
+    candidates.sort()
+    if not candidates:
+        raise FileNotFoundError(f"目录内无图像: {path}")
+
+    def _is_raw(p: Path) -> bool:
+        return p.stem.endswith("_raw")
+
+    def _has_raw_sibling(p: Path) -> bool:
+        return (p.parent / f"{p.stem}_raw{p.suffix}").is_file()
+
+    if prefer_raw:
+        raw_only = [p for p in candidates if _is_raw(p)]
+        if raw_only:
+            return raw_only
+
+    kept = [
+        p
+        for p in candidates
+        if not _is_raw(p) and not (prefer_raw and _has_raw_sibling(p))
+    ]
+    return kept or candidates
+
+
+def _match_hit(text: str, target: Optional[str]) -> Optional[str]:
+    """Classify an OCR text against the expected target.
+
+    Returns ``None`` for empty text, ``"nonempty"`` when no target is given,
+    ``"full"`` when the target is a substring of the text, ``"partial"`` when
+    both target and text are all-digit strings of length 6 or more, and
+    ``None`` otherwise.
+    """
+    if not text:
+        return None
+    if not target:
+        return "nonempty"
+    if target in text:
+        return "full"
+    both_long_digits = all(len(v) >= 6 and v.isdigit() for v in (target, text))
+    return "partial" if both_long_digits else None
+
+
+def run_sweep(
+    input_path: Path,
+    out_dir: Path,
+    *,
+    prefer_raw: bool,
+    target: Optional[str],
+    model_dir: Path,
+    methods: Sequence[str],
+    thresholds: Sequence[Optional[int]],
+    contrast_grid: List[Dict[str, Any]],
+    contrast_orders: Sequence[str],
+    upscales: Sequence[int],
+    det_threshs: Sequence[float],
+    save_images: bool,
+    run_baseline: bool,
+    baseline_upscale: int,
+) -> Dict[str, Any]:
+    """Run the preprocessing x OCR grid on one image and write a JSON report.
+
+    Every combination of watermark method, threshold, contrast config,
+    contrast order, upscale and detection threshold is preprocessed and then
+    recognised in two OCR modes (``det_rec`` and ``whole_rec``). Results go to
+    ``<out_dir>/<stem>/sweep_report.json`` and a summary is printed.
+
+    Only the ``"threshold"`` method consumes ``thresholds``; every other
+    method runs exactly once per remaining combination with ``thresh=None``,
+    whatever ``thresholds`` contains.
+
+    Args:
+        input_path: Image to sweep; may be replaced by its ``*_raw`` sibling.
+        out_dir: Root output directory; a per-image sub-directory is created.
+        prefer_raw: Passed to ``resolve_input_image``.
+        target: Expected text used to mark hits; ``None`` means any non-empty
+            text counts as a hit.
+        model_dir: Directory with the OCR model weights.
+        save_images: Write each preprocessed image as ``<tag>.png``.
+        run_baseline: Also run an "upscale only" control per det threshold.
+        baseline_upscale: Upscale target for the control.
+
+    Returns:
+        The report dict that was written to disk.
+
+    Raises:
+        RuntimeError: The image could not be read.
+    """
+    resolved = resolve_input_image(input_path, prefer_raw=prefer_raw)
+    raw = cv2.imread(str(resolved))
+    if raw is None:
+        raise RuntimeError(f"无法读取图像: {resolved}")
+
+    stem = resolved.stem.removesuffix("_raw")
+    cell_out = out_dir / stem
+    cell_out.mkdir(parents=True, exist_ok=True)
+
+    ocr_modes: List[Tuple[str, bool, bool]] = [
+        ("det_rec", True, True),
+        ("whole_rec", False, True),
+    ]
+
+    results: List[Dict[str, Any]] = []
+    hits: List[Dict[str, Any]] = []
+    engines: Dict[float, Any] = {}  # one OCR engine per detection threshold
+    total = 0
+
+    def _record(row: Dict[str, Any]) -> Optional[str]:
+        """Append a result row and, when it matches the target, mark it as a hit."""
+        results.append(row)
+        verdict = _match_hit(row.get("text", ""), target)
+        if verdict:
+            row["match"] = verdict
+            hits.append(row)
+        return verdict
+
+    for method in methods:
+        # A threshold only means something for the "threshold" method. The
+        # other methods run once with None, so they are not dropped when the
+        # threshold list has no None entry (as in --quick mode).
+        method_thresholds: Sequence[Optional[int]] = (
+            thresholds if method == "threshold" else [None]
+        )
+        for thresh, contrast_cfg, c_order, upscale, det_th in product(
+            method_thresholds, contrast_grid, contrast_orders, upscales, det_threshs
+        ):
+            if det_th not in engines:
+                print(f"  [{stem}] 加载 OCR det_db_box_thresh={det_th} ...")
+                engines[det_th] = _make_engine(det_th, model_dir)
+
+            img = _preprocess(
+                raw,
+                method=method,
+                thresh=thresh,
+                contrast_cfg=contrast_cfg,
+                upscale=upscale,
+                contrast_order=c_order,
+            )
+            c_tag = _contrast_tag(contrast_cfg)
+            o_tag = "b" if c_order == "before_upscale" else "a"
+            # Test against None so that a threshold of 0 is not shown as "d".
+            t_tag = "d" if thresh is None else thresh
+            tag = f"{method}_t{t_tag}_{c_tag}_o{o_tag}_u{upscale}_det{det_th}"
+            if save_images:
+                cv2.imwrite(str(cell_out / f"{tag}.png"), img)
+
+            for mode_name, det, rec in ocr_modes:
+                total += 1
+                ocr = _ocr(engines[det_th], img, det=det, rec=rec)
+                row: Dict[str, Any] = {
+                    "tag": tag,
+                    "method": method,
+                    "threshold": thresh,
+                    "contrast_method": contrast_cfg.get("method", "none"),
+                    "contrast_order": c_order,
+                    "contrast_cfg": contrast_cfg,
+                    "upscale": upscale,
+                    "det_db_box_thresh": det_th,
+                    "ocr_mode": mode_name,
+                    **ocr,
+                }
+                m = _record(row)
+                if m:
+                    print(
+                        f"  HIT [{m}] {mode_name} {tag} "
+                        f"score={row.get('score')} -> {row.get('text')!r}"
+                    )
+
+    if run_baseline:
+        # The control image does not depend on the detection threshold, so
+        # build and save it once instead of once per threshold.
+        base_tag = f"baseline_upscale{baseline_upscale}"
+        base_img = _upscale(raw, baseline_upscale)
+        if save_images and det_threshs:
+            cv2.imwrite(str(cell_out / f"{base_tag}.png"), base_img)
+        for det_th in det_threshs:
+            if det_th not in engines:
+                engines[det_th] = _make_engine(det_th, model_dir)
+            for mode_name, det, rec in ocr_modes:
+                ocr = _ocr(engines[det_th], base_img, det=det, rec=rec)
+                _record(
+                    {
+                        "tag": base_tag,
+                        "det_db_box_thresh": det_th,
+                        "ocr_mode": mode_name,
+                        **ocr,
+                    }
+                )
+
+    report = {
+        "input": str(resolved),
+        "input_requested": str(input_path),
+        "output_dir": str(cell_out),
+        "target": target,
+        # Grid trials only; baseline rows are not counted here.
+        "total_trials": total,
+        "hits": hits,
+        "all_results": results,
+    }
+    report_path = cell_out / "sweep_report.json"
+    report_path.write_text(
+        json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8"
+    )
+
+    # Print the conclusions: results ranked by OCR score and compared by group.
+    _print_conclusions(stem, results, target)
+
+    return report
+
+
+def _print_conclusions(
+    stem: str,
+    results: List[Dict[str, Any]],
+    target: Optional[str],
+) -> None:
+    """Print a summary of one sweep: results ranked by OCR score and grouped.
+
+    Sections printed: global top 5, best per contrast method, best watermark
+    setting per contrast method, contrast before vs. after upscaling, and
+    (only when ``target`` is given) hit counts. Prints nothing when
+    ``results`` is empty.
+    """
+    if not results:
+        return
+
+    print(f"\n{'='*70}")
+    print(f"  实验结论: {stem}")
+    if target:
+        print(f"  目标文字: {target}")
+    print(f"{'='*70}")
+
+    # Prefer det_rec rows (detection + recognition) that produced text; fall
+    # back to any row with text when there are none.
+    dr_results = [r for r in results if r.get("ocr_mode") == "det_rec" and r.get("text")]
+    if not dr_results:
+        dr_results = [r for r in results if r.get("text")]
+
+    if not dr_results:
+        print("  (无有效 OCR 结果)")
+        return
+
+    # 1. Global top 5 by score (descending).
+    scored = sorted(dr_results, key=lambda r: -(r.get("score") or 0))
+    print("\n  全局 OCR 得分 Top-5:")
+    for i, r in enumerate(scored[:5], 1):
+        print(f"    {i}. score={r.get('score', 0):.4f}  text={r.get('text', '')!r}")
+        print(f"       tag={r.get('tag', '')}")
+
+    # 2. Best row per contrast method. `scored` is already sorted, so the
+    #    first row of each group is that group's best. Baseline rows have no
+    #    contrast_method and land in the "?" group.
+    print("\n  按 contrast 方法分组最优(score 最高):")
+    groups: Dict[str, List[Dict[str, Any]]] = {}
+    for r in scored:
+        cm = r.get("contrast_method", "?")
+        groups.setdefault(cm, []).append(r)
+
+    for cm in sorted(groups.keys()):
+        best = groups[cm][0]
+        wm = best.get("method", "?")
+        print(f"    [{cm}] 最佳: score={best.get('score', 0):.4f} "
+              f"wm={wm} upscale={best.get('upscale')} "
+              f"text={best.get('text', '')!r}")
+
+    # 3. Watermark removal comparison: keep the best row for each
+    #    (contrast method, watermark method) pair, then print the best
+    #    watermark setting for each contrast method.
+    print("\n  去水印开关对比(同 contrast 方法,最高 score):")
+    wm_groups: Dict[str, Dict[str, Any]] = {}
+    for r in scored:
+        cm = r.get("contrast_method", "?")
+        wm = r.get("method", "?") if r.get("method") != "none" else "无去水印"
+        key = f"{cm}|{wm}"
+        cur_score = r.get("score") or 0
+        prev_score = (wm_groups.get(key) or {}).get("score") or 0
+        if key not in wm_groups or cur_score > prev_score:
+            wm_groups[key] = r
+
+    for cm in sorted(set(r.get("contrast_method", "?") for r in scored)):
+        wm_rows = [r for k, r in wm_groups.items() if k.startswith(cm + "|")]
+        if wm_rows:
+            best_row = max(wm_rows, key=lambda r: r.get("score") or 0)
+            wm_label = "无去水印" if best_row.get("method") == "none" else best_row.get("method", "?")
+            print(f"    [{cm}] 最优: wm={wm_label}  score={best_row.get('score', 0):.4f} "
+                  f"text={best_row.get('text', '')!r}")
+
+    # 4. Contrast before vs. after upscaling: best score per
+    #    (contrast method, order) pair.
+    print("\n  放大前/后对比(同方法,最高 score):")
+    order_data: Dict[str, Dict[str, Any]] = {}
+    for r in scored:
+        cm = r.get("contrast_method", "?")
+        co = r.get("contrast_order", "?")
+        key = f"{cm}|{co}"
+        cur_score = r.get("score") or 0
+        prev_score = (order_data.get(key) or {}).get("score") or 0
+        if key not in order_data or cur_score > prev_score:
+            order_data[key] = r
+
+    for cm in sorted(set(r.get("contrast_method", "?") for r in scored)):
+        b_score = (order_data.get(f"{cm}|before_upscale") or {}).get("score") or 0
+        a_score = (order_data.get(f"{cm}|after_upscale") or {}).get("score") or 0
+        better = "放大前" if b_score > a_score else ("放大后" if a_score > b_score else "持平")
+        # Skip methods with no non-zero score in either order.
+        if b_score or a_score:
+            print(f"    [{cm}] 放大前={b_score:.4f}  放大后={a_score:.4f}  更优: {better}")
+
+    # 5. Hit statistics, counted over all rows (every OCR mode, including
+    #    baseline rows), not only the det_rec subset used above.
+    if target:
+        hit_count = sum(1 for r in results if r.get("match"))
+        hit_by_cm: Dict[str, int] = {}
+        for r in results:
+            if r.get("match"):
+                cm = r.get("contrast_method", "?")
+                hit_by_cm[cm] = hit_by_cm.get(cm, 0) + 1
+        print(f"\n  HIT 命中率 (target={target}): {hit_count}/{len(results)}")
+        for cm in sorted(hit_by_cm.keys()):
+            print(f"    [{cm}] HIT={hit_by_cm[cm]}")
+
+    print(f"{'='*70}\n")
+
+
+def _parse_best_config(tag: str) -> Dict[str, Any]:
+    """Parse a sweep tag such as ``threshold_t150_cl_1.0_8_ob_u128_det0.5``.
+
+    Tag layout: ``{method}_t{thresh}_{c_tag}_o{b|a}_u{upscale}_det{det_th}``
+    where ``method`` is ``threshold``, ``masked_adaptive`` or ``none`` and
+    ``c_tag`` is one of ``c0``, ``cl_<clip>_<tile>``, ``tr_<target>``,
+    ``tr_<target>_<background_threshold>``, ``gm_<gamma>`` or
+    ``ln_<black>_<white>``.
+
+    Returns:
+        A dict with the keys ``method``, ``threshold`` (``None`` when the tag
+        holds a non-numeric placeholder such as ``d``), ``contrast_order``,
+        ``upscale``, ``det_db_box_thresh`` and ``contrast_cfg``.
+
+    Raises:
+        ValueError: The tag or its contrast part cannot be parsed, including
+            contrast parts with missing or non-numeric fields.
+    """
+    import re
+
+    tag = tag.strip()
+    m = re.match(
+        r"(threshold|masked_adaptive|none)_t(\w+?)_(.+?)_o([ba])_u(\d+)_det([\d.]+)$",
+        tag,
+    )
+    if not m:
+        raise ValueError(f"无法解析 best-config tag: {tag!r}")
+    method, thresh_str, c_part, order_char, upscale, det_th = m.groups()
+
+    cfg: Dict[str, Any] = {
+        "method": method,
+        "threshold": int(thresh_str) if thresh_str.isdigit() else None,
+        "contrast_order": "before_upscale" if order_char == "b" else "after_upscale",
+        "upscale": int(upscale),
+        "det_db_box_thresh": float(det_th),
+    }
+
+    # Contrast part: cl_1.0_8 | tr_85[_248] | gm_0.85 | ln_2.0_98.0 | c0
+    kind, *values = c_part.split("_")
+    contrast_cfg: Optional[Dict[str, Any]] = None
+    try:
+        if c_part == "c0":
+            contrast_cfg = {"method": "none"}
+        elif kind == "cl":
+            contrast_cfg = {
+                "method": "clahe",
+                "clip_limit": float(values[0]),
+                "tile_grid_size": int(values[1]),
+            }
+        elif kind == "tr":
+            contrast_cfg = {"method": "text_restore", "text_black_target": int(values[0])}
+            # The background threshold is optional, so tags written without
+            # it still parse.
+            if len(values) > 1:
+                contrast_cfg["background_threshold"] = int(values[1])
+        elif kind == "gm":
+            contrast_cfg = {"method": "gamma", "gamma": float(values[0])}
+        elif kind == "ln":
+            contrast_cfg = {
+                "method": "linear",
+                "black_percentile": float(values[0]),
+                "white_percentile": float(values[1]),
+            }
+    except (IndexError, ValueError) as exc:
+        # Missing or non-numeric fields: report them as the same ValueError
+        # callers already get for an unknown contrast tag.
+        raise ValueError(f"无法解析 contrast tag: {c_part!r} (in {tag})") from exc
+    if contrast_cfg is None:
+        raise ValueError(f"无法解析 contrast tag: {c_part!r} (in {tag})")
+
+    cfg["contrast_cfg"] = contrast_cfg
+    return cfg
+
+
+def run_best_config(
+    input_path: Path,
+    out_dir: Path,
+    *,
+    prefer_raw: bool,
+    best_cfg: Dict[str, Any],
+    model_dir: Path,
+    save_images: bool,
+) -> Dict[str, Any]:
+    """Preprocess one image with a fixed configuration and OCR it once.
+
+    The image is recognised in detection + recognition mode and the result is
+    written to ``<out_dir>/<stem>/best_result.json``. The report dict is
+    returned.
+
+    Raises:
+        RuntimeError: The image could not be read.
+    """
+    source = resolve_input_image(input_path, prefer_raw=prefer_raw)
+    raw = cv2.imread(str(source))
+    if raw is None:
+        raise RuntimeError(f"无法读取图像: {source}")
+
+    name = source.stem
+    if name.endswith("_raw"):
+        name = name.removesuffix("_raw")
+    cell_out = out_dir / name
+    cell_out.mkdir(parents=True, exist_ok=True)
+
+    # NOTE: a new engine is built on every call.
+    engine = _make_engine(best_cfg["det_db_box_thresh"], model_dir)
+
+    wm_method = best_cfg["method"]
+    wm_thresh = best_cfg.get("threshold")
+    contrast_cfg = best_cfg["contrast_cfg"]
+    upscale = best_cfg["upscale"]
+    order = best_cfg["contrast_order"]
+    img = _preprocess(
+        raw,
+        method=wm_method,
+        thresh=wm_thresh,
+        contrast_cfg=contrast_cfg,
+        upscale=upscale,
+        contrast_order=order,
+    )
+
+    tag = best_cfg.get("_tag", "best")
+    if save_images:
+        cv2.imwrite(str(cell_out / f"{tag}.png"), img)
+
+    row: Dict[str, Any] = {
+        "tag": tag,
+        "method": best_cfg["method"],
+        "threshold": best_cfg.get("threshold"),
+        "contrast_method": best_cfg["contrast_cfg"].get("method", "none"),
+        "contrast_order": best_cfg["contrast_order"],
+        "contrast_cfg": best_cfg["contrast_cfg"],
+        "upscale": best_cfg["upscale"],
+        "det_db_box_thresh": best_cfg["det_db_box_thresh"],
+        "ocr_mode": "det_rec",
+    }
+    row.update(_ocr(engine, img, det=True, rec=True))
+
+    report = {
+        "input": str(source),
+        "input_requested": str(input_path),
+        "output_dir": str(cell_out),
+        "result": row,
+    }
+    serialized = json.dumps(report, ensure_ascii=False, indent=2)
+    (cell_out / "best_result.json").write_text(serialized, encoding="utf-8")
+    return report
+
+
+def _build_arg_parser() -> argparse.ArgumentParser:
+    """Build the command-line parser for the sweep / best-config tool.
+
+    List-valued options (``--methods``, ``--thresholds``, ``--upscales``, ...)
+    are kept as comma-separated strings; ``main`` splits and converts them.
+    """
+    p = argparse.ArgumentParser(
+        description="单元格图预处理 + OCR 参数网格扫描(对齐 pipeline 格级二次 OCR)",
+    )
+    p.add_argument(
+        "input",
+        type=Path,
+        help="单元格裁剪图路径,或 tablecell_ocr 目录(批量扫描)",
+    )
+    p.add_argument(
+        "-o",
+        "--output",
+        type=Path,
+        default=None,
+        help="输出目录,默认 <input_dir|input_parent>/sweep_out/<stem>",
+    )
+    p.add_argument(
+        "-t",
+        "--target",
+        default=None,
+        help="期望 OCR 文本;用于标记 HIT(子串匹配)。省略则任意非空为 HIT",
+    )
+    p.add_argument(
+        "--model-dir",
+        type=Path,
+        default=None,
+        help="PaddleOCR torch 模型目录(含 det/rec .pth),也可用 OCR_*_MODEL_PATH",
+    )
+    p.add_argument(
+        "--no-prefer-raw",
+        action="store_true",
+        help="不自动选用同名的 *_raw.png",
+    )
+    p.add_argument(
+        "--quick",
+        action="store_true",
+        # The values shown here must match the quick-mode lists in main(),
+        # which sets thresholds to [150, 155].
+        help="缩小网格(threshold 150,155 × upscale 128,192 × det 0.5 × contrast 精简)",
+    )
+    p.add_argument(
+        "--methods",
+        default="threshold,masked_adaptive,none",
+        help="去水印方式,逗号分隔;none=不去水印",
+    )
+    p.add_argument(
+        "--thresholds",
+        default="155,165,none",
+        help="threshold 法的阈值;none=预设默认",
+    )
+    p.add_argument(
+        "--contrast-orders",
+        default="before_upscale,after_upscale",
+        help="contrast 执行顺序: before_upscale(放大前), after_upscale(放大后), 逗号组合",
+    )
+    p.add_argument(
+        "--upscales",
+        default="128,192",
+        help="最短边放大目标,逗号分隔整数",
+    )
+    p.add_argument(
+        "--det-threshs",
+        # A wider sweep ("0.2,0.3,0.4,0.5") was used previously.
+        default="0.5",
+        help="det_db_box_thresh,逗号分隔",
+    )
+    p.add_argument(
+        "--no-save-images",
+        action="store_true",
+        help="不写出中间预处理 png(仅报告)",
+    )
+    p.add_argument(
+        "--no-baseline",
+        action="store_true",
+        help="跳过「仅放大、不去水印」对照组",
+    )
+    p.add_argument(
+        "--baseline-upscale",
+        type=int,
+        default=192,
+        help="baseline 对照组的最短边放大",
+    )
+    p.add_argument(
+        "--best-only",
+        action="store_true",
+        help="不跑参数网格,对目录下所有图用 --best-config 指定参数跑一次,验证适配性",
+    )
+    p.add_argument(
+        "--best-config",
+        default="threshold_t150_cl_1.0_8_ob_u128_det0.5",
+        help="最优参数 tag,如 threshold_t150_cl_1.0_8_ob_u128_det0.5",
+    )
+    return p
+
+
+def main(argv: Optional[Sequence[str]] = None) -> None:
+    """Command-line entry point.
+
+    Parses ``argv`` (``sys.argv[1:]`` when ``None``), collects the input
+    images and then runs one of two modes:
+
+    * ``--best-only``: run the single configuration given by ``--best-config``
+      on every image and write ``best_summary.json``.
+    * default: run the full parameter sweep on every image and write
+      ``sweep_index.json``.
+    """
+    args = _build_arg_parser().parse_args(argv)
+    inputs = collect_inputs(args.input, prefer_raw=not args.no_prefer_raw)
+    if not inputs:
+        raise SystemExit("未找到可扫描的图像")
+
+    # Output root: explicit --output, else a "sweep_out" folder beside the
+    # input file or inside the input directory.
+    if args.output is not None:
+        out_root = args.output
+    elif args.input.is_file():
+        out_root = args.input.parent / "sweep_out"
+    else:
+        out_root = args.input / "sweep_out"
+    out_root.mkdir(parents=True, exist_ok=True)
+
+    model_dir = args.model_dir or _default_model_dir()
+
+    if args.best_only:
+        # Validation mode: run the chosen configuration once on every image.
+        best_cfg = _parse_best_config(args.best_config)
+        # Keep the original tag so run_best_config can use it for naming.
+        best_cfg["_tag"] = args.best_config
+        print(f"最佳参数验证模式: {args.best_config}")
+        print(f"  解析: method={best_cfg['method']} contrast={best_cfg['contrast_cfg'].get('method')} "
+              f"upscale={best_cfg['upscale']} order={best_cfg['contrast_order']}")
+        print(f"  共 {len(inputs)} 张图")
+
+        all_texts: List[Dict[str, Any]] = []
+        hit_count = 0
+        for img_path in inputs:
+            report = run_best_config(
+                img_path, out_root,
+                prefer_raw=not args.no_prefer_raw,
+                best_cfg=best_cfg,
+                model_dir=model_dir,
+                save_images=not args.no_save_images,
+            )
+            result = report["result"]
+            text = result.get("text", "")
+            score = result.get("score", 0)
+            all_texts.append({
+                "input": img_path.name,
+                "text": text,
+                "score": score,
+                "report": str(Path(report["output_dir"]) / "best_result.json"),
+            })
+            m = _match_hit(text, args.target)
+            hit_info = f" [HIT: {m}]" if m else ""
+            print(f"  {img_path.name}: score={score:.4f} text={text!r}{hit_info}")
+            if m:
+                hit_count += 1
+
+        # Summary across all images.
+        summary_path = out_root / "best_summary.json"
+        summary_data = {
+            "best_config": args.best_config,
+            "total": len(all_texts),
+            "hits": hit_count,
+            "target": args.target,
+            "results": all_texts,
+        }
+        summary_path.write_text(json.dumps(summary_data, ensure_ascii=False, indent=2), encoding="utf-8")
+        print(f"\n汇总: {hit_count}/{len(all_texts)} HIT -> {summary_path}")
+        return
+
+    # Normal mode: parameter grid sweep.
+    methods = [m.strip() for m in args.methods.split(",") if m.strip()]
+    contrast_orders = [o.strip() for o in args.contrast_orders.split(",") if o.strip()]
+
+    if args.quick:
+        # --quick uses fixed lists and ignores --thresholds, --upscales and
+        # --det-threshs.
+        thresholds = [150, 155]
+        upscales = [128, 192]
+        det_threshs = [0.5]
+    else:
+        thresholds = _parse_csv_ints(args.thresholds)
+        upscales = [int(x) for x in args.upscales.split(",") if x.strip()]
+        det_threshs = _parse_csv_floats(args.det_threshs)
+
+    contrast_grid = _build_contrast_grid(quick=args.quick)
+
+    print(f"扫描 {len(inputs)} 张图 -> {out_root}")
+    print(f"  methods={methods} thresholds={thresholds} upscales={upscales}")
+    print(f"  contrast_methods={len(contrast_grid)} orders={contrast_orders}")
+    if args.target:
+        print(f"  target={args.target!r}")
+
+    summary: List[Dict[str, Any]] = []
+    for img_path in inputs:
+        print(f"\n=== {img_path.name} ===")
+        report = run_sweep(
+            img_path,
+            out_root,
+            prefer_raw=not args.no_prefer_raw,
+            target=args.target,
+            model_dir=model_dir,
+            methods=methods,
+            thresholds=thresholds,
+            contrast_grid=contrast_grid,
+            contrast_orders=contrast_orders,
+            upscales=upscales,
+            det_threshs=det_threshs,
+            save_images=not args.no_save_images,
+            run_baseline=not args.no_baseline,
+            baseline_upscale=args.baseline_upscale,
+        )
+        summary.append(
+            {
+                "input": report["input"],
+                "hits": len(report["hits"]),
+                "report": str(Path(report["output_dir"]) / "sweep_report.json"),
+            }
+        )
+
+    # Index file that lists the per-image reports.
+    index_path = out_root / "sweep_index.json"
+    index_path.write_text(
+        json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8"
+    )
+    print(f"\n全部完成,索引: {index_path}")
+    for s in summary:
+        print(f"  {s['input']}: {s['hits']} hits -> {s['report']}")
+
+
+if __name__ == "__main__":
+    # When the script is started without arguments, build sys.argv from the
+    # hard-coded development configuration below.
+    # NOTE(review): the paths are machine-specific absolute paths.
+    if len(sys.argv) == 1:
+        print("ℹ️  未提供命令行参数,使用默认配置运行...")
+        default_config = {
+            # Alternative presets, kept commented out:
+            # "input": "/Users/zhch158/workspace/data/流水分析/彭_广东兴宁农村商业银行/bank_statement_yusys_local/debug/table_recognition_wired/tablecell_ocr/彭_广东兴宁农村商业银行_page_002_0/cell219_empty_empty_raw.png",
+            # "output": "./output/彭_广东兴宁农村商业银行/cell219_sweep",
+            # "target": "ATM存折取款",
+
+            # "input": "/Users/zhch158/workspace/data/流水分析/彭_广东兴宁农村商业银行/bank_statement_yusys_local/debug/table_recognition_wired/tablecell_ocr/彭_广东兴宁农村商业银行_page_002_0/cell007_whole_longer_易型交类_raw.png",
+            # "output": "./output/彭_广东兴宁农村商业银行/cell007_sweep",
+            # "target": "交易类型",
+            # "quick": True,
+
+            # "input": "/Users/zhch158/workspace/data/流水分析/钟_广东陆丰农村商业银行/bank_statement_yusys_local/debug/table_recognition_wired/tablecell_ocr/钟_广东陆丰农村商业银行_page_001_0/cell217_empty_empty_raw.png",
+            # "output": "./output/钟_广东陆丰农村商业银行/cell217_sweep",
+            # "target": "专项资金",
+            # "quick": True,
+
+            # "input": "/Users/zhch158/workspace/data/流水分析/彭_广东兴宁农村商业银行/bank_statement_yusys_local/debug/table_recognition_wired/tablecell_ocr/彭_广东兴宁农村商业银行_page_002_0",
+            # "output": "./output/彭_广东兴宁农村商业银行",
+            # "best-config": "threshold_t150_cl_1.0_8_ob_u128_det0.5",
+            # "best-only": True,
+
+            # Active preset: best-config validation over one directory.
+            "input": "/Users/zhch158/workspace/data/流水分析/钟_广东陆丰农村商业银行/bank_statement_yusys_local/debug/table_recognition_wired/tablecell_ocr/钟_广东陆丰农村商业银行_page_001_0",
+            "output": "./output/钟_广东陆丰农村商业银行",
+            # "best-config": "threshold_t150_cl_1.0_8_ob_u128_det0.5",
+            "best-config": "threshold_t150_cl_1.0_4_ob_u128_det0.5",
+            "best-only": True,
+        }
+        # "input" is the positional argument; every other key becomes a
+        # --flag. True booleans become bare flags, False booleans are
+        # dropped, and all other values are passed as "--flag value".
+        sys.argv = [sys.argv[0], default_config["input"]]
+        for key, value in default_config.items():
+            if key == "input":
+                continue
+            flag = f"--{key.replace('_', '-')}"
+            if isinstance(value, bool) and value:
+                sys.argv.append(flag)
+            elif not isinstance(value, bool):
+                sys.argv.extend([flag, str(value)])
+
+    # main() returns None, so the process exits with status 0 on success.
+    sys.exit(main())

+ 31 - 4
ocr_tools/gan_experiments_lab/evaluate.py → ocr_tools/lab/gan_experiments_lab/evaluate.py

@@ -32,7 +32,7 @@ import cv2
 import numpy as np
 
 # 将 ocr_platform 根目录加入 sys.path,以便导入 ocr_utils
-_repo_root = Path(__file__).parents[2]
+_repo_root = Path(__file__).resolve().parents[3]
 if str(_repo_root) not in sys.path:
     sys.path.insert(0, str(_repo_root))
 
@@ -192,7 +192,6 @@ def _make_compare_image(
     wm_mask: Optional[np.ndarray] = None,
 ) -> np.ndarray:
     """生成四联对比图。"""
-    h, w = baseline_gray.shape[:2] if baseline_gray.ndim == 2 else baseline_gray.shape
 
     def _to_bgr(arr: np.ndarray) -> np.ndarray:
         if arr.ndim == 2:
@@ -229,7 +228,6 @@ def _make_compare_image(
 
     labeled = []
     for panel, label in zip(panels, labels):
-        h_p = panel.shape[0]
         # 底部加标签条
         bar = np.ones((36, panel.shape[1], 3), dtype=np.uint8) * 240
         cv2.putText(bar, label, (12, 24), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 0), 1)
@@ -280,6 +278,9 @@ def evaluate(
     clean_dir: Optional[Path] = None,
     device: str = "cpu",
     gan_only: bool = False,
+    lama_ckpt: Optional[Path] = None,
+    lama_config: Optional[Path] = None,
+    lama_repo: Optional[Path] = None,
 ) -> None:
     """批量评估。"""
     img_files = sorted([
@@ -301,7 +302,12 @@ def evaluate(
     baseline_cfg = _baseline_config()
     wm_cfg = _gan_wm_config()
 
-    inpainter = LamaInpainter(device=device)
+    inpainter = LamaInpainter(
+        device=device,
+        model_ckpt_path=str(lama_ckpt) if lama_ckpt else None,
+        model_config_path=str(lama_config) if lama_config else None,
+        lama_repo_path=str(lama_repo) if lama_repo else None,
+    )
     available = inpainter.is_available
     logger.info(f"LaMa 可用: {available}, backend: {inpainter._backend or '未加载'}")
 
@@ -424,6 +430,24 @@ def main():
                         help="推理设备")
     parser.add_argument("--gan-only", action="store_true",
                         help="仅运行GAN(跳过baseline)")
+    parser.add_argument(
+        "--lama-ckpt",
+        type=Path,
+        default=Path("/Users/zhch158/models/big-lama/models/best.ckpt"),
+        help="LaMa 权重文件路径(默认使用本地已下载权重,不走下载)",
+    )
+    parser.add_argument(
+        "--lama-config",
+        type=Path,
+        default=None,
+        help="LaMa config.yaml 路径(默认自动从 ckpt 同级目录推断)",
+    )
+    parser.add_argument(
+        "--lama-repo",
+        type=Path,
+        default=None,
+        help="LaMa 仓库路径(用于导入 saicinpainting,默认自动探测)",
+    )
     args = parser.parse_args()
 
     evaluate(
@@ -432,6 +456,9 @@ def main():
         clean_dir=args.clean_dir,
         device=args.device,
         gan_only=args.gan_only,
+        lama_ckpt=args.lama_ckpt,
+        lama_config=args.lama_config,
+        lama_repo=args.lama_repo,
     )
 
 

+ 46 - 16
ocr_tools/gan_experiments_lab/lama_inpaint.py → ocr_tools/lab/gan_experiments_lab/lama_inpaint.py

@@ -34,7 +34,7 @@ def _check_lama_repo() -> Optional[Path]:
     """检查本地是否有 lama 仓库并已加入 sys.path。"""
     candidates = [
         Path(__file__).parent / "lama",
-        Path(__file__).parents[2] / "lama",
+        Path(__file__).parents[3] / "lama",
         Path.home() / "lama",
         Path("/tmp/lama"),
     ]
@@ -53,10 +53,16 @@ class LamaInpainter:
         device: str = "cpu",
         inference_size: Optional[int] = None,
         pad_to_multiple: int = 8,
+        model_ckpt_path: Optional[str] = None,
+        model_config_path: Optional[str] = None,
+        lama_repo_path: Optional[str] = None,
     ):
         self._device = device
         self._inference_size = inference_size  # None = 保持原尺寸
         self._pad_to_multiple = pad_to_multiple
+        self._model_ckpt_path = Path(model_ckpt_path).expanduser() if model_ckpt_path else None
+        self._model_config_path = Path(model_config_path).expanduser() if model_config_path else None
+        self._preferred_repo_path = Path(lama_repo_path).expanduser() if lama_repo_path else None
         self._model = None
         self._backend = None  # "simple_lama" | "lama_repo" | "opencv"
         self._lama_repo_path: Optional[Path] = None
@@ -65,6 +71,10 @@ class LamaInpainter:
     def is_available(self) -> bool:
         if self._backend is not None:
             return self._backend != "opencv"
+        # 显式提供本地权重/仓库时,优先走 lama_repo 流程
+        if self._model_ckpt_path or self._preferred_repo_path:
+            self._backend = "lama_repo"
+            return True
         if _check_simple_lama():
             self._backend = "simple_lama"
             return True
@@ -78,11 +88,16 @@ class LamaInpainter:
         if self._model is not None:
             return True
 
-        if _check_simple_lama():
+        if _check_simple_lama() and not self._model_ckpt_path:
             return self._load_simple_lama()
-        repo = _check_lama_repo()
-        if repo:
-            return self._load_lama_repo(repo)
+        repo = self._preferred_repo_path or _check_lama_repo()
+        if repo or self._model_ckpt_path:
+            if self._load_lama_repo(repo):
+                return True
+            # lama_repo 失败后,若环境有 simple_lama,退回 simple_lama 而非 OpenCV
+            if _check_simple_lama():
+                return self._load_simple_lama()
+            return False
 
         logger.warning("LaMa backends 都不可用,将回退 OpenCV inpainting")
         self._backend = "opencv"
@@ -99,22 +114,34 @@ class LamaInpainter:
             logger.warning(f"simple_lama_inpainting 加载失败: {e}")
             return False
 
-    def _load_lama_repo(self, repo_path: Path) -> bool:
+    def _load_lama_repo(self, repo_path: Optional[Path]) -> bool:
         try:
-            if str(repo_path) not in sys.path:
+            if repo_path and str(repo_path) not in sys.path:
                 sys.path.insert(0, str(repo_path))
 
             from omegaconf import OmegaConf
             from saicinpainting.training.trainers import load_checkpoint
 
-            config_path = repo_path / "big-lama" / "config.yaml"
-            ckpt_path = repo_path / "big-lama" / "models" / "best.ckpt"
+            if self._model_ckpt_path:
+                ckpt_path = self._model_ckpt_path
+            elif repo_path:
+                ckpt_path = repo_path / "big-lama" / "models" / "best.ckpt"
+            else:
+                logger.warning("未提供 LaMa 权重路径,且未找到 lama 仓库目录")
+                return False
+            config_path = self._model_config_path
+            if config_path is None:
+                # 优先使用权重目录的上一级 config.yaml(与官方 big-lama 目录结构一致)
+                if ckpt_path.parent.name == "models":
+                    sibling_cfg = ckpt_path.parent.parent / "config.yaml"
+                    if sibling_cfg.exists():
+                        config_path = sibling_cfg
+                if config_path is None and repo_path:
+                    config_path = repo_path / "big-lama" / "config.yaml"
 
             if not config_path.exists() or not ckpt_path.exists():
                 logger.warning(
-                    f"lama 模型文件缺失。请下载: "
-                    f"wget https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.zip && "
-                    f"unzip big-lama.zip -d {repo_path}"
+                    f"lama 模型文件缺失: config={config_path}, ckpt={ckpt_path}"
                 )
                 return False
 
@@ -165,6 +192,8 @@ class LamaInpainter:
             if self._inference_size:
                 rgb, mask_u8, orig_size = self._resize_to_inference(rgb, mask_u8)
             result_rgb = self._model(rgb, mask_u8)
+            if not isinstance(result_rgb, np.ndarray):
+                result_rgb = np.asarray(result_rgb)
             if self._inference_size:
                 result_rgb = cv2.resize(result_rgb, (orig_size[1], orig_size[0]))
             return cv2.cvtColor(result_rgb, cv2.COLOR_RGB2BGR)
@@ -175,7 +204,6 @@ class LamaInpainter:
     def _inpaint_lama_repo(self, image: np.ndarray, mask: np.ndarray) -> Optional[np.ndarray]:
         try:
             import torch
-            import torch.nn.functional as F
             from saicinpainting.evaluation.data import pad_tensor_to_modulo
 
             rgb = cv2.cvtColor(image.astype(np.float32) / 255.0, cv2.COLOR_BGR2RGB)
@@ -197,9 +225,11 @@ class LamaInpainter:
                 mask_t = mask_t.cuda()
 
             with torch.no_grad():
-                output = self._model(img_t, mask_t)
-                # output shape: (B, C, H, W)
-                result = output[0].permute(1, 2, 0).cpu().numpy()
+                batch = {"image": img_t, "mask": mask_t}
+                assert self._model is not None
+                output = self._model(batch)
+                pred = output["inpainted"] if isinstance(output, dict) else batch["inpainted"]
+                result = pred[0].permute(1, 2, 0).cpu().numpy()
                 # 裁掉 pad
                 result = result[:orig_h, :orig_w, :]
 

+ 0 - 0
ocr_tools/gan_experiments_lab/test_images/input/彭_广东兴宁农村商业银行_page_002.png → ocr_tools/lab/gan_experiments_lab/test_images/input/彭_广东兴宁农村商业银行_page_002.png


+ 0 - 0
ocr_tools/gan_experiments_lab/watermark_synthesis.py → ocr_tools/lab/gan_experiments_lab/watermark_synthesis.py


+ 1 - 0
ocr_tools/lab/watermark_lab/__init__.py

@@ -0,0 +1 @@
+"""水印 mask 多策略融合实验模块。"""

+ 733 - 0
ocr_tools/lab/watermark_lab/contrast_sweep.py

@@ -0,0 +1,733 @@
+#!/usr/bin/env python3
+"""
+方向1:对比度增强参数网格扫描。
+
+不去水印,直接对原图做多种对比度增强,验证哪种参数组合能让水印
+在视觉上"淡化"、正文保持清晰,从而使后续 OCR 不受水印干扰。
+
+用法:
+    cd ocr_platform/ocr_tools/lab/watermark_lab
+
+    # 单张图快速扫描
+    python contrast_sweep.py ../gan_experiments_lab/test_images/input/彭_广东兴宁农村商业银行_page_002.png --quick
+
+    # 全量扫描(更多参数组合 + 生成增强对比图)
+    python contrast_sweep.py ../gan_experiments_lab/test_images/input/
+
+    # 同时跑 OCR 整页对比(det+rec,每方法 Top-1 组合)
+    python contrast_sweep.py input.png --ocr --model-dir /path/to/models
+
+    # 每方法 Top-3 组合跑 OCR 对比
+    python contrast_sweep.py input.png --ocr --ocr-top-n 3
+
+输出:
+    output/<stem>/
+    ├── contrast_report.json       # 参数扫描结果汇总(含 OCR 对比结果)
+    ├── contrast_summary.csv        # CSV 表格
+    ├── quad_compare.png            # 四宫格对比图
+    ├── text_restore_t60_bg248.png  # 各组合增强结果图
+    ├── clahe_cl3.0_t8.png
+    ├── gamma_g0.5.png
+    └── ocr/                        # OCR 对比结果(--ocr 时生成)
+        ├── <stem>_original_ocr_spans.png   # 原始图 OCR 可视化
+        ├── <stem>_original_ocr_spans.json  # 原始图 OCR JSON
+        ├── <stem>_<tag>_ocr_spans.png      # 各增强组合 OCR 可视化
+        ├── <stem>_<tag>_ocr_spans.json     # 各增强组合 OCR JSON
+        └── ocr_comparison.json             # OCR 差异汇总报告
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+import time
+from itertools import product
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Sequence, Tuple
+
+import cv2
+import numpy as np
+
+_repo_root = Path(__file__).resolve().parents[3]
+if str(_repo_root) not in sys.path:
+    sys.path.insert(0, str(_repo_root))
+
+from loguru import logger
+from ocr_utils.watermark.contrast import enhance_document_contrast
+
+_IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff", ".webp"}
+
+
+# ── 参数网格 ────────────────────────────────────────────────────
+
+
+def _build_param_grid(quick: bool = False) -> List[Dict[str, Any]]:
+    """Build the list of enhancement configurations to sweep.
+
+    Each entry is a keyword dict for one method, emitted in this order:
+      1. text_restore: text_black_target x background_threshold
+      2. clahe:        clip_limit x tile_grid_size
+      3. gamma:        gamma
+      4. linear:       black_percentile x white_percentile
+
+    Args:
+        quick: When True, use the reduced value lists (20 configs instead of 63).
+
+    Returns:
+        A list of config dicts, each carrying a "method" key.
+    """
+    if quick:
+        black_targets, bg_thresholds = [40, 60, 85], [240, 248]
+        clip_limits, tile_sizes = [1.0, 3.0, 5.0], [8, 16]
+        gammas = [0.4, 0.55, 0.7, 0.85]
+        black_pcts, white_pcts = [2.0, 5.0], [95.0, 98.0]
+    else:
+        black_targets, bg_thresholds = [40, 60, 80, 100, 120], [235, 240, 248, 252]
+        clip_limits, tile_sizes = [0.5, 1.0, 2.0, 3.0, 5.0, 8.0], [4, 8, 16, 32]
+        gammas = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+        black_pcts, white_pcts = [1.0, 2.0, 5.0, 8.0], [92.0, 95.0, 98.0]
+
+    # text_restore
+    grid: List[Dict[str, Any]] = [
+        {
+            "method": "text_restore",
+            "text_black_target": target,
+            "background_threshold": bg_th,
+        }
+        for target, bg_th in product(black_targets, bg_thresholds)
+    ]
+    # clahe
+    grid += [
+        {"method": "clahe", "clip_limit": clip, "tile_grid_size": tile}
+        for clip, tile in product(clip_limits, tile_sizes)
+    ]
+    # gamma
+    grid += [{"method": "gamma", "gamma": g} for g in gammas]
+    # linear
+    grid += [
+        {"method": "linear", "black_percentile": bp, "white_percentile": wp}
+        for bp, wp in product(black_pcts, white_pcts)
+    ]
+    return grid
+
+
+# ── 标签生成 ────────────────────────────────────────────────────
+
+
+def _tag_from_cfg(cfg: Dict[str, Any]) -> str:
+    """Return a short tag such as ``clahe_cl3.0_t8`` identifying a config.
+
+    The tag is the method name followed by one ``<prefix><value>`` part per
+    method-specific parameter; an unrecognized method yields the bare name.
+    """
+    method = cfg["method"]
+    # (prefix, config key) pairs per method, in the order they appear in the tag.
+    tag_fields = {
+        "text_restore": (("t", "text_black_target"), ("bg", "background_threshold")),
+        "clahe": (("cl", "clip_limit"), ("t", "tile_grid_size")),
+        "gamma": (("g", "gamma"),),
+        "linear": (("b", "black_percentile"), ("w", "white_percentile")),
+    }
+    fields = tag_fields.get(method)
+    if fields is None:
+        return method
+    pieces = [f"{method}"]
+    pieces.extend(f"{prefix}{cfg[key]}" for prefix, key in fields)
+    return "_".join(pieces)
+
+
+# ── 工具函数 ────────────────────────────────────────────────────
+
+
+def _collect_images(path: Path) -> List[Path]:
+    """Resolve *path* to a list of image files.
+
+    A file is returned as a one-element list; a directory yields its direct
+    children (non-recursive) with a supported suffix, sorted.
+
+    Raises:
+        ValueError: *path* is a file whose suffix is not in ``_IMAGE_SUFFIXES``.
+        FileNotFoundError: *path* is neither a file nor a directory.
+    """
+
+    def _has_image_suffix(candidate: Path) -> bool:
+        return candidate.suffix.lower() in _IMAGE_SUFFIXES
+
+    if path.is_file():
+        if _has_image_suffix(path):
+            return [path]
+        raise ValueError(f"不支持的图像格式: {path}")
+    if path.is_dir():
+        return sorted(
+            child for child in path.iterdir()
+            if child.is_file() and _has_image_suffix(child)
+        )
+    raise FileNotFoundError(path)
+
+
+def _compute_watermark_fade_score(
+    original: np.ndarray, enhanced: np.ndarray, window: int = 31
+) -> float:
+    """
+    Quantify how much the watermark texture faded after enhancement.
+
+    A large-kernel median filter estimates the background of each image; the
+    score is ``1 - var(enhanced residual) / max(var(original residual), 1.0)``,
+    so a smaller residual variance in the enhanced image gives a higher score.
+    """
+    # Median kernel size: at least 3 and forced odd.
+    ksize = max(3, window) | 1
+
+    def _residual_variance(img: np.ndarray):
+        as_float = img.astype(np.float32)
+        background = cv2.medianBlur(as_float.astype(np.uint8), ksize).astype(np.float32)
+        return np.var(cv2.absdiff(as_float, background))
+
+    original_var = _residual_variance(original)
+    enhanced_var = _residual_variance(enhanced)
+    return float(1.0 - enhanced_var / max(original_var, 1.0))
+
+
+def _compute_text_sharpness_score(
+    enhanced: np.ndarray, win: int = 3
+) -> float:
+    """Return the mean local standard deviation over a ``win`` x ``win`` box.
+
+    Larger values mean stronger local contrast, used here as a proxy for
+    text sharpness.
+    """
+    pixels = enhanced.astype(np.float32)
+    box = np.ones((win, win), np.float32) / (win * win)
+    local_mean = cv2.filter2D(pixels, -1, box)
+    local_sq_mean = cv2.filter2D(pixels * pixels, -1, box)
+    # E[x^2] - E[x]^2 can dip slightly below zero numerically; clamp before sqrt.
+    local_var = np.maximum(local_sq_mean - local_mean * local_mean, 0)
+    local_std = np.sqrt(local_var)
+    return float(local_std.mean())
+
+
+# ── 对比图生成 ──────────────────────────────────────────────────
+
+
+def _make_quad_compare(
+    original: np.ndarray,
+    top_enhanced: List[Tuple[str, np.ndarray]],
+) -> np.ndarray:
+    """Build a side-by-side comparison strip: the original plus each enhanced image.
+
+    Every panel is converted to BGR, resized to the largest height/width among
+    the panels, and given a 40-pixel caption bar underneath with its label.
+    """
+    labels = ["Original"] + [label for label, _ in top_enhanced]
+    panels = [original] + [img for _, img in top_enhanced]
+
+    # Convert single-channel panels to BGR so all panels can be stacked.
+    bgr_panels: List[np.ndarray] = [
+        cv2.cvtColor(panel, cv2.COLOR_GRAY2BGR) if panel.ndim == 2 else panel
+        for panel in panels
+    ]
+
+    # Common target size: the largest height and width among the panels.
+    target_h = max(panel.shape[0] for panel in bgr_panels)
+    target_w = max(panel.shape[1] for panel in bgr_panels)
+
+    tiles: List[np.ndarray] = []
+    for panel, label in zip(bgr_panels, labels):
+        if panel.shape[:2] != (target_h, target_w):
+            panel = cv2.resize(panel, (target_w, target_h))
+        caption = np.full((40, target_w, 3), 240, dtype=np.uint8)
+        cv2.putText(caption, label, (12, 28), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 0), 2)
+        tiles.append(np.vstack([panel, caption]))
+
+    return np.hstack(tiles)
+
+
+# ── OCR(整页对比)───────────────────────────────────────────────
+
+
+def _poly_to_bbox(poly: List[List[float]]) -> List[int]:
+    """Convert a polygon to an axis-aligned bbox ``[x0, y0, x1, y1]`` (int-truncated)."""
+    xs, ys = zip(*((pt[0], pt[1]) for pt in poly))
+    x0, x1 = min(xs), max(xs)
+    y0, y1 = min(ys), max(ys)
+    return [int(x0), int(y0), int(x1), int(y1)]
+
+
+def _ocr_full_page(engine: Any, img: np.ndarray) -> List[Dict[str, Any]]:
+    """Run full-page OCR (det + rec) and return the spans.
+
+    Each span: {"poly": [[x,y],...], "bbox": [x0,y0,x1,y1], "text": "...", "confidence": 0.9}
+    Duplicates are removed via ``SpanMatcher.remove_duplicate_spans`` before returning.
+    """
+    from ocr_tools.universal_doc_parser.core.layout_utils import SpanMatcher
+
+    res = engine.ocr(img, det=True, rec=True)
+    items = res[0] if res and res[0] is not None else []
+
+    spans: List[Dict[str, Any]] = []
+    for item in items:
+        # Skip empty entries and entries lacking the (box, recognition) pair.
+        if not item or len(item) < 2:
+            continue
+        box, rec_part = item[0], item[1]
+        # The recognition part is either a (text, score) sequence or a bare value.
+        is_sequence = isinstance(rec_part, (list, tuple))
+        text = str(rec_part[0] if is_sequence else rec_part)
+        if is_sequence and len(rec_part) > 1:
+            conf = float(rec_part[1])
+        else:
+            conf = 0.0
+        poly = [[float(p[0]), float(p[1])] for p in box] if box else []
+        bbox = _poly_to_bbox(poly) if poly else []
+        spans.append({
+            "poly": poly,
+            "bbox": bbox,
+            "text": text.strip(),
+            "confidence": round(conf, 4),
+        })
+    return SpanMatcher.remove_duplicate_spans(spans)
+
+
+def _save_ocr_debug_for_sweep(
+    image: np.ndarray,
+    spans: List[Dict[str, Any]],
+    out_dir: Path,
+    tag: str,
+) -> Dict[str, str]:
+    """Write the OCR visualization PNG and spans JSON under ``out_dir/ocr``.
+
+    The visualization is drawn with ``module_debug_viz.draw_ocr_spans_cv2``.
+
+    Returns:
+        {"image": <png path>, "json": <json path>} as strings.
+    """
+    from ocr_utils.module_debug_viz import draw_ocr_spans_cv2
+
+    ocr_dir = out_dir / "ocr"
+    ocr_dir.mkdir(parents=True, exist_ok=True)
+    img_path = ocr_dir / f"{tag}_ocr_spans.png"
+    json_path = ocr_dir / f"{tag}_ocr_spans.json"
+
+    # Visualization image
+    cv2.imwrite(str(img_path), draw_ocr_spans_cv2(image, spans))
+
+    # JSON: keep only the four reported fields of each span.
+    exported_fields = ("bbox", "poly", "text", "confidence")
+    payload = {
+        "tag": tag,
+        "count": len(spans),
+        "spans": [{field: s.get(field) for field in exported_fields} for s in spans],
+    }
+    json_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+
+    logger.info(f"    OCR debug: {img_path}")
+    return {"image": str(img_path), "json": str(json_path)}
+
+
+def _compare_ocr_results(
+    orig_spans: List[Dict[str, Any]],
+    enh_spans: List[Dict[str, Any]],
+    iou_threshold: float = 0.5,
+) -> Dict[str, Any]:
+    """Compare detection and recognition between two sets of OCR spans.
+
+    Baseline boxes are matched greedily, in input order, to the not-yet-matched
+    enhanced box with the highest IoU; a pair counts as matched when that IoU
+    is >= ``iou_threshold``.
+
+    Returns:
+        {
+            "detection": { orig_count, enh_count, matched, new, missing },
+            "recognition": { text_changed_count, char_diff_rate, details: [...] },
+            "summary": one-line summary string
+        }
+        ``text_changed_count`` is the number of matched pairs whose text differs.
+        ``details`` holds at most the first 50 matched pairs, each flagged with
+        ``text_changed``.  ``char_diff_rate`` compares the concatenated texts
+        position by position (plus the length difference) over the longer length.
+    """
+
+    def _bbox_iou(a: List[int], b: List[int]) -> float:
+        if not a or not b:
+            return 0.0
+        xa = max(a[0], b[0])
+        ya = max(a[1], b[1])
+        xb = min(a[2], b[2])
+        yb = min(a[3], b[3])
+        inter = max(0, xb - xa) * max(0, yb - ya)
+        area_a = max(0, a[2] - a[0]) * max(0, a[3] - a[1])
+        area_b = max(0, b[2] - b[0]) * max(0, b[3] - b[1])
+        union = area_a + area_b - inter
+        return inter / union if union > 0 else 0.0
+
+    # ── Detection comparison ──
+    orig_boxes = [s.get("bbox", []) for s in orig_spans]
+    enh_boxes = [s.get("bbox", []) for s in enh_spans]
+
+    matched_orig_idxs: set = set()
+    matched_enh_idxs: set = set()
+    recognition_details: List[Dict[str, Any]] = []
+
+    for i, ob in enumerate(orig_boxes):
+        if not ob:
+            continue
+        best_j, best_iou = -1, 0.0
+        for j, eb in enumerate(enh_boxes):
+            if j in matched_enh_idxs or not eb:
+                continue
+            iou = _bbox_iou(ob, eb)
+            if iou > best_iou:
+                best_iou, best_j = iou, j
+        # best_j stays -1 when no candidate overlaps; without this guard a
+        # threshold <= 0 would "match" index -1 (the last enhanced span).
+        if best_j < 0 or best_iou < iou_threshold:
+            continue
+        matched_orig_idxs.add(i)
+        matched_enh_idxs.add(best_j)
+        orig_text = orig_spans[i].get("text", "")
+        enh_text = enh_spans[best_j].get("text", "")
+        recognition_details.append({
+            "orig_bbox": ob,
+            "orig_text": orig_text,
+            "orig_score": orig_spans[i].get("confidence", 0),
+            "enh_text": enh_text,
+            "enh_score": enh_spans[best_j].get("confidence", 0),
+            "iou": round(best_iou, 4),
+            "text_changed": orig_text != enh_text,
+        })
+
+    new_boxes = len(enh_boxes) - len(matched_enh_idxs)
+    missing_boxes = len(orig_boxes) - len(matched_orig_idxs)
+
+    # Character difference rate over the concatenated texts.
+    orig_concat = "".join(s.get("text", "") for s in orig_spans)
+    enh_concat = "".join(s.get("text", "") for s in enh_spans)
+    total_chars = max(len(orig_concat), len(enh_concat), 1)
+    char_diff = sum(1 for a, b in zip(orig_concat, enh_concat) if a != b) + abs(
+        len(orig_concat) - len(enh_concat)
+    )
+    char_diff_rate = round(char_diff / total_chars, 4)
+
+    detection = {
+        "orig_count": len(orig_boxes),
+        "enh_count": len(enh_boxes),
+        "matched": len(matched_orig_idxs),
+        "new": new_boxes,
+        "missing": missing_boxes,
+    }
+    recognition = {
+        # Count only the matched pairs whose text actually differs
+        # (previously every matched pair was counted).
+        "text_changed_count": sum(1 for d in recognition_details if d["text_changed"]),
+        "char_diff_rate": char_diff_rate,
+        "details": recognition_details[:50],  # keep at most 50 matched-pair records
+    }
+
+    summary = (
+        f"检测: {detection['orig_count']}→{detection['enh_count']} (匹配{detection['matched']}, "
+        f"新增{detection['new']}, 遗失{detection['missing']}); "
+        f"识别: 文字变化{recognition['text_changed_count']}处, 字符差异率{char_diff_rate:.2%}"
+    )
+
+    return {"detection": detection, "recognition": recognition, "summary": summary}
+
+
+def _load_paddle_engine(model_dir: Path, det_thresh: float = 0.3):
+    """Create a ``PytorchPaddleOCR`` engine using weights found in *model_dir*.
+
+    A weight path is passed only when the file exists; otherwise ``None`` is
+    passed for that model path.
+    """
+    from ocr_tools.pytorch_models.pytorch_paddle import PytorchPaddleOCR
+
+    def _path_if_present(filename: str) -> Optional[str]:
+        candidate = model_dir / filename
+        return str(candidate) if candidate.exists() else None
+
+    det_path = _path_if_present("ch_PP-OCRv5_det_infer.pth")
+    rec_path = _path_if_present("ch_PP-OCRv4_rec_server_doc_infer.pth")
+    return PytorchPaddleOCR(
+        lang="ch",
+        det_model_path=det_path,
+        rec_model_path=rec_path,
+        det_db_box_thresh=det_thresh,
+    )
+
+
+# ── 扫描核心 ────────────────────────────────────────────────────
+
+
+def run_sweep(
+    input_path: Path,
+    out_dir: Path,
+    *,
+    quick: bool = False,
+    save_images: bool = True,
+    ocr_enabled: bool = False,
+    model_dir: Optional[Path] = None,
+    ocr_top_n: int = 1,
+    ocr_all: bool = False,
+) -> Dict[str, Any]:
+    """Sweep the contrast-enhancement grid over one image and write the reports.
+
+    Every config from ``_build_param_grid`` is applied to the grayscale input,
+    scored, and optionally saved.  The combined score is
+    ``0.5 * fade + 0.5 * sharpness / max_sharpness`` where ``max_sharpness`` is
+    the largest sharpness seen in this sweep, so both terms influence ranking.
+    With OCR enabled, the baseline and the selected enhanced images are OCR'd
+    and compared.
+
+    Outputs under ``out_dir/<stem>/``: ``contrast_report.json``,
+    ``contrast_summary.csv``, optionally ``<tag>.png`` per config,
+    ``quad_compare.png`` and ``ocr/ocr_comparison.json``.
+
+    Args:
+        input_path: Image file to process.
+        out_dir: Output root; results go to ``out_dir/<input stem>``.
+        quick: Use the reduced parameter grid.
+        save_images: Write each enhanced image (and the comparison strip).
+        ocr_enabled: Load the OCR engine and run the OCR comparison stage.
+        model_dir: OCR model directory; a built-in default path is used if None.
+        ocr_top_n: Number of top configs per method to OCR.
+        ocr_all: OCR every config (overrides ``ocr_top_n``).
+
+    Returns:
+        The report dict that is also written to ``contrast_report.json``.
+
+    Raises:
+        RuntimeError: The input image cannot be read.
+    """
+    bgr = cv2.imread(str(input_path))
+    if bgr is None:
+        raise RuntimeError(f"无法读取图像: {input_path}")
+    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
+
+    stem = input_path.stem
+    img_out = out_dir / stem
+    img_out.mkdir(parents=True, exist_ok=True)
+
+    grid = _build_param_grid(quick=quick)
+    logger.info(f"  {stem}: {len(grid)} 组参数组合")
+
+    engine = None
+    baseline_spans: List[Dict[str, Any]] = []
+    if ocr_enabled:
+        # Best effort: any failure here disables the OCR stage but not the sweep.
+        try:
+            md = model_dir or Path(
+                "/Users/zhch158/models/modelscope_cache/models/OpenDataLab/"
+                "PDF-Extract-Kit-1___0/models/OCR/paddleocr_torch"
+            )
+            engine = _load_paddle_engine(md)
+            logger.info("  OCR 引擎已加载")
+            # Baseline OCR on the original grayscale image.
+            baseline_spans = _ocr_full_page(engine, gray)
+            logger.info(f"  基线 OCR: {len(baseline_spans)} 个文本块")
+            _save_ocr_debug_for_sweep(bgr, baseline_spans, img_out, f"{stem}_original")
+        except Exception as e:
+            logger.warning(f"  OCR 引擎加载失败: {e}")
+
+    results: List[Dict[str, Any]] = []
+
+    # Rows grouped by method, used later to pick the best config per method.
+    method_groups: Dict[str, List[Dict[str, Any]]] = {}
+
+    # (row, fade, sharpness) with unrounded scores; combined_score is filled in
+    # after the loop because it needs the maximum sharpness of the whole sweep.
+    scored: List[Tuple[Dict[str, Any], float, float]] = []
+
+    for cfg in grid:
+        tag = _tag_from_cfg(cfg)
+        t0 = time.perf_counter()
+        try:
+            enhanced = enhance_document_contrast(gray, **cfg)
+        except Exception as e:
+            logger.warning(f"  [{tag}] 增强失败: {e}")
+            continue
+        elapsed = time.perf_counter() - t0
+
+        fade = _compute_watermark_fade_score(gray, enhanced)
+        sharpness = _compute_text_sharpness_score(enhanced)
+
+        row: Dict[str, Any] = {
+            "tag": tag,
+            **cfg,
+            "fade_score": round(fade, 6),
+            "sharpness_score": round(sharpness, 4),
+            "combined_score": 0.0,  # placeholder keeps the key order; set below
+            "time_ms": round(elapsed * 1000, 1),
+        }
+
+        if save_images:
+            out_path = img_out / f"{tag}.png"
+            cv2.imwrite(str(out_path), enhanced)
+            row["image_path"] = str(out_path)
+
+        results.append(row)
+        scored.append((row, fade, sharpness))
+        method_groups.setdefault(cfg["method"], []).append(row)
+
+    # Combined score: fade and normalized sharpness, equally weighted.  The
+    # former expression divided sharpness by itself, so the sharpness term was
+    # a constant 0.5 and never affected the ranking.
+    max_sharpness = max((s for _, _, s in scored), default=0.0)
+    sharpness_scale = max(max_sharpness, 0.01)
+    for row, fade, sharpness in scored:
+        row["combined_score"] = round(fade * 0.5 + sharpness / sharpness_scale * 0.5, 4)
+
+    # ── Ranking ──
+    results.sort(key=lambda r: -r["combined_score"])
+    for entries in method_groups.values():
+        entries.sort(key=lambda r: -r["combined_score"])
+
+    # Best config of each method.
+    tops: List[Tuple[str, str, float]] = []
+    for mname, entries in method_groups.items():
+        if entries:
+            top = entries[0]
+            tops.append((mname, top["tag"], top["combined_score"]))
+            logger.info(f"  [{mname}] Top: {top['tag']}  combined={top['combined_score']:.4f}")
+
+    if results:
+        logger.info(f"  全局 Top1: {results[0]['tag']}  combined={results[0]['combined_score']:.4f}")
+    else:
+        # Every config failed to enhance; still write an (empty) report below.
+        logger.warning("  所有参数组合增强均失败,无可用结果")
+
+    # ── Stage 2: full-page OCR comparison ──
+    ocr_comparisons: List[Dict[str, Any]] = []
+    if engine and baseline_spans:
+        # Choose which configs get OCR'd.
+        ocr_candidates: List[Dict[str, Any]] = []
+        if ocr_all:
+            ocr_candidates = results
+        else:
+            for entries in method_groups.values():
+                for r in entries[:ocr_top_n]:
+                    if r not in ocr_candidates:
+                        ocr_candidates.append(r)
+
+        logger.info(f"  OCR 对比 {len(ocr_candidates)} 个组合(每方法 Top-{ocr_top_n})")
+        for r in ocr_candidates:
+            tag = r["tag"]
+            enhanced_path = r.get("image_path")
+            if enhanced_path:
+                enhanced_bgr = cv2.imread(enhanced_path)
+                if enhanced_bgr is None:
+                    logger.warning(f"    [{tag}] 无法读取增强图")
+                    continue
+                enhanced_gray = cv2.cvtColor(enhanced_bgr, cv2.COLOR_BGR2GRAY)
+            else:
+                # Images were not saved: regenerate from the config keys of the row.
+                enhanced_gray = enhance_document_contrast(gray, **{
+                    k: v for k, v in r.items()
+                    if k not in ("tag", "image_path", "fade_score", "sharpness_score", "combined_score", "time_ms")
+                })
+                enhanced_bgr = cv2.cvtColor(enhanced_gray, cv2.COLOR_GRAY2BGR)
+
+            try:
+                enh_spans = _ocr_full_page(engine, enhanced_gray)
+            except Exception as e:
+                logger.warning(f"    [{tag}] OCR 失败: {e}")
+                continue
+
+            _save_ocr_debug_for_sweep(enhanced_bgr, enh_spans, img_out, f"{stem}_{tag}")
+
+            cmp = _compare_ocr_results(baseline_spans, enh_spans)
+            cmp["tag"] = tag
+            cmp["method"] = r["method"]
+            r["ocr_comparison"] = cmp
+            ocr_comparisons.append(cmp)
+
+            # Flatten the scalar comparison fields into the row for the CSV.
+            for k, v in cmp["detection"].items():
+                r[f"ocr_det_{k}"] = v
+            for k, v in cmp["recognition"].items():
+                if not isinstance(v, list):
+                    r[f"ocr_rec_{k}"] = v
+            logger.info(f"    [{tag}] {cmp['summary']}")
+
+    # ── Comparison strip ──
+    if save_images and len(tops) >= 3:
+        selected_labels = []
+        selected_imgs = []
+        seen_methods = set()
+        # results is sorted, so the first row seen per method is that method's best.
+        for r in results:
+            m = r["method"]
+            if m in seen_methods:
+                continue
+            seen_methods.add(m)
+            path = r.get("image_path")
+            if path:
+                img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
+                if img is not None:
+                    selected_labels.append(r["tag"])
+                    selected_imgs.append(img)
+                if len(selected_imgs) >= 3:
+                    break
+
+        if selected_imgs:
+            quad = _make_quad_compare(gray, list(zip(selected_labels, selected_imgs)))
+            quad_path = img_out / "quad_compare.png"
+            cv2.imwrite(str(quad_path), quad)
+            logger.info(f"  四宫格对比图: {quad_path}")
+
+    # ── Report ──
+    report: Dict[str, Any] = {
+        "input": str(input_path),
+        "output_dir": str(img_out),
+        "n_configs_tested": len(results),
+        "top_overall": results[0] if results else None,
+        "top_by_method": {
+            m: e[0] for m, e in method_groups.items() if e
+        },
+    }
+    if engine:
+        baseline_text = "".join(s.get("text", "") for s in baseline_spans)
+        report["baseline_ocr"] = {
+            "span_count": len(baseline_spans),
+            "full_text": baseline_text,
+        }
+        report["ocr_comparisons"] = {
+            "n_compared": len(ocr_comparisons),
+            "results": ocr_comparisons,
+        }
+
+    report_path = img_out / "contrast_report.json"
+    report_path.write_text(
+        json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8"
+    )
+
+    # Separate OCR comparison summary (full detection + recognition data).
+    if engine:
+        ocr_summary_path = img_out / "ocr" / "ocr_comparison.json"
+        ocr_summary_path.parent.mkdir(parents=True, exist_ok=True)
+        ocr_summary = {
+            "input": str(input_path),
+            "baseline_spans": len(baseline_spans),
+            "compared": ocr_comparisons,
+        }
+        ocr_summary_path.write_text(
+            json.dumps(ocr_summary, ensure_ascii=False, indent=2), encoding="utf-8"
+        )
+
+    # CSV
+    if results:
+        # Header is the union of keys over all rows (first-seen order): rows of
+        # different methods carry different parameter keys, and taking the
+        # header from results[0] alone dropped the other methods' columns.
+        csv_keys: List[str] = []
+        for r in results:
+            for k in r:
+                if k in csv_keys or k.endswith("_path") or k == "ocr_comparison":
+                    continue
+                csv_keys.append(k)
+        lines = [",".join(csv_keys)]
+        for r in results:
+            lines.append(",".join(str(r.get(k, "")) for k in csv_keys))
+        (img_out / "contrast_summary.csv").write_text("\n".join(lines), encoding="utf-8")
+
+    logger.info(f"  报告: {report_path}")
+    return report
+
+
+# ── CLI ──────────────────────────────────────────────────────────
+
+
+def _build_arg_parser() -> argparse.ArgumentParser:
+    """Create the command-line parser for the contrast sweep script."""
+    parser = argparse.ArgumentParser(
+        description="对比度增强参数网格扫描(不去水印,直接增强前后对比)",
+    )
+    parser.add_argument(
+        "input",
+        type=Path,
+        help="单张图片路径或图片目录",
+    )
+    parser.add_argument(
+        "-o", "--output",
+        type=Path,
+        default=None,
+        help="输出根目录,默认 input 同级 contrast_out/<stem>",
+    )
+    parser.add_argument(
+        "--quick",
+        action="store_true",
+        help="缩小参数网格",
+    )
+    parser.add_argument(
+        "--no-save-images",
+        action="store_true",
+        help="不写出增强结果图",
+    )
+    parser.add_argument(
+        "--ocr",
+        action="store_true",
+        help="启用整页 OCR 对比(det+rec):基线 OCR + Top-N 增强图 OCR,输出 spans 可视化和 JSON",
+    )
+    parser.add_argument(
+        "--ocr-top-n",
+        type=int,
+        default=1,
+        help="OCR 对比时每方法取 Top-N 组合(默认 1)",
+    )
+    parser.add_argument(
+        "--ocr-all",
+        action="store_true",
+        help="对所有参数组合跑 OCR 对比(覆盖 --ocr-top-n)",
+    )
+    parser.add_argument(
+        "--model-dir",
+        type=Path,
+        default=None,
+        help="PaddleOCR 模型目录",
+    )
+    return parser
+
+
+def main(argv: Optional[Sequence[str]] = None) -> None:
+    """CLI entry point: sweep every input image and write ``contrast_index.json``.
+
+    The output root is ``--output`` when given; otherwise ``contrast_out``
+    next to an input file, or inside an input directory.
+
+    Raises:
+        SystemExit: No image was found at the input path.
+    """
+    args = _build_arg_parser().parse_args(argv)
+
+    images = _collect_images(args.input)
+    if not images:
+        raise SystemExit("未找到可扫描的图像")
+
+    out_root = args.output
+    if out_root is None:
+        base_dir = args.input.parent if args.input.is_file() else args.input
+        out_root = base_dir / "contrast_out"
+    out_root.mkdir(parents=True, exist_ok=True)
+
+    logger.info(f"扫描 {len(images)} 张图 -> {out_root}")
+    logger.info(f"  quick={args.quick}  ocr={args.ocr}  ocr_top_n={args.ocr_top_n}  ocr_all={args.ocr_all}")
+
+    summary: List[Dict[str, Any]] = []
+    for img_path in images:
+        logger.info(f"\n=== {img_path.name} ===")
+        report = run_sweep(
+            img_path,
+            out_root,
+            quick=args.quick,
+            save_images=not args.no_save_images,
+            ocr_enabled=args.ocr,
+            model_dir=args.model_dir,
+            ocr_top_n=args.ocr_top_n,
+            ocr_all=args.ocr_all,
+        )
+        # top_overall is None when no config produced a result.
+        best = report.get("top_overall")
+        entry: Dict[str, Any] = {
+            "input": report["input"],
+            "n_tested": report["n_configs_tested"],
+            "top_tag": best["tag"] if best else None,
+            "top_combined": best["combined_score"] if best else None,
+            "report": str(Path(report["output_dir"]) / "contrast_report.json"),
+        }
+        summary.append(entry)
+
+    index_path = out_root / "contrast_index.json"
+    index_path.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")
+    logger.info(f"\n全部完成。索引: {index_path}")
+    for entry in summary:
+        logger.info(f"  {Path(entry['input']).name}: Top={entry['top_tag']} combined={entry['top_combined']}")
+
+
+if __name__ == "__main__":
+    # Example invocation:
+    # python contrast_sweep.py ../gan_experiments_lab/test_images/input/彭_广东兴宁农村商业银行_page_002.png --ocr --ocr-top-n 1
+    if len(sys.argv) == 1:
+        # No CLI arguments were given: synthesize sys.argv from the default
+        # config below so that main() parses it like a normal command line.
+        print("ℹ️  未提供命令行参数,使用默认配置运行...")
+        default_config = {
+            "input": "../gan_experiments_lab/test_images/input/彭_广东兴宁农村商业银行_page_002.png",
+            "output": "./output/彭_广东兴宁农村商业银行_page_002/contrast_sweep",
+            "ocr": True,
+            "ocr_top_n": 3,
+            "quick": True,
+        }
+        # "input" is the positional argument; every other key becomes a --flag
+        # (underscores in the key are turned into dashes).
+        sys.argv = [sys.argv[0], default_config["input"]]
+        for key, value in default_config.items():
+            if key == "input":
+                continue
+            flag = f"--{key.replace('_', '-')}"
+            # True booleans become bare switches, False booleans are omitted,
+            # and any other value is passed as "--flag value".
+            if isinstance(value, bool) and value:
+                sys.argv.append(flag)
+            elif not isinstance(value, bool):
+                sys.argv.extend([flag, str(value)])
+    # main() returns None, so a run that completes exits with status 0.
+    sys.exit(main())

+ 209 - 0
ocr_tools/lab/watermark_lab/fused_mask.py

@@ -0,0 +1,209 @@
+"""
+方案3:多策略融合水印 mask。
+
+在现有 light_on_white / diagonal_midtone 基础上新增背景差异策略,
+三种策略结果做 OR 融合,覆盖更多水印类型。
+
+用法:
+    from watermark_lab.fused_mask import build_fused_watermark_mask
+    mask, debug = build_fused_watermark_mask(gray, bgr=bgr)
+"""
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+import cv2
+import numpy as np
+
+_repo_root = Path(__file__).resolve().parents[3]
+if str(_repo_root) not in sys.path:
+    sys.path.insert(0, str(_repo_root))
+
+import ocr_utils.watermark.algorithms as _algo
+
+
+# ── 策略C:背景差异残差 ──────────────────────────────────────────
+
+def _build_background_diff_mask(
+    gray: np.ndarray,
+    *,
+    median_kernel: int = 31,
+    diff_low: float = 6.0,
+    diff_high: float = 50.0,
+    morph_open_kernel: int = 3,
+    dilate_radius: int = 0,
+) -> np.ndarray:
+    """
+    大核中值滤波估计背景 → 残差 → 提取半透明水印纹理。
+
+    原理:
+    - 大核 medianBlur 抹掉文字(高频),保留背景(低频) + 水印(中低频)
+    - 残差 |original - bg| 里,正文差异大(>50),水印差异中等(6~50),纯背景差异小(<6)
+    - 取 6~50 范围的残差作为水印候选
+
+    Args:
+        gray:       灰度图 ndarray (H, W)
+        median_kernel: 中值滤波核大小,越大抹文字越干净但水印也可能被模糊
+        diff_low:   残差下限
+        diff_high:  残差上限
+        morph_open_kernel: 形态学开运算核大小(去噪点)
+        dilate_radius:     膨胀半径(连接碎片水印)
+
+    Returns:
+        bool ndarray (H, W),True=疑似水印
+    """
+    gray_f = np.asarray(gray, dtype=np.float32)
+    ksize = max(3, int(median_kernel)) | 1  # 确保奇数
+    bg = cv2.medianBlur(gray, ksize).astype(np.float32)
+    diff = cv2.absdiff(gray_f, bg)
+
+    mask = (diff > diff_low) & (diff < diff_high)
+
+    if morph_open_kernel > 0 and np.any(mask):
+        k = cv2.getStructuringElement(
+            cv2.MORPH_ELLIPSE, (morph_open_kernel, morph_open_kernel)
+        )
+        mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, k) > 0
+
+    if dilate_radius > 0 and np.any(mask):
+        k = cv2.getStructuringElement(
+            cv2.MORPH_ELLIPSE, (dilate_radius * 2 + 1, dilate_radius * 2 + 1)
+        )
+        mask = cv2.dilate(mask.astype(np.uint8), k) > 0
+
+    return mask
+
+
+# ── 融合主函数 ──────────────────────────────────────────────────
+
+
+def build_fused_watermark_mask(
+    gray: np.ndarray,
+    *,
+    bgr: Optional[np.ndarray] = None,
+    # ── 策略A: light_on_white ──
+    a_enabled: bool = True,
+    a_light_gray_low: int = 220,
+    a_light_gray_high: int = 254,
+    a_direction_filter: str = "hough",
+    a_text_protect_gray_max: int = 130,
+    a_min_component_area: int = 200,
+    # ── 策略B: diagonal_midtone ──
+    b_enabled: bool = True,
+    b_midtone_low: int = 100,
+    b_midtone_high: int = 230,
+    b_background_threshold: int = 248,
+    # ── 策略C: 背景差异 ──
+    c_enabled: bool = True,
+    c_median_kernel: int = 31,
+    c_diff_low: float = 6.0,
+    c_diff_high: float = 50.0,
+    c_morph_open_kernel: int = 3,
+    c_dilate_radius: int = 0,
+    # ── 通用后处理 ──
+    min_component_area: int = 200,
+    seal_protect: bool = True,
+) -> tuple[np.ndarray, Dict[str, Any]]:
+    """
+    多策略融合水印 mask。
+
+    三种策略 OR 融合 → 连通域过滤 → 公章保护 → 最终 mask。
+
+    Returns:
+        mask:  bool ndarray (H, W)
+        debug: 包含每项策略的中间结果和统计信息
+    """
+    masks: list[np.ndarray] = []
+    debug: Dict[str, Any] = {"strategies": {}, "fused_ratio": None}
+
+    # ── 策略A: light_on_white ──
+    if a_enabled:
+        try:
+            ma, da = _algo.build_watermark_mask(
+                gray,
+                bgr=bgr,
+                mask_mode="light_on_white",
+                light_gray_low=a_light_gray_low,
+                light_gray_high=a_light_gray_high,
+                direction_filter=a_direction_filter,
+                text_protect_gray_max=a_text_protect_gray_max,
+                min_component_area=a_min_component_area,
+                seal_protect=seal_protect,
+            )
+            masks.append(ma)
+            debug["strategies"]["light_on_white"] = {
+                "ratio": float(ma.sum() / gray.size),
+                **{k: v for k, v in da.items() if not isinstance(v, np.ndarray)},
+            }
+        except Exception as e:
+            debug["strategies"]["light_on_white"] = {"error": str(e), "ratio": 0.0}
+
+    # ── 策略B: diagonal_midtone ──
+    if b_enabled:
+        try:
+            mb, db = _algo.build_watermark_mask(
+                gray,
+                bgr=bgr,
+                mask_mode="diagonal_midtone",
+                midtone_low=b_midtone_low,
+                midtone_high=b_midtone_high,
+                background_threshold=b_background_threshold,
+                min_component_area=min_component_area,
+            )
+            masks.append(mb)
+            debug["strategies"]["diagonal_midtone"] = {
+                "ratio": float(mb.sum() / gray.size),
+                **{k: v for k, v in db.items() if not isinstance(v, np.ndarray)},
+            }
+        except Exception as e:
+            debug["strategies"]["diagonal_midtone"] = {"error": str(e), "ratio": 0.0}
+
+    # ── 策略C: 背景差异 ──
+    if c_enabled:
+        try:
+            mc = _build_background_diff_mask(
+                gray,
+                median_kernel=c_median_kernel,
+                diff_low=c_diff_low,
+                diff_high=c_diff_high,
+                morph_open_kernel=c_morph_open_kernel,
+                dilate_radius=c_dilate_radius,
+            )
+            masks.append(mc)
+            debug["strategies"]["background_diff"] = {
+                "ratio": float(mc.sum() / gray.size),
+                "median_kernel": c_median_kernel,
+                "diff_low": c_diff_low,
+                "diff_high": c_diff_high,
+            }
+        except Exception as e:
+            debug["strategies"]["background_diff"] = {"error": str(e), "ratio": 0.0}
+
+    # ── OR 融合 ──
+    fused = np.zeros_like(gray, dtype=bool)
+    for m in masks:
+        fused |= m
+
+    # ── 连通域过滤 ──
+    if min_component_area > 0 and np.any(fused):
+        n_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
+            fused.astype(np.uint8), connectivity=8
+        )
+        filtered = np.zeros_like(fused)
+        for i in range(1, n_labels):
+            if stats[i, cv2.CC_STAT_AREA] >= min_component_area:
+                filtered[labels == i] = True
+        if np.any(filtered):
+            fused = filtered
+
+    # ── 公章保护 ──
+    if seal_protect and bgr is not None and bgr.ndim == 3:
+        seal = _algo._build_seal_protect_mask(bgr)
+        fused &= ~seal
+
+    debug["fused_ratio"] = float(fused.sum() / gray.size)
+    debug["n_strategies"] = len(masks)
+
+    return fused, debug

+ 430 - 0
ocr_tools/lab/watermark_lab/watermark_sweep.py

@@ -0,0 +1,430 @@
+#!/usr/bin/env python3
+"""
+水印 mask 参数网格扫描:对比 light_on_white / diagonal_midtone / fused 三种策略及参数组合。
+
+自动遍历多种参数组合,对每张输入图生成 mask overlay 并写入扫描报告,
+帮助评估哪种参数组合能最好地覆盖水印区域。
+
+用法:
+    cd ocr_platform/ocr_tools/lab/watermark_lab
+
+    # 单张图
+    python watermark_sweep.py ../gan_experiments_lab/test_images/input/彭_广东兴宁农村商业银行_page_002.png
+
+    # 批量扫描目录
+    python watermark_sweep.py ../gan_experiments_lab/test_images/input/
+
+    # 快速模式(缩小参数网格)
+    python watermark_sweep.py ../gan_experiments_lab/test_images/input/彭_广东兴宁农村商业银行_page_002.png --quick
+
+    # 指定输出目录
+    python watermark_sweep.py input.png -o ./my_sweep_out
+
+    # 跳过 mask overlay 图片(仅出 JSON 报告)
+    python watermark_sweep.py input.png --no-save-images
+
+    # 同时运行 LaMa 修复(需要指定权重路径)
+    python watermark_sweep.py input.png --lama-ckpt /Users/zhch158/models/big-lama/models/best.ckpt
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+import time
+from itertools import product
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
+
+import cv2
+import numpy as np
+
+_repo_root = Path(__file__).resolve().parents[3]
+if str(_repo_root) not in sys.path:
+    sys.path.insert(0, str(_repo_root))
+
+from loguru import logger
+
+from fused_mask import build_fused_watermark_mask
+
+_IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff", ".webp"}
+
+
+# ── 工具函数 ────────────────────────────────────────────────────
+
+
+def _render_mask_overlay(bgr: np.ndarray, mask: np.ndarray, color=(0, 0, 255)) -> np.ndarray:
+    """将 mask 以指定颜色叠加到原图上。"""
+    ov = bgr.copy()
+    ov[mask] = (ov[mask] * 0.4 + np.array(color, dtype=np.float32) * 0.6).astype(np.uint8)
+    return ov
+
+
+def _tag_from_config(cfg: Dict[str, Any]) -> str:
+    """从参数配置生成可读标签。"""
+    mode = cfg.get("mask_mode", "?")
+    parts = [mode]
+
+    if mode in ("light_on_white", "fused"):
+        parts.append(f"l{cfg.get('light_gray_low', '?')}")
+        parts.append(f"t{cfg.get('text_protect', '?')}")
+        parts.append(f"{cfg.get('direction_filter', '?')}")
+
+    parts.append(f"ca{cfg.get('min_component_area', '?')}")
+
+    if mode == "fused":
+        parts.append(f"mk{cfg.get('median_kernel', '?')}")
+
+    return "_".join(parts)
+
+
+# ── 扫描核心 ────────────────────────────────────────────────────
+
+
+def _collect_images(path: Path) -> List[Path]:
+    """收集输入图片。"""
+    if path.is_file():
+        if path.suffix.lower() not in _IMAGE_SUFFIXES:
+            raise ValueError(f"不支持的图像格式: {path}")
+        return [path]
+
+    if not path.is_dir():
+        raise FileNotFoundError(path)
+
+    return sorted(
+        p for p in path.iterdir() if p.is_file() and p.suffix.lower() in _IMAGE_SUFFIXES
+    )
+
+
+def _build_param_grid(quick: bool = False) -> List[Dict[str, Any]]:
+    """
+    构建参数网格。
+
+    三组正交维度:
+      1. mask_mode: light_on_white | diagonal_midtone | fused
+      2. light_on_white / fused 专属: light_gray_low, text_protect, direction_filter
+      3. fused 专属: median_kernel
+      4. 通用: min_component_area
+    """
+    if quick:
+        modes = ["light_on_white", "fused"]
+        light_gray_lows = [200, 236]
+        text_protects = [110, 130]
+        direction_filters = ["none", "hough"]
+        min_areas = [80, 200]
+        median_kernels = [21, 31]
+    else:
+        modes = ["light_on_white", "diagonal_midtone", "fused"]
+        light_gray_lows = [200, 220, 236]
+        text_protects = [110, 130]
+        direction_filters = ["none", "hough"]
+        min_areas = [80, 200, 500]
+        median_kernels = [21, 31, 41]
+
+    grid: List[Dict[str, Any]] = []
+    for mode in modes:
+        if mode == "diagonal_midtone":
+            for ca in min_areas:
+                grid.append({"mask_mode": "diagonal_midtone", "min_component_area": ca})
+        elif mode == "light_on_white":
+            for lgl, tp, df, ca in product(light_gray_lows, text_protects, direction_filters, min_areas):
+                grid.append({
+                    "mask_mode": "light_on_white",
+                    "light_gray_low": lgl,
+                    "text_protect": tp,
+                    "direction_filter": df,
+                    "min_component_area": ca,
+                })
+        else:  # fused
+            for lgl, tp, df, ca, mk in product(
+                light_gray_lows, text_protects, direction_filters, min_areas, median_kernels
+            ):
+                grid.append({
+                    "mask_mode": "fused",
+                    "light_gray_low": lgl,
+                    "text_protect": tp,
+                    "direction_filter": df,
+                    "min_component_area": ca,
+                    "median_kernel": mk,
+                })
+
+    return grid
+
+
+def _build_mask(bgr: np.ndarray, gray: np.ndarray, cfg: Dict[str, Any]) -> Tuple[np.ndarray, Dict[str, Any]]:
+    """根据参数配置构建水印 mask。"""
+    mode = cfg["mask_mode"]
+
+    if mode == "fused":
+        return build_fused_watermark_mask(
+            gray,
+            bgr=bgr,
+            a_enabled=True,
+            a_light_gray_low=cfg["light_gray_low"],
+            a_direction_filter=cfg["direction_filter"],
+            a_text_protect_gray_max=cfg["text_protect"],
+            a_min_component_area=cfg["min_component_area"],
+            b_enabled=True,
+            c_enabled=True,
+            c_median_kernel=cfg["median_kernel"],
+            min_component_area=cfg["min_component_area"],
+            seal_protect=True,
+        )
+
+    # 单策略模式
+    import ocr_utils.watermark.algorithms as _algo
+
+    if mode == "light_on_white":
+        return _algo.build_watermark_mask(
+            gray,
+            bgr=bgr,
+            mask_mode="light_on_white",
+            light_gray_low=cfg["light_gray_low"],
+            direction_filter=cfg["direction_filter"],
+            text_protect_gray_max=cfg["text_protect"],
+            min_component_area=cfg["min_component_area"],
+            seal_protect=True,
+        )
+
+    if mode == "diagonal_midtone":
+        return _algo.build_watermark_mask(
+            gray,
+            bgr=bgr,
+            mask_mode="diagonal_midtone",
+            min_component_area=cfg["min_component_area"],
+        )
+
+    raise ValueError(f"未知 mask_mode: {mode}")
+
+
+def run_sweep(
+    input_path: Path,
+    out_dir: Path,
+    *,
+    quick: bool = False,
+    save_images: bool = True,
+    lama_ckpt: Optional[Path] = None,
+    lama_repo: Optional[Path] = None,
+) -> Dict[str, Any]:
+    """对单张图执行参数网格扫描。"""
+    bgr = cv2.imread(str(input_path))
+    if bgr is None:
+        raise RuntimeError(f"无法读取图像: {input_path}")
+    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
+
+    stem = input_path.stem
+    img_out = out_dir / stem
+    img_out.mkdir(parents=True, exist_ok=True)
+
+    grid = _build_param_grid(quick=quick)
+    logger.info(f"  {stem}: {len(grid)} 组参数组合")
+
+    # LaMa(按需)
+    lama = None
+    if lama_ckpt:
+        try:
+            from lab.gan_experiments_lab.lama_inpaint import LamaInpainter
+            lama = LamaInpainter(
+                device="cpu",
+                model_ckpt_path=str(lama_ckpt),
+                lama_repo_path=str(lama_repo) if lama_repo else None,
+            )
+            lama.is_available  # 触发检测
+            logger.info(f"  LaMa 后端: {lama._backend}")
+        except Exception as e:
+            logger.warning(f"  加载 LaMa 失败: {e}")
+
+    results: List[Dict[str, Any]] = []
+
+    for cfg in grid:
+        tag = _tag_from_config(cfg)
+        t0 = time.perf_counter()
+        try:
+            mask, debug = _build_mask(bgr, gray, cfg)
+        except Exception as e:
+            logger.warning(f"  [{tag}] 构建 mask 失败: {e}")
+            continue
+
+        elapsed = time.perf_counter() - t0
+        ratio = float(mask.sum() / gray.size)
+
+        row: Dict[str, Any] = {
+            "tag": tag,
+            **cfg,
+            "wm_mask_ratio": round(ratio, 6),
+            "mask_build_time_s": round(elapsed, 3),
+        }
+
+        # 融合模式下记录各策略 ratio
+        if cfg["mask_mode"] == "fused" and "strategies" in debug:
+            for strategy_name, sinfo in debug["strategies"].items():
+                if isinstance(sinfo, dict) and "ratio" in sinfo:
+                    row[f"ratio_{strategy_name}"] = round(sinfo["ratio"], 6)
+
+        # mask overlay 图片
+        if save_images:
+            overlay = _render_mask_overlay(bgr, mask)
+            overlay_path = img_out / f"{tag}_overlay.png"
+            cv2.imwrite(str(overlay_path), overlay)
+            row["overlay_path"] = str(overlay_path)
+
+        # LaMa 修复(如启用)
+        if lama and np.any(mask):
+            try:
+                t1 = time.perf_counter()
+                result = lama.inpaint(bgr, mask)
+                lama_time = time.perf_counter() - t1
+                if result is not None and save_images:
+                    inpaint_path = img_out / f"{tag}_inpainted.png"
+                    cv2.imwrite(str(inpaint_path), result)
+                    row["inpainted_path"] = str(inpaint_path)
+                row["lama_success"] = result is not None
+                row["lama_time_s"] = round(lama_time, 2)
+            except Exception as e:
+                logger.warning(f"  [{tag}] LaMa 修复失败: {e}")
+                row["lama_success"] = False
+
+        results.append(row)
+
+    # ── 排序 ──
+    # 1. 排除异常的 ratio(如 0 或接近全图)
+    reasonable = [r for r in results if 0.005 < r["wm_mask_ratio"] < 0.80]
+
+    # 2. 按 mask_ratio 接近中位数排序(太小的可能漏检,太大的可能过检)
+    if reasonable:
+        ratios = [r["wm_mask_ratio"] for r in reasonable]
+        median_ratio = np.median(ratios)
+        logger.info(f"  ratio 中位数: {median_ratio:.4f}")
+
+        def _score(r: Dict[str, Any]) -> float:
+            return -abs(r["wm_mask_ratio"] - median_ratio)
+
+        reasonable.sort(key=_score, reverse=True)
+        top_n = min(5, len(reasonable))
+        for i, r in enumerate(reasonable[:top_n]):
+            logger.info(
+                f"  Top{i+1}: {r['tag']}  ratio={r['wm_mask_ratio']:.4f}"
+            )
+
+    # ── 写入报告 ──
+    report = {
+        "input": str(input_path),
+        "output_dir": str(img_out),
+        "n_configs_tested": len(results),
+        "n_reasonable": len(reasonable),
+        "median_ratio": round(float(median_ratio), 6) if reasonable else None,
+        "top_results": reasonable[:5] if reasonable else [],
+        "all_results": results,
+    }
+    report_path = img_out / "sweep_report.json"
+    report_path.write_text(
+        json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8"
+    )
+
+    # CSV 摘要
+    csv_path = img_out / "sweep_summary.csv"
+    if results:
+        csv_keys = [k for k in results[0].keys() if not k.endswith("_path")]
+        lines = [",".join(csv_keys)]
+        for r in results:
+            vals = [str(r.get(k, "")) for k in csv_keys]
+            lines.append(",".join(vals))
+        csv_path.write_text("\n".join(lines), encoding="utf-8")
+
+    logger.info(f"  报告: {report_path}")
+    return report
+
+
+# ── CLI ──────────────────────────────────────────────────────────
+
+
+def _build_arg_parser() -> argparse.ArgumentParser:
+    p = argparse.ArgumentParser(
+        description="水印 mask 参数网格扫描(light_on_white / diagonal_midtone / fused)",
+    )
+    p.add_argument(
+        "input",
+        type=Path,
+        help="单张图片路径或图片目录",
+    )
+    p.add_argument(
+        "-o", "--output",
+        type=Path,
+        default=None,
+        help="输出根目录,默认 input 同级 sweep_out/<stem>",
+    )
+    p.add_argument(
+        "--quick",
+        action="store_true",
+        help="缩小参数网格",
+    )
+    p.add_argument(
+        "--no-save-images",
+        action="store_true",
+        help="不写出 mask overlay 图片(仅 JSON 报告)",
+    )
+    p.add_argument(
+        "--lama-ckpt",
+        type=Path,
+        default=None,
+        help="LaMa 权重文件路径(启用则每组跑 LaMa 修复)",
+    )
+    p.add_argument(
+        "--lama-repo",
+        type=Path,
+        default=None,
+        help="LaMa 仓库路径(用于导入 saicinpainting)",
+    )
+    return p
+
+
+def main(argv: Optional[Sequence[str]] = None) -> None:
+    args = _build_arg_parser().parse_args(argv)
+
+    images = _collect_images(args.input)
+    if not images:
+        raise SystemExit("未找到可扫描的图像")
+
+    if args.output is not None:
+        out_root = args.output
+    elif args.input.is_file():
+        out_root = args.input.parent / "sweep_out"
+    else:
+        out_root = args.input / "sweep_out"
+    out_root.mkdir(parents=True, exist_ok=True)
+
+    logger.info(f"扫描 {len(images)} 张图 -> {out_root}")
+    logger.info(f"  quick={args.quick}")
+
+    summary: List[Dict[str, Any]] = []
+    for img_path in images:
+        logger.info(f"\n=== {img_path.name} ===")
+        report = run_sweep(
+            img_path,
+            out_root,
+            quick=args.quick,
+            save_images=not args.no_save_images,
+            lama_ckpt=args.lama_ckpt,
+            lama_repo=args.lama_repo,
+        )
+        summary.append({
+            "input": report["input"],
+            "n_tested": report["n_configs_tested"],
+            "median_ratio": report["median_ratio"],
+            "report": str(Path(report["output_dir"]) / "sweep_report.json"),
+        })
+
+    index_path = out_root / "sweep_index.json"
+    index_path.write_text(
+        json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8"
+    )
+    logger.info(f"\n全部完成。索引: {index_path}")
+    for s in summary:
+        logger.info(
+            f"  {Path(s['input']).name}: "
+            f"{s['n_tested']} 组, median_ratio={s['median_ratio']} -> {s['report']}"
+        )
+
+
+if __name__ == "__main__":
+    main()