"""水印 调试图保存(由 ocr_utils.watermark_utils 迁入)。""" from __future__ import annotations import json import re from pathlib import Path from typing import Any, Dict, Optional, Tuple, Union import cv2 import numpy as np from loguru import logger from PIL import Image from ocr_utils.watermark.removal import render_watermark_mask_overlay def _image_to_bgr_for_debug(img: np.ndarray) -> np.ndarray: """将 ndarray 转为 BGR,供 cv2.imwrite 使用。""" if img.ndim == 2: return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) out = img.copy() if out.shape[2] == 3: return cv2.cvtColor(out, cv2.COLOR_RGB2BGR) return out def save_watermark_removal_debug( before: Union[np.ndarray, Image.Image], after: Union[np.ndarray, Image.Image], output_dir: Union[str, Path], page_name: str, *, processing_params: Optional[Dict[str, Any]] = None, image_format: str = "png", save_compare: bool = True, subdir: str = "watermark_removal", mask_overlay: Optional[np.ndarray] = None, ) -> Dict[str, str]: """ 保存去水印调试图(before / after / compare / meta.json)。 与 universal_doc_parser 的 module debug 目录结构一致: ``{output_dir}/debug/{subdir}/`` Args: before: 处理前图像(RGB/BGR/灰度) after: 处理后图像 output_dir: 输出根目录(通常为 pipeline 或工具的输出目录) page_name: 文件名前缀(如 ``doc_page_002``) processing_params: 写入 meta.json 的参数(threshold、contrast_enhancement 等) image_format: 图片格式,png/jpg save_compare: 是否保存左右拼接对比图 subdir: debug 根目录下的子目录名(默认 watermark_removal) Returns: 已保存文件路径字典(before/after/compare/meta,未保存的键省略) """ if isinstance(before, Image.Image): before = np.array(before) if isinstance(after, Image.Image): after = np.array(after) from ocr_utils.module_debug_viz import resolve_module_debug_dir debug_dir = resolve_module_debug_dir(output_dir, subdir) fmt = (image_format or "png").lstrip(".") before_bgr = _image_to_bgr_for_debug(before) after_bgr = _image_to_bgr_for_debug(after) paths: Dict[str, str] = {} before_path = debug_dir / f"{page_name}_watermark_before.{fmt}" after_path = debug_dir / f"{page_name}_watermark_after.{fmt}" cv2.imwrite(str(before_path), before_bgr) cv2.imwrite(str(after_path), after_bgr) paths["before"] = str(before_path) paths["after"] = str(after_path) if save_compare: h = max(before_bgr.shape[0], after_bgr.shape[0]) if before_bgr.shape[0] != h: before_bgr = cv2.resize(before_bgr, (before_bgr.shape[1], h)) if after_bgr.shape[0] != h: after_bgr = cv2.resize(after_bgr, (after_bgr.shape[1], h)) compare = np.hstack([before_bgr, after_bgr]) compare_path = debug_dir / f"{page_name}_watermark_compare.{fmt}" cv2.imwrite(str(compare_path), compare) paths["compare"] = str(compare_path) logger.info(f"Saved watermark compare: {compare_path}") if mask_overlay is not None: mask_bgr = _image_to_bgr_for_debug(mask_overlay) mask_path = debug_dir / f"{page_name}_watermark_mask.{fmt}" cv2.imwrite(str(mask_path), mask_bgr) paths["mask"] = str(mask_path) meta: Dict[str, Any] = {"page_name": page_name} if processing_params: _skip_meta = ( "midtone_mask", "wm_mask", "wm_candidate", "geom_region", "geom_candidate", "diag_region", "text_protect", "seal_protect", "hough_lines_bgr", "diag_ratio_heatmap", "hv_ratio_heatmap", ) meta_params = { k: v for k, v in processing_params.items() if k not in _skip_meta } meta.update(meta_params) else: meta.update({}) meta["before"] = paths["before"] meta["after"] = paths["after"] if "compare" in paths: meta["compare"] = paths["compare"] meta_path = debug_dir / f"{page_name}_watermark_meta.json" meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8") paths["meta"] = str(meta_path) logger.info(f"Saved watermark debug: {before_path}, {after_path}") return paths