debug.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. """水印 调试图保存(由 ocr_utils.watermark_utils 迁入)。"""
  2. from __future__ import annotations
  3. import json
  4. import re
  5. from pathlib import Path
  6. from typing import Any, Dict, Optional, Tuple, Union
  7. import cv2
  8. import numpy as np
  9. from loguru import logger
  10. from PIL import Image
  11. from ocr_utils.watermark.removal import render_watermark_mask_overlay
  12. def _image_to_bgr_for_debug(img: np.ndarray) -> np.ndarray:
  13. """将 ndarray 转为 BGR,供 cv2.imwrite 使用。"""
  14. if img.ndim == 2:
  15. return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
  16. out = img.copy()
  17. if out.shape[2] == 3:
  18. return cv2.cvtColor(out, cv2.COLOR_RGB2BGR)
  19. return out
  20. def save_watermark_removal_debug(
  21. before: Union[np.ndarray, Image.Image],
  22. after: Union[np.ndarray, Image.Image],
  23. output_dir: Union[str, Path],
  24. page_name: str,
  25. *,
  26. processing_params: Optional[Dict[str, Any]] = None,
  27. image_format: str = "png",
  28. save_compare: bool = True,
  29. subdir: str = "watermark_removal",
  30. mask_overlay: Optional[np.ndarray] = None,
  31. ) -> Dict[str, str]:
  32. """
  33. 保存去水印调试图(before / after / compare / meta.json)。
  34. 与 universal_doc_parser 的 module debug 目录结构一致:
  35. ``{output_dir}/debug/{subdir}/``
  36. Args:
  37. before: 处理前图像(RGB/BGR/灰度)
  38. after: 处理后图像
  39. output_dir: 输出根目录(通常为 pipeline 或工具的输出目录)
  40. page_name: 文件名前缀(如 ``doc_page_002``)
  41. processing_params: 写入 meta.json 的参数(threshold、contrast_enhancement 等)
  42. image_format: 图片格式,png/jpg
  43. save_compare: 是否保存左右拼接对比图
  44. subdir: debug 根目录下的子目录名(默认 watermark_removal)
  45. Returns:
  46. 已保存文件路径字典(before/after/compare/meta,未保存的键省略)
  47. """
  48. if isinstance(before, Image.Image):
  49. before = np.array(before)
  50. if isinstance(after, Image.Image):
  51. after = np.array(after)
  52. from ocr_utils.module_debug_viz import resolve_module_debug_dir
  53. debug_dir = resolve_module_debug_dir(output_dir, subdir)
  54. fmt = (image_format or "png").lstrip(".")
  55. before_bgr = _image_to_bgr_for_debug(before)
  56. after_bgr = _image_to_bgr_for_debug(after)
  57. paths: Dict[str, str] = {}
  58. before_path = debug_dir / f"{page_name}_watermark_before.{fmt}"
  59. after_path = debug_dir / f"{page_name}_watermark_after.{fmt}"
  60. cv2.imwrite(str(before_path), before_bgr)
  61. cv2.imwrite(str(after_path), after_bgr)
  62. paths["before"] = str(before_path)
  63. paths["after"] = str(after_path)
  64. if save_compare:
  65. h = max(before_bgr.shape[0], after_bgr.shape[0])
  66. if before_bgr.shape[0] != h:
  67. before_bgr = cv2.resize(before_bgr, (before_bgr.shape[1], h))
  68. if after_bgr.shape[0] != h:
  69. after_bgr = cv2.resize(after_bgr, (after_bgr.shape[1], h))
  70. compare = np.hstack([before_bgr, after_bgr])
  71. compare_path = debug_dir / f"{page_name}_watermark_compare.{fmt}"
  72. cv2.imwrite(str(compare_path), compare)
  73. paths["compare"] = str(compare_path)
  74. logger.info(f"Saved watermark compare: {compare_path}")
  75. if mask_overlay is not None:
  76. mask_bgr = _image_to_bgr_for_debug(mask_overlay)
  77. mask_path = debug_dir / f"{page_name}_watermark_mask.{fmt}"
  78. cv2.imwrite(str(mask_path), mask_bgr)
  79. paths["mask"] = str(mask_path)
  80. meta: Dict[str, Any] = {"page_name": page_name}
  81. if processing_params:
  82. _skip_meta = (
  83. "midtone_mask",
  84. "wm_mask",
  85. "wm_candidate",
  86. "geom_region",
  87. "geom_candidate",
  88. "diag_region",
  89. "text_protect",
  90. "seal_protect",
  91. "hough_lines_bgr",
  92. "diag_ratio_heatmap",
  93. "hv_ratio_heatmap",
  94. )
  95. meta_params = {
  96. k: v
  97. for k, v in processing_params.items()
  98. if k not in _skip_meta
  99. }
  100. meta.update(meta_params)
  101. else:
  102. meta.update({})
  103. meta["before"] = paths["before"]
  104. meta["after"] = paths["after"]
  105. if "compare" in paths:
  106. meta["compare"] = paths["compare"]
  107. meta_path = debug_dir / f"{page_name}_watermark_meta.json"
  108. meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
  109. paths["meta"] = str(meta_path)
  110. logger.info(f"Saved watermark debug: {before_path}, {after_path}")
  111. return paths