Selaa lähdekoodia

feat(增强水印去除工具的配置与处理能力): 更新remove_watermark.py,重构水印设置加载逻辑,支持根据scope参数选择不同的水印配置,新增WatermarkProcessor类以优化水印去除过程,提升OCR处理的灵活性和准确性。

zhch158_admin 3 päivää sitten
vanhempi
commit
0ba1d33741
1 muutettua tiedostoa jossa 55 lisäystä ja 29 poistoa
  1. 55 29
      ocr_tools/remove_watermark_tool/remove_watermark.py

+ 55 - 29
ocr_tools/remove_watermark_tool/remove_watermark.py

@@ -45,14 +45,15 @@ if str(_repo_root) not in sys.path:
     sys.path.insert(0, str(_repo_root))
 
 from loguru import logger
-from ocr_utils.watermark_utils import (
+from ocr_utils.watermark import (
+    WatermarkProcessor,
     detect_watermark,
-    remove_watermark_from_image_rgb,
+    merge_watermark_config,
+    remove_txt_pdf_watermark,
     render_watermark_mask_overlay,
-    save_watermark_removal_debug,
     save_watermark_mask_debug_layers,
+    save_watermark_removal_debug,
     scan_pdf_watermark_xobjs,
-    remove_txt_pdf_watermark,
 )
 
 # 支持的图片后缀(小写)
@@ -72,6 +73,7 @@ class WatermarkToolSettings:
     morph_close_kernel: int = 0
     dpi: int = 200
     method: str = "threshold"
+    scope: str = "page"
     contrast_enhancement: Optional[Dict[str, Any]] = None
     debug_options: Optional[Dict[str, Any]] = None
     watermark_enabled: bool = True
@@ -83,11 +85,15 @@ class WatermarkToolSettings:
         return str(opts.get("image_format") or "png").lstrip(".")
 
 
-def load_watermark_settings(config_path: Path) -> WatermarkToolSettings:
+def load_watermark_settings(
+    config_path: Path,
+    *,
+    scope: str = "page",
+) -> WatermarkToolSettings:
     """
     从 universal_doc_parser 场景配置读取 preprocessor.watermark_removal 与 input.dpi。
 
-    不依赖完整 ConfigManager,避免仅调试水印时强依赖 layout/ocr 等段。
+    scope=cell 时读取 table_recognition_wired.second_pass_ocr.cell_preprocess.watermark
     """
     config_path = Path(config_path)
     if not config_path.is_file():
@@ -96,25 +102,33 @@ def load_watermark_settings(config_path: Path) -> WatermarkToolSettings:
     with open(config_path, encoding="utf-8") as f:
         raw = yaml.safe_load(f) or {}
 
-    preprocessor = raw.get("preprocessor") or {}
-    wm = preprocessor.get("watermark_removal") or {}
     input_cfg = raw.get("input") or {}
+    if scope == "cell":
+        wired = raw.get("table_recognition_wired") or {}
+        sp = wired.get("second_pass_ocr") or {}
+        cpp = sp.get("cell_preprocess") or {}
+        wm_user = cpp.get("watermark") or {}
+        wm_full = merge_watermark_config("cell", wm_user)
+    else:
+        preprocessor = raw.get("preprocessor") or {}
+        wm_user = preprocessor.get("watermark_removal") or {}
+        wm_full = merge_watermark_config("page", wm_user)
 
-    contrast = wm.get("contrast_enhancement")
+    contrast = wm_full.get("contrast_enhancement")
     if contrast is not None and not isinstance(contrast, dict):
         contrast = None
 
-    wm_full = copy.deepcopy(wm)
     return WatermarkToolSettings(
-        threshold=int(wm.get("threshold", 160)),
-        morph_close_kernel=int(wm.get("morph_close_kernel", 0)),
+        threshold=int(wm_full.get("threshold", 160)),
+        morph_close_kernel=int(wm_full.get("morph_close_kernel", 0)),
         dpi=int(input_cfg.get("dpi", 200)),
-        method=str(wm.get("method") or "threshold"),
+        method=str(wm_full.get("method") or "masked_adaptive"),
+        scope=scope,
         contrast_enhancement=copy.deepcopy(contrast) if contrast else None,
-        debug_options=copy.deepcopy(wm.get("debug_options"))
-        if wm.get("debug_options")
+        debug_options=copy.deepcopy(wm_full.get("debug_options"))
+        if wm_full.get("debug_options")
         else None,
-        watermark_enabled=bool(wm.get("enabled", True)),
+        watermark_enabled=bool(wm_full.get("enabled", True)),
         watermark_config=wm_full,
     )
 
@@ -122,6 +136,7 @@ def load_watermark_settings(config_path: Path) -> WatermarkToolSettings:
 def resolve_watermark_settings(
     config_path: Path,
     *,
+    scope: str = "page",
     threshold: Optional[int] = None,
     morph_close_kernel: Optional[int] = None,
     dpi: Optional[int] = None,
@@ -130,7 +145,7 @@ def resolve_watermark_settings(
     method: Optional[str] = None,
 ) -> WatermarkToolSettings:
     """加载配置并应用命令行覆盖。"""
-    settings = load_watermark_settings(config_path)
+    settings = load_watermark_settings(config_path, scope=scope)
 
     if threshold is not None:
         settings.threshold = threshold
@@ -176,21 +191,19 @@ def _apply_image_watermark_removal(
     contrast_enhancement: Optional[Dict[str, Any]] = None,
     apply_watermark_removal: bool = True,
     removal_debug: Optional[Dict[str, Any]] = None,
+    scope: str = "page",
 ) -> np.ndarray:
     """与 universal_doc_parser 一致的 RGB 去水印 + 可选对比度增强。"""
-    wm_cfg = _watermark_removal_cfg_for_method(settings, settings.method)
-    return np.asarray(
-        remove_watermark_from_image_rgb(
-            img_np,
-            threshold=settings.threshold,
-            morph_close_kernel=settings.morph_close_kernel,
-            contrast_enhancement=contrast_enhancement,
-            apply_watermark_removal=apply_watermark_removal,
-            watermark_removal_cfg=wm_cfg,
-            removal_debug=removal_debug,
-            return_pil=False,
-        )
+    proc = WatermarkProcessor(settings.watermark_config or {}, scope=scope)  # type: ignore[arg-type]
+    apply_contrast = contrast_enhancement is not None
+    cleaned, _ = proc.process(
+        img_np,
+        apply_removal=apply_watermark_removal,
+        contrast_override=contrast_enhancement,
+        removal_debug=removal_debug,
+        force=scope == "cell",
     )
+    return np.asarray(cleaned)
 
 
 def _active_contrast_enhancement(
@@ -418,6 +431,7 @@ def process_document(
                     contrast_enhancement=contrast_enhancement,
                     apply_watermark_removal=apply_watermark_removal,
                     removal_debug=removal_dbg,
+                    scope=settings.scope,
                 )
                 if save_debug:
                     _maybe_save_watermark_debug(
@@ -459,6 +473,7 @@ def process_document(
             contrast_enhancement=contrast_enhancement,
             apply_watermark_removal=apply_watermark_removal,
             removal_debug=removal_dbg,
+            scope=settings.scope,
         )
         if save_debug:
             _maybe_save_watermark_debug(
@@ -519,6 +534,7 @@ def preview_page(
         settings=settings,
         contrast_enhancement=contrast,
         apply_watermark_removal=settings.watermark_enabled,
+        scope=settings.scope,
     )
     cleaned = cv2.cvtColor(cleaned_rgb, cv2.COLOR_BGR2GRAY)
 
@@ -584,11 +600,13 @@ def compare_watermark_methods(
         sub = copy.deepcopy(settings)
         sub.method = method
         dbg: Dict[str, Any] = {}
+        sub.watermark_config = _watermark_removal_cfg_for_method(sub, method)
         out = _apply_image_watermark_removal(
             img_rgb,
             settings=sub,
             contrast_enhancement=contrast,
             removal_debug=dbg,
+            scope=settings.scope,
         )
         out_rgb = cv2.cvtColor(out, cv2.COLOR_BGR2RGB)
         results[method] = out_rgb
@@ -726,6 +744,13 @@ def main():
         help="覆盖 watermark_removal.method",
     )
     parser.add_argument(
+        "--scope",
+        type=str,
+        default="page",
+        choices=["page", "cell"],
+        help="page=页级 preprocessor;cell=二次 OCR 单元格 preset",
+    )
+    parser.add_argument(
         "--compare-methods",
         action="store_true",
         help="对比 threshold 与 masked_adaptive,输出三联图到 -o 目录",
@@ -736,6 +761,7 @@ def main():
     try:
         settings = resolve_watermark_settings(
             args.config,
+            scope=args.scope,
             threshold=args.threshold,
             morph_close_kernel=args.morph_kernel,
             dpi=args.dpi,