瀏覽代碼

feat(优化水印去除和方向校正): 增强remove_watermark方法以支持可选对比度增强,改进correct_orientation方法以处理PDF旋转和方向分类器,优化process方法以支持裁剪块处理,提升OCR图像预处理的灵活性和准确性。

zhch158_admin 5 天之前
父節點
當前提交
57178ab8f2
共有 1 個文件被更改,包括 60 次插入 和 21 次删除
  1. 60 21
      ocr_tools/universal_doc_parser/models/adapters/mineru_adapter.py

+ 60 - 21
ocr_tools/universal_doc_parser/models/adapters/mineru_adapter.py

@@ -59,56 +59,95 @@ class MinerUPreprocessor(BasePreprocessor):
         pass
 
     def remove_watermark(self, image: Union[np.ndarray, Image.Image]) -> np.ndarray:
-        """页级水印去除(应在整页图像上调用一次,勿对裁剪块重复调用)。"""
+        """页级水印去除 + 可选对比度增强(整页调用一次,勿对裁剪块重复)。"""
         if isinstance(image, Image.Image):
             image = np.array(image)
 
         watermark_cfg = self.config.get('watermark_removal', {})
-        if not watermark_cfg.get('enabled', False):
+        wm_enabled = bool(watermark_cfg.get('enabled', False))
+        contrast_cfg = watermark_cfg.get('contrast_enhancement', {})
+        contrast_enabled = bool(
+            contrast_cfg.get('enabled', False) if isinstance(contrast_cfg, dict) else False
+        )
+
+        if not wm_enabled and not contrast_enabled:
             return image
 
         threshold = watermark_cfg.get('threshold', 160)
         morph_close_kernel = watermark_cfg.get('morph_close_kernel', 0)
+        before_image = image.copy()
         try:
             cleaned = remove_watermark_from_image_rgb(
                 image,
                 threshold=threshold,
                 morph_close_kernel=morph_close_kernel,
                 return_pil=False,
+                contrast_enhancement=contrast_cfg if isinstance(contrast_cfg, dict) else None,
+                apply_watermark_removal=wm_enabled,
+                watermark_removal_cfg=watermark_cfg,
             )
-            logger.info(f"🧹 Watermark removed (threshold={threshold})")
-            return cleaned
+            if wm_enabled:
+                method = watermark_cfg.get("method", "threshold")
+                logger.info(
+                    f"🧹 Watermark removed (method={method}, threshold={threshold})"
+                )
+            if contrast_enabled:
+                method = contrast_cfg.get('method', 'clahe') if isinstance(contrast_cfg, dict) else 'clahe'
+                logger.info(f"📈 Contrast enhanced (method={method})")
+            if self._is_watermark_debug_enabled():
+                try:
+                    self._save_watermark_debug_images(
+                        before_image,
+                        np.array(cleaned),
+                        threshold,
+                        morph_close_kernel,
+                        contrast_cfg if isinstance(contrast_cfg, dict) else None,
+                    )
+                except Exception as dbg_e:
+                    logger.warning(f"⚠️ Watermark debug save failed: {dbg_e}")
+            return np.array(cleaned)
         except Exception as e:
-            logger.warning(f"⚠️ Watermark removal failed, using original: {e}")
+            logger.warning(f"⚠️ Watermark/contrast preprocessing failed, using original: {e}")
             return image
 
-    def process(
+    def correct_orientation(
         self,
         image: Union[np.ndarray, Image.Image],
-        skip_watermark: bool = False,
+        *,
+        pdf_rotate_angle: Optional[int] = None,
+        use_orientation_classifier: bool = True,
     ) -> tuple[np.ndarray, int]:
-        """图像预处理:可选水印去除 + 方向校正。
-
-        Args:
-            image: 输入图像
-            skip_watermark: 为 True 时跳过水印(页级已去水印或裁剪块场景)
-        """
+        """方向校正(PDF 元数据旋转或 MinerU 方向分类器)。"""
         if isinstance(image, Image.Image):
             image = np.array(image)
 
-        rotate_angle = 0
-        processed_image = image if skip_watermark else self.remove_watermark(image)
+        if pdf_rotate_angle:
+            return super().correct_orientation(
+                image,
+                pdf_rotate_angle=pdf_rotate_angle,
+                use_orientation_classifier=False,
+            )
 
-        # 方向校正
-        if self.orientation_classifier is not None:
+        rotate_angle = 0
+        if use_orientation_classifier and self.orientation_classifier is not None:
             try:
-                rotate_angle = int(self.orientation_classifier.predict(processed_image))
-                processed_image = self._apply_rotation(processed_image, rotate_angle)
+                rotate_angle = int(self.orientation_classifier.predict(image))
+                image = self._apply_rotation(image, rotate_angle)
                 logger.info(f"📐 Applied rotation: {rotate_angle}")
             except Exception as e:
                 logger.error(f"⚠️ Orientation classification failed: {e}")
 
-        return processed_image, rotate_angle
+        return image, rotate_angle
+
+    def process(
+        self,
+        image: Union[np.ndarray, Image.Image],
+        skip_watermark: bool = False,
+    ) -> tuple[np.ndarray, int]:
+        """裁剪块仅方向校正;页级请用 prepare_detection_image()。"""
+        if skip_watermark:
+            return self.correct_orientation(image, use_orientation_classifier=True)
+        return self.prepare_detection_image(image, use_orientation_classifier=True)
 
 class MinerULayoutDetector(BaseLayoutDetector):
     """MinerU版式检测适配器"""
@@ -653,7 +692,7 @@ class MinerUOCRRecognizer(BaseOCRRecognizer):
                             'text': item[1][0],  # 识别文本
                             'confidence': item[1][1]  # 置信度
                         })
-                        
+
             return formatted_results
             
         except Exception as e: