Просмотр исходного кода

feat(优化水印处理): 在图像预处理流程中添加页级水印去除功能，更新相关处理器以支持跳过水印选项，提升表格方向校正的准确性。

zhch158_admin 2 дней назад
Родитель
Коммит
f32733271c

+ 4 - 2
ocr_tools/universal_doc_parser/core/element_processors.py

@@ -250,9 +250,11 @@ class ElementProcessors:
         
         table_angle = 0
         
-        # 1. 表格方向检测
+        # 1. 表格方向检测(页级已去水印,此处仅校正表格局部方向)
         try:
-            rotated_table, table_angle = self.preprocessor.process(cropped_table)
+            rotated_table, table_angle = self.preprocessor.process(
+                cropped_table, skip_watermark=True
+            )
             if table_angle != 0:
                 logger.info(f"📐 Table rotated {table_angle}°")
                 cropped_table = rotated_table  # cropped_table 现在是旋转后的图像

+ 6 - 1
ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py

@@ -395,13 +395,18 @@ class EnhancedDocPipeline:
         # 用于检测的图片(可能被旋转)
         detection_image = original_image.copy()
         rotate_angle = 0
+
+        # 0. 页级水印去除(全页一次;表格裁剪等下游仅做方向校正,避免重复去水印)
+        detection_image = self.preprocessor.remove_watermark(detection_image)
         
         # 1. 页面方向识别
         # rotate_angle统一定义:图像需要逆时针旋转的角度(0/90/180/270)来变为正视
         if pdf_type == 'ocr':
             # 扫描件:使用OCR方向识别
             try:
-                detection_image, rotate_angle = self.preprocessor.process(original_image)
+                detection_image, rotate_angle = self.preprocessor.process(
+                    detection_image, skip_watermark=True
+                )
                 page_result['angle'] = rotate_angle
                 
                 if rotate_angle != 0:

+ 11 - 1
ocr_tools/universal_doc_parser/models/adapters/base.py

@@ -26,8 +26,18 @@ class BaseAdapter(ABC):
 class BasePreprocessor(BaseAdapter):
     """预处理器基类"""
     
+    def remove_watermark(self, image: Union[np.ndarray, Image.Image]) -> np.ndarray:
+        """页级水印去除(默认无操作,子类可覆盖)。"""
+        if isinstance(image, Image.Image):
+            return np.array(image)
+        return image
+
     @abstractmethod
-    def process(self, image: Union[np.ndarray, Image.Image]) -> tuple[np.ndarray, int]:
+    def process(
+        self,
+        image: Union[np.ndarray, Image.Image],
+        skip_watermark: bool = False,
+    ) -> tuple[np.ndarray, int]:
         """
         处理图像
         返回处理后的图像和旋转角度

+ 36 - 21
ocr_tools/universal_doc_parser/models/adapters/mineru_adapter.py

@@ -58,31 +58,46 @@ class MinerUPreprocessor(BasePreprocessor):
         """清理资源"""
         pass
 
-    def process(self, image: Union[np.ndarray, Image.Image]) -> tuple[np.ndarray, int]:
-        """图像预处理"""
-        # 转换为numpy数组
+    def remove_watermark(self, image: Union[np.ndarray, Image.Image]) -> np.ndarray:
+        """页级水印去除(应在整页图像上调用一次,勿对裁剪块重复调用)。"""
         if isinstance(image, Image.Image):
             image = np.array(image)
 
-        rotate_angle = 0
-        processed_image = image
-
-        # 水印去除(在方向校正之前,避免旋转引入额外噪声)
         watermark_cfg = self.config.get('watermark_removal', {})
-        if watermark_cfg.get('enabled', False):
-            threshold = watermark_cfg.get('threshold', 160)
-            morph_close_kernel = watermark_cfg.get('morph_close_kernel', 0)
-            try:
-                processed_image = remove_watermark_from_image_rgb(
-                    processed_image,
-                    threshold=threshold,
-                    morph_close_kernel=morph_close_kernel,
-                    return_pil=False,
-                )
-                logger.info(f"🧹 Watermark removed (threshold={threshold})")
-            except Exception as e:
-                logger.warning(f"⚠️ Watermark removal failed, using original: {e}")
-                processed_image = image
+        if not watermark_cfg.get('enabled', False):
+            return image
+
+        threshold = watermark_cfg.get('threshold', 160)
+        morph_close_kernel = watermark_cfg.get('morph_close_kernel', 0)
+        try:
+            cleaned = remove_watermark_from_image_rgb(
+                image,
+                threshold=threshold,
+                morph_close_kernel=morph_close_kernel,
+                return_pil=False,
+            )
+            logger.info(f"🧹 Watermark removed (threshold={threshold})")
+            return cleaned
+        except Exception as e:
+            logger.warning(f"⚠️ Watermark removal failed, using original: {e}")
+            return image
+
+    def process(
+        self,
+        image: Union[np.ndarray, Image.Image],
+        skip_watermark: bool = False,
+    ) -> tuple[np.ndarray, int]:
+        """图像预处理:可选水印去除 + 方向校正。
+
+        Args:
+            image: 输入图像
+            skip_watermark: 为 True 时跳过水印(页级已去水印或裁剪块场景)
+        """
+        if isinstance(image, Image.Image):
+            image = np.array(image)
+
+        rotate_angle = 0
+        processed_image = image if skip_watermark else self.remove_watermark(image)
 
         # 方向校正
         if self.orientation_classifier is not None: