|
|
@@ -59,56 +59,95 @@ class MinerUPreprocessor(BasePreprocessor):
|
|
|
pass
|
|
|
|
|
|
def remove_watermark(self, image: Union[np.ndarray, Image.Image]) -> np.ndarray:
|
|
|
- """页级水印去除(应在整页图像上调用一次,勿对裁剪块重复调用)。"""
|
|
|
+ """页级水印去除 + 可选对比度增强(整页调用一次,勿对裁剪块重复)。"""
|
|
|
if isinstance(image, Image.Image):
|
|
|
image = np.array(image)
|
|
|
|
|
|
watermark_cfg = self.config.get('watermark_removal', {})
|
|
|
- if not watermark_cfg.get('enabled', False):
|
|
|
+ wm_enabled = bool(watermark_cfg.get('enabled', False))
|
|
|
+ contrast_cfg = watermark_cfg.get('contrast_enhancement', {})
|
|
|
+ contrast_enabled = bool(
|
|
|
+ contrast_cfg.get('enabled', False) if isinstance(contrast_cfg, dict) else False
|
|
|
+ )
|
|
|
+
|
|
|
+ if not wm_enabled and not contrast_enabled:
|
|
|
return image
|
|
|
|
|
|
threshold = watermark_cfg.get('threshold', 160)
|
|
|
morph_close_kernel = watermark_cfg.get('morph_close_kernel', 0)
|
|
|
+ before_image = image.copy()
|
|
|
try:
|
|
|
cleaned = remove_watermark_from_image_rgb(
|
|
|
image,
|
|
|
threshold=threshold,
|
|
|
morph_close_kernel=morph_close_kernel,
|
|
|
return_pil=False,
|
|
|
+ contrast_enhancement=contrast_cfg if isinstance(contrast_cfg, dict) else None,
|
|
|
+ apply_watermark_removal=wm_enabled,
|
|
|
+ watermark_removal_cfg=watermark_cfg,
|
|
|
)
|
|
|
- logger.info(f"🧹 Watermark removed (threshold={threshold})")
|
|
|
- return cleaned
|
|
|
+ if wm_enabled:
|
|
|
+ method = watermark_cfg.get("method", "threshold")
|
|
|
+ logger.info(
|
|
|
+ f"🧹 Watermark removed (method={method}, threshold={threshold})"
|
|
|
+ )
|
|
|
+ if contrast_enabled:
|
|
|
+ method = contrast_cfg.get('method', 'clahe') if isinstance(contrast_cfg, dict) else 'clahe'
|
|
|
+ logger.info(f"📈 Contrast enhanced (method={method})")
|
|
|
+ if self._is_watermark_debug_enabled():
|
|
|
+ try:
|
|
|
+ self._save_watermark_debug_images(
|
|
|
+ before_image,
|
|
|
+ np.array(cleaned),
|
|
|
+ threshold,
|
|
|
+ morph_close_kernel,
|
|
|
+ contrast_cfg if isinstance(contrast_cfg, dict) else None,
|
|
|
+ )
|
|
|
+ except Exception as dbg_e:
|
|
|
+ logger.warning(f"⚠️ Watermark debug save failed: {dbg_e}")
|
|
|
+ return np.array(cleaned)
|
|
|
except Exception as e:
|
|
|
- logger.warning(f"⚠️ Watermark removal failed, using original: {e}")
|
|
|
+ logger.warning(f"⚠️ Watermark/contrast preprocessing failed, using original: {e}")
|
|
|
return image
|
|
|
|
|
|
- def process(
|
|
|
+ def correct_orientation(
|
|
|
self,
|
|
|
image: Union[np.ndarray, Image.Image],
|
|
|
- skip_watermark: bool = False,
|
|
|
+ *,
|
|
|
+ pdf_rotate_angle: Optional[int] = None,
|
|
|
+ use_orientation_classifier: bool = True,
|
|
|
) -> tuple[np.ndarray, int]:
|
|
|
- """图像预处理:可选水印去除 + 方向校正。
|
|
|
-
|
|
|
- Args:
|
|
|
- image: 输入图像
|
|
|
- skip_watermark: 为 True 时跳过水印(页级已去水印或裁剪块场景)
|
|
|
- """
|
|
|
+ """方向校正(PDF 元数据旋转或 MinerU 方向分类器)。"""
|
|
|
if isinstance(image, Image.Image):
|
|
|
image = np.array(image)
|
|
|
|
|
|
- rotate_angle = 0
|
|
|
- processed_image = image if skip_watermark else self.remove_watermark(image)
|
|
|
+ if pdf_rotate_angle:
|
|
|
+ return super().correct_orientation(
|
|
|
+ image,
|
|
|
+ pdf_rotate_angle=pdf_rotate_angle,
|
|
|
+ use_orientation_classifier=False,
|
|
|
+ )
|
|
|
|
|
|
- # 方向校正
|
|
|
- if self.orientation_classifier is not None:
|
|
|
+ rotate_angle = 0
|
|
|
+ if use_orientation_classifier and self.orientation_classifier is not None:
|
|
|
try:
|
|
|
- rotate_angle = int(self.orientation_classifier.predict(processed_image))
|
|
|
- processed_image = self._apply_rotation(processed_image, rotate_angle)
|
|
|
+ rotate_angle = int(self.orientation_classifier.predict(image))
|
|
|
+ image = self._apply_rotation(image, rotate_angle)
|
|
|
logger.info(f"📐 Applied rotation: {rotate_angle}")
|
|
|
except Exception as e:
|
|
|
logger.error(f"⚠️ Orientation classification failed: {e}")
|
|
|
|
|
|
- return processed_image, rotate_angle
|
|
|
+ return image, rotate_angle
|
|
|
+
|
|
|
+ def process(
|
|
|
+ self,
|
|
|
+ image: Union[np.ndarray, Image.Image],
|
|
|
+ skip_watermark: bool = False,
|
|
|
+ ) -> tuple[np.ndarray, int]:
|
|
|
+ """裁剪块仅方向校正;页级请用 prepare_detection_image()。"""
|
|
|
+ if skip_watermark:
|
|
|
+ return self.correct_orientation(image, use_orientation_classifier=True)
|
|
|
+ return self.prepare_detection_image(image, use_orientation_classifier=True)
|
|
|
|
|
|
class MinerULayoutDetector(BaseLayoutDetector):
|
|
|
"""MinerU版式检测适配器"""
|
|
|
@@ -653,7 +692,7 @@ class MinerUOCRRecognizer(BaseOCRRecognizer):
|
|
|
'text': item[1][0], # 识别文本
|
|
|
'confidence': item[1][1] # 置信度
|
|
|
})
|
|
|
-
|
|
|
+
|
|
|
return formatted_results
|
|
|
|
|
|
except Exception as e:
|