|
@@ -18,6 +18,7 @@ if str(ocr_platform_root) not in sys.path:
|
|
|
|
|
|
|
|
from .base import BasePreprocessor, BaseLayoutDetector, BaseVLRecognizer, BaseOCRRecognizer
|
|
from .base import BasePreprocessor, BaseLayoutDetector, BaseVLRecognizer, BaseOCRRecognizer
|
|
|
from ocr_utils.coordinate_utils import CoordinateUtils
|
|
from ocr_utils.coordinate_utils import CoordinateUtils
|
|
|
|
|
+from ocr_utils.watermark_utils import remove_watermark_from_image_rgb
|
|
|
|
|
|
|
|
# 导入MinerU组件
|
|
# 导入MinerU组件
|
|
|
try:
|
|
try:
|
|
@@ -65,11 +66,28 @@ class MinerUPreprocessor(BasePreprocessor):
|
|
|
|
|
|
|
|
rotate_angle = 0
|
|
rotate_angle = 0
|
|
|
processed_image = image
|
|
processed_image = image
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
|
|
+ # 水印去除(在方向校正之前,避免旋转引入额外噪声)
|
|
|
|
|
+ watermark_cfg = self.config.get('watermark_removal', {})
|
|
|
|
|
+ if watermark_cfg.get('enabled', False):
|
|
|
|
|
+ threshold = watermark_cfg.get('threshold', 160)
|
|
|
|
|
+ morph_close_kernel = watermark_cfg.get('morph_close_kernel', 0)
|
|
|
|
|
+ try:
|
|
|
|
|
+ processed_image = remove_watermark_from_image_rgb(
|
|
|
|
|
+ processed_image,
|
|
|
|
|
+ threshold=threshold,
|
|
|
|
|
+ morph_close_kernel=morph_close_kernel,
|
|
|
|
|
+ return_pil=False,
|
|
|
|
|
+ )
|
|
|
|
|
+ logger.info(f"🧹 Watermark removed (threshold={threshold})")
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ logger.warning(f"⚠️ Watermark removal failed, using original: {e}")
|
|
|
|
|
+ processed_image = image
|
|
|
|
|
+
|
|
|
# 方向校正
|
|
# 方向校正
|
|
|
if self.orientation_classifier is not None:
|
|
if self.orientation_classifier is not None:
|
|
|
try:
|
|
try:
|
|
|
- rotate_angle = int(self.orientation_classifier.predict(image))
|
|
|
|
|
|
|
+ rotate_angle = int(self.orientation_classifier.predict(processed_image))
|
|
|
processed_image = self._apply_rotation(processed_image, rotate_angle)
|
|
processed_image = self._apply_rotation(processed_image, rotate_angle)
|
|
|
logger.info(f"📐 Applied rotation: {rotate_angle}")
|
|
logger.info(f"📐 Applied rotation: {rotate_angle}")
|
|
|
except Exception as e:
|
|
except Exception as e:
|