| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145 |
- """
- 增强版文档预处理 Pipeline - 独立版本
- """
- import cv2
- import numpy as np
- from pathlib import Path
- from typing import Union, List
- from dataclasses import dataclass, field
- from orientation_classifier_v2 import OrientationClassifierV2, OrientationResult
@dataclass
class DocPreprocessResult:
    """Outcome of preprocessing a single document image.

    Every field has a default, so an empty result can be created first and
    filled in attribute by attribute afterwards.
    """

    # Path the image was read from; None when the input was an in-memory array.
    input_path: Union[str, None] = None
    # Leading two entries of the input image's ``shape``.
    original_shape: tuple = field(default_factory=tuple)
    # Leading two entries of the output image's ``shape``.
    processed_shape: tuple = field(default_factory=tuple)
    # Output image, i.e. the input after any rotation that was applied.
    processed_image: Union[np.ndarray, None] = None

    # Rotation info
    # Object returned by the orientation classifier; None when it did not run.
    # The annotation is quoted so it is never evaluated at class-creation time.
    orientation_result: Union["OrientationResult", None] = None
    # True when the image was actually rotated.
    rotated: bool = False

    def __eq__(self, other):
        """Compare two results field by field.

        The ``__eq__`` that ``@dataclass`` would generate compares the raw
        field values, which raises ``ValueError`` ("truth value of an array
        is ambiguous") as soon as two distinct multi-element images are
        involved.  Images are therefore compared with ``np.array_equal``.

        Returns:
            ``NotImplemented`` for objects of another class, otherwise a bool.
        """
        if other.__class__ is not self.__class__:
            return NotImplemented

        def _plain_fields(res):
            # Everything except the image, which needs array-aware comparison.
            return (
                res.input_path,
                res.original_shape,
                res.processed_shape,
                res.orientation_result,
                res.rotated,
            )

        if _plain_fields(self) != _plain_fields(other):
            return False

        mine, theirs = self.processed_image, other.processed_image
        if mine is None or theirs is None:
            return mine is theirs
        # Identity short-circuit keeps "same object" equal even with NaNs.
        return mine is theirs or bool(np.array_equal(mine, theirs))

    def __str__(self):
        """Return a multi-line, human-readable summary of the result."""
        source = Path(self.input_path).name if self.input_path else 'numpy array'
        lines = [
            "DocPreprocessResult:",
            f" Input: {source}",
            f" Original: {self.original_shape}",
            f" Processed: {self.processed_shape}",
        ]

        ori = self.orientation_result
        # Explicit None check: the rotation lines must not depend on whatever
        # truthiness the orientation result object happens to define.
        if ori is not None:
            lines.append(f" Rotation: {ori.rotation_angle}° (conf={ori.confidence:.3f})")
            lines.append(f" Rotated: {self.rotated}")
            lines.append(f" Vertical texts: {ori.vertical_text_count}")

        return "\n".join(lines)
class DocPreprocessorV2:
    """Document preprocessing pipeline, V2.

    Improvements:
    1. Uses a two-stage rotation detection strategy
    2. Supports batch processing
    3. Runs standalone, without the PaddleX dependency
    """

    def __init__(
        self,
        orientation_model: str = None,
        text_detector=None,
        use_orientation_classify: bool = True,
        aspect_ratio_threshold: float = 1.2,
        use_gpu: bool = False,
        **kwargs
    ):
        """Set up the pipeline and, when possible, its orientation classifier.

        Args:
            orientation_model: Path of the orientation classification model.
            text_detector: Text detector (optional), forwarded to the classifier.
            use_orientation_classify: Whether to run orientation classification.
            aspect_ratio_threshold: Aspect-ratio threshold, forwarded to the
                classifier.
            use_gpu: Whether to use the GPU.
            **kwargs: Accepted and ignored.
        """
        self.use_orientation_classify = use_orientation_classify

        # The classifier is only built when it is requested AND a model path
        # is given; otherwise images pass through without orientation handling.
        if use_orientation_classify and orientation_model:
            self.orientation_classifier = OrientationClassifierV2(
                model_path=orientation_model,
                text_detector=text_detector,
                aspect_ratio_threshold=aspect_ratio_threshold,
                use_gpu=use_gpu,
            )
        else:
            self.orientation_classifier = None

    def predict(
        self,
        input: Union[str, Path, np.ndarray, List],
        return_debug: bool = False
    ) -> List["DocPreprocessResult"]:
        """Preprocess one document image or a batch of them.

        Args:
            input: An image path (``str`` or ``Path``), a numpy array, or a
                list/tuple of those.
            return_debug: Whether to emit debug output.

        Returns:
            A list of preprocessing results, one per input image (a single
            input yields a one-element list).
        """
        # Batch mode. Tuples are accepted alongside lists; previously a tuple
        # was treated as a single image and failed on ``.copy()``.
        if isinstance(input, (list, tuple)):
            total = len(input)
            results = []
            for position, item in enumerate(input, start=1):
                print(f"\n[{position}/{total}] Processing...")
                results.append(self._predict_single(item, return_debug))
            return results

        return [self._predict_single(input, return_debug)]

    def _predict_single(
        self,
        input: Union[str, Path, np.ndarray],
        return_debug: bool = False
    ) -> "DocPreprocessResult":
        """Process a single image.

        Args:
            input: Image path (``str`` or ``Path``) or an image array.
            return_debug: Forwarded to the classifier; also enables the
                "Rotated" message printed here.

        Returns:
            The populated ``DocPreprocessResult``.

        Raises:
            ValueError: If a path is given and ``cv2.imread`` cannot read it.
        """
        # Load the image. ``Path`` objects are converted to ``str`` before
        # being handed to OpenCV; previously they fell into the array branch.
        if isinstance(input, (str, Path)):
            input_path = str(input)
            img = cv2.imread(input_path)
            if img is None:
                raise ValueError(f"Failed to read image: {input_path}")
        else:
            # Work on a copy of the caller's array.
            img = input.copy()
            input_path = None

        result = DocPreprocessResult()
        result.input_path = input_path
        result.original_shape = img.shape[:2]

        # Orientation classification
        if self.orientation_classifier is not None:
            ori_result = self.orientation_classifier.predict(img, return_debug)
            result.orientation_result = ori_result

            # Rotate the image
            if ori_result.needs_rotation:
                img = self.orientation_classifier.rotate_image(
                    img,
                    ori_result.rotation_angle,
                )
                result.rotated = True

                if return_debug:
                    print(f" ✅ Rotated {ori_result.rotation_angle}°")

        result.processed_image = img
        result.processed_shape = img.shape[:2]

        return result
|