doc_preprocessor_v2.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. """
  2. 增强版文档预处理 Pipeline - 独立版本
  3. """
  4. import cv2
  5. import numpy as np
  6. from pathlib import Path
  7. from typing import Union, List
  8. from dataclasses import dataclass, field
  9. from orientation_classifier_v2 import OrientationClassifierV2, OrientationResult
  10. @dataclass
  11. class DocPreprocessResult:
  12. """文档预处理结果"""
  13. input_path: str = None
  14. original_shape: tuple = field(default_factory=tuple)
  15. processed_shape: tuple = field(default_factory=tuple)
  16. processed_image: np.ndarray = None
  17. # 旋转信息
  18. orientation_result: OrientationResult = None
  19. rotated: bool = False
  20. def __str__(self):
  21. lines = [
  22. f"DocPreprocessResult:",
  23. f" Input: {Path(self.input_path).name if self.input_path else 'numpy array'}",
  24. f" Original: {self.original_shape}",
  25. f" Processed: {self.processed_shape}",
  26. ]
  27. if self.orientation_result:
  28. lines.append(f" Rotation: {self.orientation_result.rotation_angle}° (conf={self.orientation_result.confidence:.3f})")
  29. lines.append(f" Rotated: {self.rotated}")
  30. lines.append(f" Vertical texts: {self.orientation_result.vertical_text_count}")
  31. return "\n".join(lines)
  32. class DocPreprocessorV2:
  33. """
  34. 文档预处理 Pipeline V2
  35. 改进点:
  36. 1. 使用两阶段旋转检测策略
  37. 2. 支持批量处理
  38. 3. 独立运行,无需 PaddleX 依赖
  39. """
  40. def __init__(
  41. self,
  42. orientation_model: str = None,
  43. text_detector = None,
  44. use_orientation_classify: bool = True,
  45. aspect_ratio_threshold: float = 1.2,
  46. use_gpu: bool = False,
  47. **kwargs
  48. ):
  49. """
  50. Args:
  51. orientation_model: 方向分类模型路径
  52. text_detector: 文本检测器(可选)
  53. use_orientation_classify: 是否使用方向分类
  54. aspect_ratio_threshold: 长宽比阈值
  55. use_gpu: 是否使用GPU
  56. """
  57. self.use_orientation_classify = use_orientation_classify
  58. if use_orientation_classify and orientation_model:
  59. self.orientation_classifier = OrientationClassifierV2(
  60. model_path=orientation_model,
  61. text_detector=text_detector,
  62. aspect_ratio_threshold=aspect_ratio_threshold,
  63. use_gpu=use_gpu
  64. )
  65. else:
  66. self.orientation_classifier = None
  67. def predict(
  68. self,
  69. input: Union[str, np.ndarray, List],
  70. return_debug: bool = False
  71. ) -> List[DocPreprocessResult]:
  72. """
  73. 预测并预处理文档图像
  74. Args:
  75. input: 图像路径、numpy数组或列表
  76. return_debug: 是否输出调试信息
  77. Returns:
  78. 预处理结果列表
  79. """
  80. # 批量处理
  81. if isinstance(input, list):
  82. results = []
  83. for i, img in enumerate(input):
  84. print(f"\n[{i+1}/{len(input)}] Processing...")
  85. result = self._predict_single(img, return_debug)
  86. results.append(result)
  87. return results
  88. else:
  89. return [self._predict_single(input, return_debug)]
  90. def _predict_single(
  91. self,
  92. input: Union[str, np.ndarray],
  93. return_debug: bool = False
  94. ) -> DocPreprocessResult:
  95. """处理单张图像"""
  96. # 读取图像
  97. if isinstance(input, str):
  98. img = cv2.imread(input)
  99. if img is None:
  100. raise ValueError(f"Failed to read image: {input}")
  101. input_path = input
  102. else:
  103. img = input.copy()
  104. input_path = None
  105. result = DocPreprocessResult()
  106. result.input_path = input_path
  107. result.original_shape = img.shape[:2]
  108. # 方向分类
  109. if self.orientation_classifier:
  110. ori_result = self.orientation_classifier.predict(img, return_debug)
  111. result.orientation_result = ori_result
  112. # 旋转图像
  113. if ori_result.needs_rotation:
  114. img = self.orientation_classifier.rotate_image(
  115. img,
  116. ori_result.rotation_angle
  117. )
  118. result.rotated = True
  119. if return_debug:
  120. print(f" ✅ Rotated {ori_result.rotation_angle}°")
  121. result.processed_image = img
  122. result.processed_shape = img.shape[:2]
  123. return result