image_utils.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
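"""
Image processing utility module.

Provides general-purpose image processing helpers:
- Image decoding and format conversion
- Alpha channel handling
- Image preprocessing
- BBox and point coordinate conversion
- Image rotation and coordinate transformation
- Watermark removal
"""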
  11. import cv2
  12. import numpy as np
  13. from typing import List, Tuple, Union, Optional, Dict, Any
  14. from PIL import Image
  15. def img_decode(content: bytes) -> np.ndarray:
  16. """
  17. 解码字节流为图像
  18. Args:
  19. content: 图像字节流
  20. Returns:
  21. np.ndarray: 解码后的图像
  22. """
  23. np_arr = np.frombuffer(content, dtype=np.uint8)
  24. return cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED)
  25. def check_img(img: Union[bytes, np.ndarray]) -> np.ndarray:
  26. """
  27. 检查并转换图像格式
  28. Args:
  29. img: 图像(可以是 bytes 或 np.ndarray)
  30. Returns:
  31. np.ndarray: BGR 格式图像
  32. """
  33. if isinstance(img, bytes):
  34. img = img_decode(img)
  35. if isinstance(img, np.ndarray) and len(img.shape) == 2:
  36. img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
  37. return img
  38. def alpha_to_color(img: np.ndarray, alpha_color: Tuple[int, int, int] = (255, 255, 255)) -> np.ndarray:
  39. """
  40. 将带 alpha 通道的图像转换为 RGB
  41. Args:
  42. img: 输入图像
  43. alpha_color: 背景颜色 (B, G, R)
  44. Returns:
  45. np.ndarray: RGB 图像
  46. """
  47. if len(img.shape) == 3 and img.shape[2] == 4:
  48. B, G, R, A = cv2.split(img)
  49. alpha = A / 255
  50. R = (alpha_color[0] * (1 - alpha) + R * alpha).astype(np.uint8)
  51. G = (alpha_color[1] * (1 - alpha) + G * alpha).astype(np.uint8)
  52. B = (alpha_color[2] * (1 - alpha) + B * alpha).astype(np.uint8)
  53. img = cv2.merge((B, G, R))
  54. return img
  55. def preprocess_image(_image: np.ndarray) -> np.ndarray:
  56. """
  57. 预处理图像(去除 alpha 通道)
  58. Args:
  59. _image: 输入图像
  60. Returns:
  61. np.ndarray: 预处理后的图像
  62. """
  63. alpha_color = (255, 255, 255)
  64. _image = alpha_to_color(_image, alpha_color)
  65. return _image
  66. def bbox_to_points(bbox: List[float]) -> np.ndarray:
  67. """
  68. 将 bbox 格式转换为四个顶点的数组
  69. Args:
  70. bbox: [x0, y0, x1, y1]
  71. Returns:
  72. np.ndarray: [[x0, y0], [x1, y0], [x1, y1], [x0, y1]]
  73. """
  74. x0, y0, x1, y1 = bbox
  75. return np.array([[x0, y0], [x1, y0], [x1, y1], [x0, y1]]).astype('float32')
  76. def points_to_bbox(points: np.ndarray) -> List[float]:
  77. """
  78. 将四个顶点的数组转换为 bbox 格式
  79. Args:
  80. points: [[x0, y0], [x1, y1], [x2, y2], [x3, y3]]
  81. Returns:
  82. list: [x0, y0, x1, y1]
  83. """
  84. x0, y0 = points[0]
  85. x1, _ = points[1]
  86. _, y1 = points[2]
  87. return [x0, y0, x1, y1]
  88. def detect_watermark(
  89. image: Union[np.ndarray, Image.Image],
  90. midtone_low: int = 100,
  91. midtone_high: int = 220,
  92. ratio_threshold: float = 0.03,
  93. check_diagonal: bool = True,
  94. diagonal_angle_range: tuple = (30, 60),
  95. ) -> bool:
  96. """向后兼容别名,实现已迁移至 ocr_utils.watermark_utils.detect_watermark。"""
  97. from ocr_utils.watermark_utils import detect_watermark as _impl
  98. return _impl(
  99. image,
  100. midtone_low=midtone_low,
  101. midtone_high=midtone_high,
  102. ratio_threshold=ratio_threshold,
  103. check_diagonal=check_diagonal,
  104. diagonal_angle_range=diagonal_angle_range,
  105. )
  106. def remove_watermark_from_image(
  107. image: Union[np.ndarray, Image.Image],
  108. threshold: int = 160,
  109. morph_close_kernel: int = 2,
  110. return_pil: Optional[bool] = None,
  111. ) -> Union[np.ndarray, Image.Image]:
  112. """向后兼容别名,实现已迁移至 ocr_utils.watermark_utils.remove_watermark_from_image。"""
  113. from ocr_utils.watermark_utils import remove_watermark_from_image as _impl
  114. return _impl(image, threshold=threshold, morph_close_kernel=morph_close_kernel, return_pil=return_pil)
  115. def remove_watermark_from_image_rgb(
  116. image: Union[np.ndarray, Image.Image],
  117. threshold: int = 160,
  118. morph_close_kernel: int = 2,
  119. return_pil: Optional[bool] = None,
  120. ) -> Union[np.ndarray, Image.Image]:
  121. """向后兼容别名,实现已迁移至 ocr_utils.watermark_utils.remove_watermark_from_image_rgb。"""
  122. from ocr_utils.watermark_utils import remove_watermark_from_image_rgb as _impl
  123. return _impl(image, threshold=threshold, morph_close_kernel=morph_close_kernel, return_pil=return_pil)
  124. def rotate_image_and_coordinates(
  125. image: Image.Image,
  126. angle: float,
  127. coordinates_list: List[List[int]],
  128. rotate_coordinates: bool = True
  129. ) -> Tuple[Image.Image, List[List[int]]]:
  130. """
  131. 根据角度旋转图像和坐标 - 修正版本
  132. Args:
  133. image: 原始图像(PIL Image)
  134. angle: 旋转角度(度数:0, 90, 180, 270 或任意角度)
  135. coordinates_list: 坐标列表,每个坐标为[x1, y1, x2, y2]格式
  136. rotate_coordinates: 是否需要旋转坐标(针对不同OCR工具的处理方式)
  137. Returns:
  138. rotated_image: 旋转后的图像
  139. rotated_coordinates: 处理后的坐标列表
  140. """
  141. if angle == 0:
  142. return image, coordinates_list
  143. # 标准化旋转角度
  144. if angle == 270:
  145. rotation_angle = -90 # 顺时针90度
  146. elif angle == 90:
  147. rotation_angle = 90 # 逆时针90度
  148. elif angle == 180:
  149. rotation_angle = 180 # 180度
  150. else:
  151. rotation_angle = angle
  152. # 旋转图像
  153. rotated_image = image.rotate(rotation_angle, expand=True)
  154. # 如果不需要旋转坐标,直接返回原坐标
  155. if not rotate_coordinates:
  156. return rotated_image, coordinates_list
  157. # 获取原始和旋转后的图像尺寸
  158. orig_width, orig_height = image.size
  159. new_width, new_height = rotated_image.size
  160. # 计算旋转后的坐标
  161. rotated_coordinates = []
  162. for coord in coordinates_list:
  163. if len(coord) < 4:
  164. rotated_coordinates.append(coord)
  165. continue
  166. x1, y1, x2, y2 = coord[:4]
  167. # 验证原始坐标是否有效
  168. if x1 < 0 or y1 < 0 or x2 <= x1 or y2 <= y1:
  169. print(f"警告: 无效坐标 {coord}")
  170. rotated_coordinates.append([0, 0, 50, 50]) # 使用默认坐标
  171. continue
  172. # 根据旋转角度变换坐标
  173. if rotation_angle == -90: # 顺时针90度 (270度逆时针)
  174. # 变换公式: (x, y) -> (orig_height - y, x)
  175. new_x1 = orig_height - y2 # 这里是y2
  176. new_y1 = x1
  177. new_x2 = orig_height - y1 # 这里是y1
  178. new_y2 = x2
  179. elif rotation_angle == 90: # 逆时针90度
  180. # 变换公式: (x, y) -> (y, orig_width - x)
  181. new_x1 = y1
  182. new_y1 = orig_width - x2 # 这里是x2
  183. new_x2 = y2
  184. new_y2 = orig_width - x1 # 这里是x1
  185. elif rotation_angle == 180: # 180度
  186. # 变换公式: (x, y) -> (orig_width - x, orig_height - y)
  187. new_x1 = orig_width - x2
  188. new_y1 = orig_height - y2
  189. new_x2 = orig_width - x1
  190. new_y2 = orig_height - y1
  191. else: # 任意角度算法 - 修正版本
  192. # 将角度转换为弧度
  193. angle_rad = np.radians(rotation_angle)
  194. cos_angle = np.cos(angle_rad)
  195. sin_angle = np.sin(angle_rad)
  196. # 原图像中心点
  197. orig_center_x = orig_width / 2
  198. orig_center_y = orig_height / 2
  199. # 旋转后图像中心点
  200. new_center_x = new_width / 2
  201. new_center_y = new_height / 2
  202. # 将bbox的四个角点转换为相对于原图像中心的坐标
  203. corners = [
  204. (x1 - orig_center_x, y1 - orig_center_y), # 左上角
  205. (x2 - orig_center_x, y1 - orig_center_y), # 右上角
  206. (x2 - orig_center_x, y2 - orig_center_y), # 右下角
  207. (x1 - orig_center_x, y2 - orig_center_y) # 左下角
  208. ]
  209. # 应用修正后的旋转矩阵变换每个角点
  210. rotated_corners = []
  211. for x, y in corners:
  212. # 修正后的旋转矩阵: [cos(θ) sin(θ)] [x]
  213. # [-sin(θ) cos(θ)] [y]
  214. rotated_x = x * cos_angle + y * sin_angle
  215. rotated_y = -x * sin_angle + y * cos_angle
  216. # 转换回绝对坐标(相对于新图像)
  217. abs_x = rotated_x + new_center_x
  218. abs_y = rotated_y + new_center_y
  219. rotated_corners.append((abs_x, abs_y))
  220. # 从旋转后的四个角点计算新的边界框
  221. x_coords = [corner[0] for corner in rotated_corners]
  222. y_coords = [corner[1] for corner in rotated_corners]
  223. new_x1 = int(min(x_coords))
  224. new_y1 = int(min(y_coords))
  225. new_x2 = int(max(x_coords))
  226. new_y2 = int(max(y_coords))
  227. # 确保坐标在有效范围内
  228. new_x1 = max(0, min(new_width, new_x1))
  229. new_y1 = max(0, min(new_height, new_y1))
  230. new_x2 = max(0, min(new_width, new_x2))
  231. new_y2 = max(0, min(new_height, new_y2))
  232. # 确保x1 < x2, y1 < y2
  233. if new_x1 > new_x2:
  234. new_x1, new_x2 = new_x2, new_x1
  235. if new_y1 > new_y2:
  236. new_y1, new_y2 = new_y2, new_y1
  237. rotated_coordinates.append([new_x1, new_y1, new_x2, new_y2])
  238. return rotated_image, rotated_coordinates