image_utils.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. """
  2. 图像处理工具模块
  3. 提供通用的图像处理功能:
  4. - 图像解码和格式转换
  5. - Alpha 通道处理
  6. - 图像预处理
  7. - BBox 和点坐标转换
  8. - 图像旋转和坐标转换
  9. """
  10. import cv2
  11. import numpy as np
  12. from typing import List, Tuple, Union
  13. from PIL import Image
  14. def img_decode(content: bytes) -> np.ndarray:
  15. """
  16. 解码字节流为图像
  17. Args:
  18. content: 图像字节流
  19. Returns:
  20. np.ndarray: 解码后的图像
  21. """
  22. np_arr = np.frombuffer(content, dtype=np.uint8)
  23. return cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED)
  24. def check_img(img: Union[bytes, np.ndarray]) -> np.ndarray:
  25. """
  26. 检查并转换图像格式
  27. Args:
  28. img: 图像(可以是 bytes 或 np.ndarray)
  29. Returns:
  30. np.ndarray: BGR 格式图像
  31. """
  32. if isinstance(img, bytes):
  33. img = img_decode(img)
  34. if isinstance(img, np.ndarray) and len(img.shape) == 2:
  35. img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
  36. return img
  37. def alpha_to_color(img: np.ndarray, alpha_color: Tuple[int, int, int] = (255, 255, 255)) -> np.ndarray:
  38. """
  39. 将带 alpha 通道的图像转换为 RGB
  40. Args:
  41. img: 输入图像
  42. alpha_color: 背景颜色 (B, G, R)
  43. Returns:
  44. np.ndarray: RGB 图像
  45. """
  46. if len(img.shape) == 3 and img.shape[2] == 4:
  47. B, G, R, A = cv2.split(img)
  48. alpha = A / 255
  49. R = (alpha_color[0] * (1 - alpha) + R * alpha).astype(np.uint8)
  50. G = (alpha_color[1] * (1 - alpha) + G * alpha).astype(np.uint8)
  51. B = (alpha_color[2] * (1 - alpha) + B * alpha).astype(np.uint8)
  52. img = cv2.merge((B, G, R))
  53. return img
  54. def preprocess_image(_image: np.ndarray) -> np.ndarray:
  55. """
  56. 预处理图像(去除 alpha 通道)
  57. Args:
  58. _image: 输入图像
  59. Returns:
  60. np.ndarray: 预处理后的图像
  61. """
  62. alpha_color = (255, 255, 255)
  63. _image = alpha_to_color(_image, alpha_color)
  64. return _image
  65. def bbox_to_points(bbox: List[float]) -> np.ndarray:
  66. """
  67. 将 bbox 格式转换为四个顶点的数组
  68. Args:
  69. bbox: [x0, y0, x1, y1]
  70. Returns:
  71. np.ndarray: [[x0, y0], [x1, y0], [x1, y1], [x0, y1]]
  72. """
  73. x0, y0, x1, y1 = bbox
  74. return np.array([[x0, y0], [x1, y0], [x1, y1], [x0, y1]]).astype('float32')
  75. def points_to_bbox(points: np.ndarray) -> List[float]:
  76. """
  77. 将四个顶点的数组转换为 bbox 格式
  78. Args:
  79. points: [[x0, y0], [x1, y1], [x2, y2], [x3, y3]]
  80. Returns:
  81. list: [x0, y0, x1, y1]
  82. """
  83. x0, y0 = points[0]
  84. x1, _ = points[1]
  85. _, y1 = points[2]
  86. return [x0, y0, x1, y1]
  87. def rotate_image_and_coordinates(
  88. image: Image.Image,
  89. angle: float,
  90. coordinates_list: List[List[int]],
  91. rotate_coordinates: bool = True
  92. ) -> Tuple[Image.Image, List[List[int]]]:
  93. """
  94. 根据角度旋转图像和坐标 - 修正版本
  95. Args:
  96. image: 原始图像(PIL Image)
  97. angle: 旋转角度(度数:0, 90, 180, 270 或任意角度)
  98. coordinates_list: 坐标列表,每个坐标为[x1, y1, x2, y2]格式
  99. rotate_coordinates: 是否需要旋转坐标(针对不同OCR工具的处理方式)
  100. Returns:
  101. rotated_image: 旋转后的图像
  102. rotated_coordinates: 处理后的坐标列表
  103. """
  104. if angle == 0:
  105. return image, coordinates_list
  106. # 标准化旋转角度
  107. if angle == 270:
  108. rotation_angle = -90 # 顺时针90度
  109. elif angle == 90:
  110. rotation_angle = 90 # 逆时针90度
  111. elif angle == 180:
  112. rotation_angle = 180 # 180度
  113. else:
  114. rotation_angle = angle
  115. # 旋转图像
  116. rotated_image = image.rotate(rotation_angle, expand=True)
  117. # 如果不需要旋转坐标,直接返回原坐标
  118. if not rotate_coordinates:
  119. return rotated_image, coordinates_list
  120. # 获取原始和旋转后的图像尺寸
  121. orig_width, orig_height = image.size
  122. new_width, new_height = rotated_image.size
  123. # 计算旋转后的坐标
  124. rotated_coordinates = []
  125. for coord in coordinates_list:
  126. if len(coord) < 4:
  127. rotated_coordinates.append(coord)
  128. continue
  129. x1, y1, x2, y2 = coord[:4]
  130. # 验证原始坐标是否有效
  131. if x1 < 0 or y1 < 0 or x2 <= x1 or y2 <= y1:
  132. print(f"警告: 无效坐标 {coord}")
  133. rotated_coordinates.append([0, 0, 50, 50]) # 使用默认坐标
  134. continue
  135. # 根据旋转角度变换坐标
  136. if rotation_angle == -90: # 顺时针90度 (270度逆时针)
  137. # 变换公式: (x, y) -> (orig_height - y, x)
  138. new_x1 = orig_height - y2 # 这里是y2
  139. new_y1 = x1
  140. new_x2 = orig_height - y1 # 这里是y1
  141. new_y2 = x2
  142. elif rotation_angle == 90: # 逆时针90度
  143. # 变换公式: (x, y) -> (y, orig_width - x)
  144. new_x1 = y1
  145. new_y1 = orig_width - x2 # 这里是x2
  146. new_x2 = y2
  147. new_y2 = orig_width - x1 # 这里是x1
  148. elif rotation_angle == 180: # 180度
  149. # 变换公式: (x, y) -> (orig_width - x, orig_height - y)
  150. new_x1 = orig_width - x2
  151. new_y1 = orig_height - y2
  152. new_x2 = orig_width - x1
  153. new_y2 = orig_height - y1
  154. else: # 任意角度算法 - 修正版本
  155. # 将角度转换为弧度
  156. angle_rad = np.radians(rotation_angle)
  157. cos_angle = np.cos(angle_rad)
  158. sin_angle = np.sin(angle_rad)
  159. # 原图像中心点
  160. orig_center_x = orig_width / 2
  161. orig_center_y = orig_height / 2
  162. # 旋转后图像中心点
  163. new_center_x = new_width / 2
  164. new_center_y = new_height / 2
  165. # 将bbox的四个角点转换为相对于原图像中心的坐标
  166. corners = [
  167. (x1 - orig_center_x, y1 - orig_center_y), # 左上角
  168. (x2 - orig_center_x, y1 - orig_center_y), # 右上角
  169. (x2 - orig_center_x, y2 - orig_center_y), # 右下角
  170. (x1 - orig_center_x, y2 - orig_center_y) # 左下角
  171. ]
  172. # 应用修正后的旋转矩阵变换每个角点
  173. rotated_corners = []
  174. for x, y in corners:
  175. # 修正后的旋转矩阵: [cos(θ) sin(θ)] [x]
  176. # [-sin(θ) cos(θ)] [y]
  177. rotated_x = x * cos_angle + y * sin_angle
  178. rotated_y = -x * sin_angle + y * cos_angle
  179. # 转换回绝对坐标(相对于新图像)
  180. abs_x = rotated_x + new_center_x
  181. abs_y = rotated_y + new_center_y
  182. rotated_corners.append((abs_x, abs_y))
  183. # 从旋转后的四个角点计算新的边界框
  184. x_coords = [corner[0] for corner in rotated_corners]
  185. y_coords = [corner[1] for corner in rotated_corners]
  186. new_x1 = int(min(x_coords))
  187. new_y1 = int(min(y_coords))
  188. new_x2 = int(max(x_coords))
  189. new_y2 = int(max(y_coords))
  190. # 确保坐标在有效范围内
  191. new_x1 = max(0, min(new_width, new_x1))
  192. new_y1 = max(0, min(new_height, new_y1))
  193. new_x2 = max(0, min(new_width, new_x2))
  194. new_y2 = max(0, min(new_height, new_y2))
  195. # 确保x1 < x2, y1 < y2
  196. if new_x1 > new_x2:
  197. new_x1, new_x2 = new_x2, new_x1
  198. if new_y1 > new_y2:
  199. new_y1, new_y2 = new_y2, new_y1
  200. rotated_coordinates.append([new_x1, new_y1, new_x2, new_y2])
  201. return rotated_image, rotated_coordinates