"""
Image processing utilities.

Provides general-purpose image helpers:
- image decoding and format conversion
- alpha-channel handling
- image preprocessing
- conversion between bounding boxes and corner points
- image rotation with the matching coordinate transformation
"""

from typing import List, Tuple, Union

import cv2
import numpy as np
from PIL import Image


def img_decode(content: bytes) -> np.ndarray:
    """
    Decode an encoded image byte string into an array.

    Args:
        content: Raw bytes of an encoded image.

    Returns:
        np.ndarray: The decoded image. Decoding uses
        ``cv2.IMREAD_UNCHANGED``, so the channel layout of the data is not
        altered here (a 4-channel result is handled by ``alpha_to_color``).
    """
    np_arr = np.frombuffer(content, dtype=np.uint8)
    return cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED)


def check_img(img: Union[bytes, np.ndarray]) -> np.ndarray:
    """
    Normalize an image input into a decoded array.

    Args:
        img: Either encoded image bytes or an already decoded array.

    Returns:
        np.ndarray: The image array. Bytes are decoded first, and a
        2-dimensional (single channel) array is expanded to 3-channel BGR.
        Arrays with any other number of dimensions are returned unchanged.
    """
    if isinstance(img, bytes):
        img = img_decode(img)
    if isinstance(img, np.ndarray) and img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    return img


def alpha_to_color(
    img: np.ndarray,
    alpha_color: Tuple[int, int, int] = (255, 255, 255),
) -> np.ndarray:
    """
    Flatten a 4-channel image onto a solid background colour.

    Args:
        img: Input image. Only a 3-dimensional array with exactly 4 channels
            (split as B, G, R, A) is modified.
        alpha_color: Background colour in (R, G, B) order: index 0 is blended
            into the red channel and index 2 into the blue channel.

    Returns:
        np.ndarray: A 3-channel image whose channels are merged in B, G, R
        order, or the input unchanged when it does not have 4 channels.
    """
    if len(img.shape) == 3 and img.shape[2] == 4:
        blue, green, red, alpha_channel = cv2.split(img)
        # Opacity as a 0..1 weight: 1 keeps the pixel, 0 shows the background.
        alpha = alpha_channel / 255

        red = (alpha_color[0] * (1 - alpha) + red * alpha).astype(np.uint8)
        green = (alpha_color[1] * (1 - alpha) + green * alpha).astype(np.uint8)
        blue = (alpha_color[2] * (1 - alpha) + blue * alpha).astype(np.uint8)

        img = cv2.merge((blue, green, red))
    return img


def preprocess_image(_image: np.ndarray) -> np.ndarray:
    """
    Preprocess an image by flattening its alpha channel onto white.

    Args:
        _image: Input image.

    Returns:
        np.ndarray: The preprocessed image.
    """
    return alpha_to_color(_image, (255, 255, 255))


def bbox_to_points(bbox: List[float]) -> np.ndarray:
    """
    Convert a bounding box into an array of its four corner points.

    Args:
        bbox: ``[x0, y0, x1, y1]``.

    Returns:
        np.ndarray: ``[[x0, y0], [x1, y0], [x1, y1], [x0, y1]]`` as float32.
    """
    x0, y0, x1, y1 = bbox
    return np.array([[x0, y0], [x1, y0], [x1, y1], [x0, y1]]).astype('float32')


def points_to_bbox(points: np.ndarray) -> List[float]:
    """
    Convert an array of four corner points into a bounding box.

    Only the first three points are read: the first supplies ``x0, y0``, the
    second supplies ``x1`` and the third supplies ``y1``.

    Args:
        points: ``[[x0, y0], [x1, y1], [x2, y2], [x3, y3]]``.

    Returns:
        list: ``[x0, y0, x1, y1]``.
    """
    x0, y0 = points[0]
    x1, _ = points[1]
    _, y1 = points[2]
    return [x0, y0, x1, y1]


def _rotate_box_about_center(
    x1: float,
    y1: float,
    x2: float,
    y2: float,
    cos_angle: float,
    sin_angle: float,
    orig_center: Tuple[float, float],
    new_center: Tuple[float, float],
) -> Tuple[int, int, int, int]:
    """Rotate the four corners of a box and return the box enclosing them."""
    orig_center_x, orig_center_y = orig_center
    new_center_x, new_center_y = new_center

    xs = []
    ys = []
    for corner_x, corner_y in ((x1, y1), (x2, y1), (x2, y2), (x1, y2)):
        # Express the corner relative to the centre of the original image.
        rel_x = corner_x - orig_center_x
        rel_y = corner_y - orig_center_y
        # Rotation matrix used for image coordinates (y axis pointing down):
        #   [ cos(t)  sin(t)] [x]
        #   [-sin(t)  cos(t)] [y]
        # then shift into the coordinate frame of the rotated image.
        xs.append(rel_x * cos_angle + rel_y * sin_angle + new_center_x)
        ys.append(-rel_x * sin_angle + rel_y * cos_angle + new_center_y)

    return int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))


def rotate_image_and_coordinates(
    image: Image.Image,
    angle: float,
    coordinates_list: List[List[int]],
    rotate_coordinates: bool = True,
) -> Tuple[Image.Image, List[List[int]]]:
    """
    Rotate an image and, optionally, the boxes that belong to it.

    The angle is reduced modulo 360 before it is classified, so equivalent
    angles (for example 270 and -90, or 90 and -270 and 450) all use the same
    exact right-angle formulas instead of the floating-point path.

    Args:
        image: Source image (PIL Image).
        angle: Rotation angle in degrees, passed to ``Image.rotate``
            (0, 90, 180, 270 or any other value).
        coordinates_list: Boxes, each in ``[x1, y1, x2, y2]`` format.
        rotate_coordinates: Whether the boxes should be transformed as well.
            When false the boxes are returned untouched.

    Returns:
        rotated_image: The rotated image (the input image itself when the
            angle is a multiple of 360).
        rotated_coordinates: The processed boxes. Entries with fewer than
            four values are passed through unchanged; boxes that fail
            validation are replaced by ``[0, 0, 50, 50]``.
    """
    normalized_angle = angle % 360
    if normalized_angle == 0:
        return image, coordinates_list

    # Map the angle onto the value handed to PIL and used to pick a formula.
    if normalized_angle == 270:
        rotation_angle = -90  # 90 degrees clockwise
    elif normalized_angle == 90:
        rotation_angle = 90  # 90 degrees counter-clockwise
    elif normalized_angle == 180:
        rotation_angle = 180
    else:
        # Keep the caller's value so arbitrary angles are computed exactly
        # as they were given.
        rotation_angle = angle

    rotated_image = image.rotate(rotation_angle, expand=True)

    if not rotate_coordinates:
        return rotated_image, coordinates_list

    orig_width, orig_height = image.size
    new_width, new_height = rotated_image.size

    # The trigonometry and the centres do not depend on the box, so compute
    # them once instead of once per box.
    is_right_angle = rotation_angle in (-90, 90, 180)
    if not is_right_angle:
        angle_rad = np.radians(rotation_angle)
        cos_angle = np.cos(angle_rad)
        sin_angle = np.sin(angle_rad)
        orig_center = (orig_width / 2, orig_height / 2)
        new_center = (new_width / 2, new_height / 2)

    rotated_coordinates = []
    for coord in coordinates_list:
        if len(coord) < 4:
            rotated_coordinates.append(coord)
            continue

        x1, y1, x2, y2 = coord[:4]

        # Reject boxes that are negative, empty or inverted.
        if x1 < 0 or y1 < 0 or x2 <= x1 or y2 <= y1:
            print(f"警告: 无效坐标 {coord}")
            rotated_coordinates.append([0, 0, 50, 50])  # fallback box
            continue

        if rotation_angle == -90:
            # (x, y) -> (orig_height - y, x); y2 yields the smaller new x.
            new_x1 = orig_height - y2
            new_y1 = x1
            new_x2 = orig_height - y1
            new_y2 = x2
        elif rotation_angle == 90:
            # (x, y) -> (y, orig_width - x); x2 yields the smaller new y.
            new_x1 = y1
            new_y1 = orig_width - x2
            new_x2 = y2
            new_y2 = orig_width - x1
        elif rotation_angle == 180:
            # (x, y) -> (orig_width - x, orig_height - y)
            new_x1 = orig_width - x2
            new_y1 = orig_height - y2
            new_x2 = orig_width - x1
            new_y2 = orig_height - y1
        else:
            new_x1, new_y1, new_x2, new_y2 = _rotate_box_about_center(
                x1, y1, x2, y2, cos_angle, sin_angle, orig_center, new_center
            )

        # Clamp into the rotated image. Every branch above already yields
        # x1 <= x2 and y1 <= y2, and clamping preserves that order, so no
        # reordering step is needed afterwards.
        rotated_coordinates.append([
            max(0, min(new_width, new_x1)),
            max(0, min(new_height, new_y1)),
            max(0, min(new_width, new_x2)),
            max(0, min(new_height, new_y2)),
        ])

    return rotated_image, rotated_coordinates