| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257 |
- """
- 图像处理工具模块
- 提供通用的图像处理功能:
- - 图像解码和格式转换
- - Alpha 通道处理
- - 图像预处理
- - BBox 和点坐标转换
- - 图像旋转和坐标转换
- """
- import cv2
- import numpy as np
- from typing import List, Tuple, Union
- from PIL import Image
def img_decode(content: bytes) -> np.ndarray:
    """Decode an encoded image byte string into a pixel array.

    The bytes are viewed as a flat ``uint8`` buffer (no copy) and handed to
    ``cv2.imdecode`` with the ``cv2.IMREAD_UNCHANGED`` flag.

    Args:
        content: Encoded image file contents.

    Returns:
        np.ndarray: The value produced by ``cv2.imdecode`` for the buffer.
    """
    return cv2.imdecode(
        np.frombuffer(content, dtype=np.uint8),
        cv2.IMREAD_UNCHANGED,
    )
def check_img(img: Union[bytes, np.ndarray]) -> np.ndarray:
    """Normalise an image argument to a decoded, multi-channel array.

    ``bytes`` input is decoded with ``img_decode``. A two-dimensional
    (single-channel) array is expanded to three channels with
    ``cv2.COLOR_GRAY2BGR``. Anything else is returned unchanged.

    Args:
        img: Encoded image bytes or an already decoded array.

    Returns:
        np.ndarray: The decoded image; grayscale input comes back as BGR.
    """
    decoded = img_decode(img) if isinstance(img, bytes) else img
    if isinstance(decoded, np.ndarray) and decoded.ndim == 2:
        return cv2.cvtColor(decoded, cv2.COLOR_GRAY2BGR)
    return decoded
def alpha_to_color(img: np.ndarray, alpha_color: Tuple[int, int, int] = (255, 255, 255)) -> np.ndarray:
    """Composite a 4-channel image over a solid background colour.

    The input channels are interpreted as B, G, R, A and the result keeps
    the B, G, R channel order. Images that are not 3-D with exactly four
    channels are returned unchanged (same object).

    Args:
        img: Input image.
        alpha_color: Background colour given as (R, G, B). Note the order:
            index 0 is blended into the red channel and index 2 into the
            blue channel.

    Returns:
        np.ndarray: ``uint8`` image of shape (H, W, 3) in B, G, R order, or
        ``img`` itself when it has no alpha channel.
    """
    if img.ndim != 3 or img.shape[2] != 4:
        return img

    # Keep the trailing axis so alpha broadcasts over the colour channels.
    alpha = img[:, :, 3:4] / 255
    # alpha_color is (R, G, B) while the pixel data is (B, G, R).
    background = np.array(
        (alpha_color[2], alpha_color[1], alpha_color[0]), dtype=np.float64
    )
    blended = background * (1 - alpha) + img[:, :, :3] * alpha
    # Round to nearest instead of truncating: float error in the blend
    # (e.g. 254.99999999999997) must not darken the pixel by one level.
    return np.rint(blended).astype(np.uint8)
def preprocess_image(_image: np.ndarray) -> np.ndarray:
    """Flatten any alpha channel of the image onto a white background.

    Delegates to ``alpha_to_color`` with the colour (255, 255, 255).

    Args:
        _image: Input image.

    Returns:
        np.ndarray: The value returned by ``alpha_to_color``.
    """
    return alpha_to_color(_image, (255, 255, 255))
def bbox_to_points(bbox: List[float]) -> np.ndarray:
    """Expand an axis-aligned box into its four corner points.

    Args:
        bbox: ``[x0, y0, x1, y1]``.

    Returns:
        np.ndarray: ``float32`` array of shape (4, 2) ordered
        ``[[x0, y0], [x1, y0], [x1, y1], [x0, y1]]``.
    """
    left, top, right, bottom = bbox
    corners = [
        [left, top],
        [right, top],
        [right, bottom],
        [left, bottom],
    ]
    return np.array(corners).astype(np.float32)
def points_to_bbox(points: np.ndarray) -> List[float]:
    """Collapse four corner points back into ``[x0, y0, x1, y1]``.

    ``x0`` and ``y0`` come from the first point, ``x1`` from the second
    point and ``y1`` from the third point; the fourth point is not read.

    Args:
        points: ``[[x0, y0], [x1, y1], [x2, y2], [x3, y3]]``.

    Returns:
        list: ``[x0, y0, x1, y1]``.
    """
    (left, top), (right, _), (_, bottom) = points[0], points[1], points[2]
    return [left, top, right, bottom]
def _rotate_box_quarter_turns(box, quarter_turns, orig_width, orig_height):
    """Map a box through an exact multiple of 90 degrees (counter-clockwise)."""
    x1, y1, x2, y2 = box
    if quarter_turns == 1:
        # 90 degrees counter-clockwise: (x, y) -> (y, W - x)
        return y1, orig_width - x2, y2, orig_width - x1
    if quarter_turns == 2:
        # 180 degrees: (x, y) -> (W - x, H - y)
        return orig_width - x2, orig_height - y2, orig_width - x1, orig_height - y1
    if quarter_turns == 3:
        # 90 degrees clockwise: (x, y) -> (H - y, x)
        return orig_height - y2, x1, orig_height - y1, x2
    # Whole turns leave the box where it is.
    return x1, y1, x2, y2


def _rotate_box_arbitrary(box, angle_deg, orig_size, new_size):
    """Return the integer bounds of a box rotated by any angle about the image centre."""
    x1, y1, x2, y2 = box
    theta = np.radians(angle_deg)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    orig_cx, orig_cy = orig_size[0] / 2, orig_size[1] / 2
    new_cx, new_cy = new_size[0] / 2, new_size[1] / 2

    xs = []
    ys = []
    for corner_x, corner_y in ((x1, y1), (x2, y1), (x2, y2), (x1, y2)):
        # Work relative to the original centre, rotate, then re-anchor on
        # the centre of the (expanded) rotated image.
        dx = corner_x - orig_cx
        dy = corner_y - orig_cy
        # The y axis points down, so a counter-clockwise image rotation is
        # [[cos, sin], [-sin, cos]] in pixel coordinates.
        xs.append(dx * cos_t + dy * sin_t + new_cx)
        ys.append(-dx * sin_t + dy * cos_t + new_cy)

    return int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))


def rotate_image_and_coordinates(
    image: "Image.Image",
    angle: float,
    coordinates_list: List[List[int]],
    rotate_coordinates: bool = True
) -> Tuple["Image.Image", List[List[int]]]:
    """Rotate an image and, optionally, the boxes that were located on it.

    The image is rotated with ``image.rotate(..., expand=True)``. Boxes are
    mapped into the rotated image and clamped to its bounds. Any angle that
    is a multiple of 90 degrees modulo 360 (90, -270, 450, 180, ...) uses an
    exact integer transform; all other angles rotate the four box corners
    and take their integer bounding box.

    Args:
        image: Source PIL image.
        angle: Rotation in degrees, counter-clockwise. ``0`` returns the
            inputs untouched.
        coordinates_list: Boxes as ``[x1, y1, x2, y2]``. Entries with fewer
            than four values are passed through unchanged; only the first
            four values of longer entries are used.
        rotate_coordinates: When False the image is rotated but
            ``coordinates_list`` is returned as-is.

    Returns:
        Tuple of the rotated image and the list of transformed boxes.
        A box that is negative or has non-positive width/height is reported
        on stdout and replaced by ``[0, 0, 50, 50]``.
    """
    if angle == 0:
        return image, coordinates_list

    # 270 counter-clockwise is the same rotation as 90 clockwise.
    rotation_angle = -90 if angle == 270 else angle
    rotated_image = image.rotate(rotation_angle, expand=True)

    if not rotate_coordinates:
        return rotated_image, coordinates_list

    orig_width, orig_height = image.size
    new_width, new_height = rotated_image.size

    # Dispatch on the angle modulo 360 so that equivalents of right angles
    # avoid the floating-point path, where truncating values such as
    # 19.999999999999993 would shift a box edge by one pixel.
    quarter_turns, remainder = divmod(rotation_angle % 360, 90)
    is_right_angle = remainder == 0
    quarter_turns = int(quarter_turns) % 4

    rotated_coordinates = []
    for coord in coordinates_list:
        if len(coord) < 4:
            rotated_coordinates.append(coord)
            continue

        x1, y1, x2, y2 = coord[:4]

        if x1 < 0 or y1 < 0 or x2 <= x1 or y2 <= y1:
            print(f"警告: 无效坐标 {coord}")
            rotated_coordinates.append([0, 0, 50, 50])  # fallback box
            continue

        if is_right_angle:
            new_x1, new_y1, new_x2, new_y2 = _rotate_box_quarter_turns(
                (x1, y1, x2, y2), quarter_turns, orig_width, orig_height
            )
        else:
            new_x1, new_y1, new_x2, new_y2 = _rotate_box_arbitrary(
                (x1, y1, x2, y2),
                rotation_angle,
                (orig_width, orig_height),
                (new_width, new_height),
            )

        # Clamp to the rotated image.
        new_x1 = max(0, min(new_width, new_x1))
        new_y1 = max(0, min(new_height, new_y1))
        new_x2 = max(0, min(new_width, new_x2))
        new_y2 = max(0, min(new_height, new_y2))

        # Keep the corners ordered as (min, min, max, max).
        if new_x1 > new_x2:
            new_x1, new_x2 = new_x2, new_x1
        if new_y1 > new_y2:
            new_y1, new_y2 = new_y2, new_y1

        rotated_coordinates.append([new_x1, new_y1, new_x2, new_y2])

    return rotated_image, rotated_coordinates
|