# zhengchun/ocr_platform
"""
Image processing utility module.

Provides general-purpose image processing helpers:
- Image decoding and format conversion
- Alpha channel handling
- Image preprocessing
- BBox and point coordinate conversion
- Image rotation and coordinate transformation
- Watermark removal
"""
import cv2
import numpy as np
from typing import List, Tuple, Union, Optional, Dict, Any
from PIL import Image


def img_decode(content: bytes) -> np.ndarray:
    """
    Decode an encoded image held in memory into a pixel array.

    The bytes are viewed as a flat ``uint8`` buffer and passed to
    ``cv2.imdecode`` with ``cv2.IMREAD_UNCHANGED``, so the decoder is asked
    to keep the image as stored rather than forcing a 3-channel result.

    Args:
        content: Raw bytes of an encoded image file.

    Returns:
        np.ndarray: Whatever ``cv2.imdecode`` returns for the buffer.
        NOTE(review): OpenCV reports an undecodable buffer by returning
        ``None`` rather than raising - callers should confirm they check
        for that.
    """
    raw_buffer = np.frombuffer(content, dtype=np.uint8)
    decoded = cv2.imdecode(raw_buffer, cv2.IMREAD_UNCHANGED)
    return decoded


def check_img(img: Union[bytes, np.ndarray]) -> np.ndarray:
    """
    Normalise an image given either as encoded bytes or as an array.

    ``bytes`` input is decoded with ``img_decode`` first. A 2-D
    (single-channel) array is then expanded to three channels with
    ``cv2.COLOR_GRAY2BGR``. Anything else - including arrays that already
    have a channel axis, whatever its size - is returned untouched.

    Args:
        img: Encoded image bytes or an image array.

    Returns:
        np.ndarray: A 3-channel BGR array when the input was 2-D; otherwise
        the decoded/original value unchanged.
    """
    candidate = img_decode(img) if isinstance(img, bytes) else img

    is_single_channel = isinstance(candidate, np.ndarray) and len(candidate.shape) == 2
    if is_single_channel:
        return cv2.cvtColor(candidate, cv2.COLOR_GRAY2BGR)
    return candidate


def alpha_to_color(img: np.ndarray, alpha_color: Tuple[int, int, int] = (255, 255, 255)) -> np.ndarray:
    """
    Flatten a 4-channel image onto a solid background colour.

    Only arrays of shape (H, W, 4) are processed; every other input is
    returned as-is. The four planes are unpacked as B, G, R, A. Each colour
    plane is blended as ``background * (1 - a) + channel * a`` with
    ``a = A / 255`` and cast back to ``uint8`` (fractions are truncated).

    NOTE: ``alpha_color`` is indexed in (R, G, B) order - element 0 is
    blended into the R plane and element 2 into the B plane - while the
    returned array keeps the B, G, R channel order of the input. With the
    default white background the ordering makes no difference.

    Args:
        img: Input image array.
        alpha_color: Background colour, read as (R, G, B).

    Returns:
        np.ndarray: 3-channel image in B, G, R order, or ``img`` unchanged
        when it does not have exactly four channels.
    """
    shape = img.shape
    if len(shape) != 3 or shape[2] != 4:
        return img

    blue, green, red, alpha_plane = cv2.split(img)
    opacity = alpha_plane / 255
    transparency = 1 - opacity

    red = (alpha_color[0] * transparency + red * opacity).astype(np.uint8)
    green = (alpha_color[1] * transparency + green * opacity).astype(np.uint8)
    blue = (alpha_color[2] * transparency + blue * opacity).astype(np.uint8)

    return cv2.merge((blue, green, red))


def preprocess_image(_image: np.ndarray) -> np.ndarray:
    """
    Preprocess an image by flattening any alpha channel onto white.

    Delegates to ``alpha_to_color`` with a fixed (255, 255, 255)
    background.

    Args:
        _image: Input image array.

    Returns:
        np.ndarray: The result of ``alpha_to_color`` for the input.
    """
    white_background = (255, 255, 255)
    return alpha_to_color(_image, white_background)


def bbox_to_points(bbox: List[float]) -> np.ndarray:
    """
    Expand an axis-aligned bbox into its four corner points.

    Args:
        bbox: ``[x0, y0, x1, y1]``; must contain exactly four values.

    Returns:
        np.ndarray: ``float32`` array of shape (4, 2) ordered
        ``[[x0, y0], [x1, y0], [x1, y1], [x0, y1]]``.
    """
    left, top, right, bottom = bbox
    corners = [
        [left, top],
        [right, top],
        [right, bottom],
        [left, bottom],
    ]
    return np.array(corners).astype(np.float32)


def points_to_bbox(points: np.ndarray) -> List[float]:
    """
    Collapse four corner points back into ``[x0, y0, x1, y1]``.

    The corners are expected in the order
    ``[[x0, y0], [x1, y0], [x1, y1], [x0, y1]]``: ``x0``/``y0`` are read
    from the first point, ``x1`` from the second and ``y1`` from the third.
    The fourth point is not read.

    NumPy scalar values (what you get when ``points`` is an ndarray) are
    converted to native Python numbers, so the result matches the declared
    return type and can be passed to ``json.dumps``. Values that are
    already plain Python numbers are returned as they are.

    Args:
        points: Sequence of at least three ``(x, y)`` pairs.

    Returns:
        list: ``[x0, y0, x1, y1]`` as native Python numbers.
    """
    def _native(value):
        # np.generic covers every NumPy scalar type; .item() unwraps it.
        return value.item() if isinstance(value, np.generic) else value

    x0, y0 = points[0]
    x1, _ = points[1]
    _, y1 = points[2]
    return [_native(x0), _native(y0), _native(x1), _native(y1)]


def detect_watermark(
    image: Union[np.ndarray, Image.Image],
    midtone_low: int = 100,
    midtone_high: int = 220,
    ratio_threshold: float = 0.03,
    check_diagonal: bool = True,
    diagonal_angle_range: tuple = (30, 60),
) -> bool:
    """
    Backward-compatible alias for ``ocr_utils.watermark_utils.detect_watermark``.

    The implementation has moved to that module; it is imported at call
    time and every argument is forwarded unchanged (``image`` positionally,
    the rest by keyword). See the implementation for parameter semantics.
    """
    from ocr_utils.watermark_utils import detect_watermark as _delegate

    forwarded = {
        "midtone_low": midtone_low,
        "midtone_high": midtone_high,
        "ratio_threshold": ratio_threshold,
        "check_diagonal": check_diagonal,
        "diagonal_angle_range": diagonal_angle_range,
    }
    return _delegate(image, **forwarded)


def remove_watermark_from_image(
    image: Union[np.ndarray, Image.Image],
    threshold: int = 160,
    morph_close_kernel: int = 2,
    return_pil: Optional[bool] = None,
) -> Union[np.ndarray, Image.Image]:
    """
    Backward-compatible alias for
    ``ocr_utils.watermark_utils.remove_watermark_from_image``.

    The implementation has moved to that module; it is imported at call
    time and every argument is forwarded unchanged (``image`` positionally,
    the rest by keyword). See the implementation for parameter semantics.
    """
    from ocr_utils.watermark_utils import remove_watermark_from_image as _delegate

    forwarded = {
        "threshold": threshold,
        "morph_close_kernel": morph_close_kernel,
        "return_pil": return_pil,
    }
    return _delegate(image, **forwarded)


def remove_watermark_from_image_rgb(
    image: Union[np.ndarray, Image.Image],
    threshold: int = 160,
    morph_close_kernel: int = 2,
    return_pil: Optional[bool] = None,
) -> Union[np.ndarray, Image.Image]:
    """
    Backward-compatible alias for
    ``ocr_utils.watermark_utils.remove_watermark_from_image_rgb``.

    The implementation has moved to that module; it is imported at call
    time and every argument is forwarded unchanged (``image`` positionally,
    the rest by keyword). See the implementation for parameter semantics.
    """
    from ocr_utils.watermark_utils import remove_watermark_from_image_rgb as _delegate

    forwarded = {
        "threshold": threshold,
        "morph_close_kernel": morph_close_kernel,
        "return_pil": return_pil,
    }
    return _delegate(image, **forwarded)


def rotate_image_and_coordinates(
    image: Image.Image,
    angle: float,
    coordinates_list: List[List[int]],
    rotate_coordinates: bool = True
) -> Tuple[Image.Image, List[List[int]]]:
    """
    Rotate a PIL image and map axis-aligned boxes into the rotated image.

    The image is rotated with ``image.rotate(..., expand=True)``; Pillow
    interprets the angle as degrees counter-clockwise and grows the canvas
    to hold the whole rotated image. Each box is then transformed into the
    coordinate system of the rotated image and clamped to its bounds.

    Any angle equivalent to a multiple of 90 degrees (e.g. -90, -180, 450)
    uses exact integer formulas. Other angles rotate the four box corners
    about the image centre and return the enclosing axis-aligned box, with
    coordinates truncated to ``int``.

    Per-box special cases:
    - A box with fewer than 4 values is copied to the output unchanged.
    - Only the first 4 values of a box are used; extra values are dropped.
    - A box with a negative origin or non-positive width/height prints a
      warning and is replaced by the placeholder ``[0, 0, 50, 50]``.

    Args:
        image: Source image (PIL Image).
        angle: Rotation in degrees (0, 90, 180, 270 or any other value).
        coordinates_list: Boxes in ``[x1, y1, x2, y2]`` format.
        rotate_coordinates: When False the image is rotated but
            ``coordinates_list`` is returned untouched (for OCR tools that
            already report coordinates in the rotated frame).

    Returns:
        Tuple of (rotated image, transformed box list). For ``angle == 0``
        the original image and list objects are returned as-is.
    """
    if angle == 0:
        return image, coordinates_list

    # 270 is handed to Pillow as -90 (the same rotation, 90 degrees
    # clockwise); every other angle is passed through unchanged.
    rotation_angle = -90 if angle == 270 else angle

    rotated_image = image.rotate(rotation_angle, expand=True)

    if not rotate_coordinates:
        return rotated_image, coordinates_list

    orig_width, orig_height = image.size
    new_width, new_height = rotated_image.size

    # Pick the coordinate formula from the angle reduced to [0, 360), so
    # that equivalent right-angle rotations (-180, -270, 450, ...) use the
    # exact formulas instead of the floating-point path, where sin/cos
    # rounding followed by int() truncation can move an edge by one pixel.
    turn = rotation_angle % 360
    uses_trig = turn not in (0, 90, 180, 270)

    if uses_trig:
        # Loop-invariant values for the general rotation.
        angle_rad = np.radians(rotation_angle)
        cos_angle = np.cos(angle_rad)
        sin_angle = np.sin(angle_rad)
        orig_center_x, orig_center_y = orig_width / 2, orig_height / 2
        new_center_x, new_center_y = new_width / 2, new_height / 2

    rotated_coordinates = []

    for coord in coordinates_list:
        if len(coord) < 4:
            rotated_coordinates.append(coord)
            continue

        x1, y1, x2, y2 = coord[:4]

        # Reject boxes with a negative origin or non-positive extent.
        if x1 < 0 or y1 < 0 or x2 <= x1 or y2 <= y1:
            print(f"警告: 无效坐标 {coord}")
            rotated_coordinates.append([0, 0, 50, 50])  # placeholder box
            continue

        if turn == 270:
            # 90 degrees clockwise: (x, y) -> (orig_height - y, x).
            # y2 yields the new left edge, y1 the new right edge.
            new_x1, new_y1 = orig_height - y2, x1
            new_x2, new_y2 = orig_height - y1, x2
        elif turn == 90:
            # 90 degrees counter-clockwise: (x, y) -> (y, orig_width - x).
            # x2 yields the new top edge, x1 the new bottom edge.
            new_x1, new_y1 = y1, orig_width - x2
            new_x2, new_y2 = y2, orig_width - x1
        elif turn == 180:
            # (x, y) -> (orig_width - x, orig_height - y).
            new_x1, new_y1 = orig_width - x2, orig_height - y2
            new_x2, new_y2 = orig_width - x1, orig_height - y1
        elif turn == 0:
            # Whole number of full turns: the box does not move.
            new_x1, new_y1, new_x2, new_y2 = x1, y1, x2, y2
        else:
            # General case: rotate each corner about the original centre
            # with [[cos, sin], [-sin, cos]] (counter-clockwise on screen,
            # y axis pointing down), then shift to the new image centre.
            x_coords = []
            y_coords = []
            for corner_x, corner_y in ((x1, y1), (x2, y1), (x2, y2), (x1, y2)):
                x = corner_x - orig_center_x
                y = corner_y - orig_center_y
                x_coords.append(x * cos_angle + y * sin_angle + new_center_x)
                y_coords.append(-x * sin_angle + y * cos_angle + new_center_y)

            # Enclosing axis-aligned box of the rotated corners.
            new_x1 = int(min(x_coords))
            new_y1 = int(min(y_coords))
            new_x2 = int(max(x_coords))
            new_y2 = int(max(y_coords))

        # Clamp to the rotated image bounds.
        new_x1 = max(0, min(new_width, new_x1))
        new_y1 = max(0, min(new_height, new_y1))
        new_x2 = max(0, min(new_width, new_x2))
        new_y2 = max(0, min(new_height, new_y2))

        # Defensive: keep x1 <= x2 and y1 <= y2.
        if new_x1 > new_x2:
            new_x1, new_x2 = new_x2, new_x1
        if new_y1 > new_y2:
            new_y1, new_y2 = new_y2, new_y1

        rotated_coordinates.append([new_x1, new_y1, new_x2, new_y2])

    return rotated_image, rotated_coordinates