Browse Source

增强图像旋转和坐标处理功能,修正旋转算法,新增OpenCV图像方向检测功能

zhch158_admin 2 months ago
parent
commit
875dda1bea
1 changed files with 110 additions and 25 deletions
  1. 110 25
      ocr_validator_utils.py

+ 110 - 25
ocr_validator_utils.py

@@ -15,6 +15,7 @@ from html import unescape
 import yaml
 import base64
 from urllib.parse import urlparse
+import cv2
 import os
 
 
@@ -91,7 +92,7 @@ def rotate_image_and_coordinates(
     rotate_coordinates: bool = True
 ) -> Tuple[Image.Image, List[List[int]]]:
     """
-    根据角度旋转图像和坐标
+    根据角度旋转图像和坐标 - 修正版本
     
     Args:
         image: 原始图像
@@ -145,19 +146,19 @@ def rotate_image_and_coordinates(
         
         # 根据旋转角度变换坐标
         if rotation_angle == -90:  # 顺时针90度 (270度逆时针)
-            # 变换公式: (x, y) -> (y, orig_width - x)
-            new_x1 = y1
-            new_y1 = orig_width - x2
-            new_x2 = y2
-            new_y2 = orig_width - x1
-            
-        elif rotation_angle == 90:  # 逆时针90度
             # 变换公式: (x, y) -> (orig_height - y, x)
-            new_x1 = orig_height - y2
+            new_x1 = orig_height - y2 # 这里是y2
             new_y1 = x1
-            new_x2 = orig_height - y1
+            new_x2 = orig_height - y1 # 这里是y1
             new_y2 = x2
             
+        elif rotation_angle == 90:  # 逆时针90度
+            # 变换公式: (x, y) -> (y, orig_width - x)
+            new_x1 = y1
+            new_y1 = orig_width - x2 # 这里是x2
+            new_x2 = y2
+            new_y2 = orig_width - x1 # 这里是x1
+
         elif rotation_angle == 180:  # 180度
             # 变换公式: (x, y) -> (orig_width - x, orig_height - y)
             new_x1 = orig_width - x2
@@ -165,30 +166,43 @@ def rotate_image_and_coordinates(
             new_x2 = orig_width - x1
             new_y2 = orig_height - y1
             
-        else:
-            # 对于其他角度,使用通用的旋转矩阵
-            center_x, center_y = orig_width / 2, orig_height / 2
-            new_center_x, new_center_y = new_width / 2, new_height / 2
-            
+        else:  # 任意角度算法,目前90,-90不对
+            # 将角度转换为弧度
             angle_rad = np.radians(rotation_angle)
             cos_angle = np.cos(angle_rad)
             sin_angle = np.sin(angle_rad)
             
-            # 旋转四个角点
+            # 原图像中心点
+            orig_center_x = orig_width / 2
+            orig_center_y = orig_height / 2
+            
+            # 旋转后图像中心点
+            new_center_x = new_width / 2
+            new_center_y = new_height / 2
+            
+            # 将bbox的四个角点转换为相对于原图像中心的坐标
             corners = [
-                (x1 - center_x, y1 - center_y),
-                (x2 - center_x, y1 - center_y),
-                (x2 - center_x, y2 - center_y),
-                (x1 - center_x, y2 - center_y)
+                (x1 - orig_center_x, y1 - orig_center_y),  # 左上角
+                (x2 - orig_center_x, y1 - orig_center_y),  # 右上角
+                (x2 - orig_center_x, y2 - orig_center_y),  # 右下角
+                (x1 - orig_center_x, y2 - orig_center_y)   # 左下角
             ]
             
+            # 应用旋转矩阵变换每个角点
             rotated_corners = []
             for x, y in corners:
-                new_x = x * cos_angle - y * sin_angle
-                new_y = x * sin_angle + y * cos_angle
-                rotated_corners.append((new_x + new_center_x, new_y + new_center_y))
+                # 旋转矩阵: [cos(θ) -sin(θ)] [x]
+                #          [sin(θ)  cos(θ)] [y]
+                rotated_x = x * cos_angle - y * sin_angle
+                rotated_y = x * sin_angle + y * cos_angle
+                
+                # 转换回绝对坐标(相对于新图像)
+                abs_x = rotated_x + new_center_x
+                abs_y = rotated_y + new_center_y
+                
+                rotated_corners.append((abs_x, abs_y))
             
-            # 计算边界框
+            # 从旋转后的四个角点计算新的边界框
             x_coords = [corner[0] for corner in rotated_corners]
             y_coords = [corner[1] for corner in rotated_corners]
             
@@ -858,7 +872,6 @@ def get_ocr_tool_rotation_config(ocr_data: List, config: Dict) -> Dict:
     if not ocr_data or not isinstance(ocr_data, list):
         # 默认配置
         return {
-            'coordinates_need_rotation': True,
             'coordinates_are_pre_rotated': False
         }
     
@@ -878,4 +891,76 @@ def get_ocr_tool_rotation_config(ocr_data: List, config: Dict) -> Dict:
         # 默认配置
         return {
             'coordinates_are_pre_rotated': False
+        }
+
+def detect_image_orientation_by_opencv(image_path: str) -> Dict:
+    """
+    Estimate an image's orientation from the dominant angle of its Hough lines.
+
+    Pipeline: read the file with OpenCV, convert it to grayscale, run Canny
+    edge detection, detect straight lines with ``cv2.HoughLines``, build a
+    histogram of the line angles (36 bins of 5 degrees over [-90, 90]) and
+    map the fullest bin to one of 0 / 90 / 180 / 270 degrees.
+
+    Args:
+        image_path: Path of the image file to analyse.
+
+    Returns:
+        A dict that always holds ``detected_angle`` (float), ``confidence``
+        (float, capped at 1.0), ``method`` (always ``'opencv_analysis'``) and
+        ``message``. A successful detection adds ``line_count`` and
+        ``dominant_angle``; a failure adds ``error``. All numeric values are
+        plain Python numbers so the dict can be handed to ``json.dumps``.
+
+    The function does not raise: any exception (including an unreadable
+    image) is reported through the returned dict, with the angle and the
+    confidence both set to 0.0.
+    """
+    try:
+        image = cv2.imread(image_path)
+        if image is None:
+            raise ValueError("无法读取图像文件")
+
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+
+        # No text detector (e.g. EAST) is involved: orientation is inferred
+        # purely from Canny edges followed by a standard Hough transform.
+        edges = cv2.Canny(gray, 50, 150, apertureSize=3)
+        lines = cv2.HoughLines(edges, 1, np.pi/180, threshold=100)
+
+        if lines is None:
+            return {
+                'detected_angle': 0.0,
+                'confidence': 0.0,
+                'method': 'opencv_analysis',
+                'message': '未检测到足够的直线特征'
+            }
+
+        # Each row of lines[:, 0] is (rho, theta) with theta in radians.
+        # Convert theta to degrees, then fold every value above 90 into the
+        # negative range so that all angles lie within [-90, 90].
+        raw_angles = (theta * 180 / np.pi for _rho, theta in lines[:, 0])
+        angles = [angle - 180 if angle > 90 else angle for angle in raw_angles]
+
+        # 36 bins over [-90, 90] -> 5 degrees per bin; the fullest bin wins.
+        angle_hist = np.histogram(angles, bins=36, range=(-90, 90))[0]
+        # int() turns the NumPy index into a plain int, so dominant_angle
+        # below is a plain int as well (np.int64 is not JSON serializable).
+        dominant_angle_idx = int(np.argmax(angle_hist))
+        # NOTE: this is the LEFT edge of the winning bin, not its centre.
+        dominant_angle = -90 + dominant_angle_idx * 5
+
+        # Map the dominant line angle onto one of the four rotation values.
+        if -22.5 <= dominant_angle <= 22.5:
+            detected_angle = 0.0
+        elif 22.5 < dominant_angle <= 67.5:
+            detected_angle = 270.0
+        elif 67.5 < dominant_angle <= 90 or -90 <= dominant_angle < -67.5:
+            detected_angle = 90.0
+        else:
+            # Remaining case: -67.5 <= dominant_angle < -22.5.
+            detected_angle = 180.0
+
+        # Confidence = share of the detected lines that fall into the
+        # dominant bin; the guard keeps an empty result from dividing by zero.
+        line_count = len(lines)
+        confidence = float(angle_hist[dominant_angle_idx] / line_count) if line_count > 0 else 0.0
+
+        return {
+            'detected_angle': detected_angle,
+            'confidence': min(1.0, confidence),
+            'method': 'opencv_analysis',
+            'line_count': line_count,
+            'dominant_angle': dominant_angle,
+            'message': f'基于{line_count}条直线检测到旋转角度: {detected_angle}°'
+        }
+
+    except Exception as e:
+        # Deliberate best-effort boundary: callers receive a neutral result
+        # plus the error text instead of an exception.
+        return {
+            'detected_angle': 0.0,
+            'confidence': 0.0,
+            'method': 'opencv_analysis',
+            'error': str(e),
+            'message': f'OpenCV检测过程中发生错误: {str(e)}'
         }