|
|
@@ -2,7 +2,7 @@
|
|
|
bbox 提取模块
|
|
|
负责从 PaddleOCR 结果中提取文字框信息
|
|
|
"""
|
|
|
-from typing import List, Dict
|
|
|
+from typing import List, Dict, Tuple
|
|
|
import numpy as np
|
|
|
from pathlib import Path
|
|
|
|
|
|
@@ -11,7 +11,7 @@ class BBoxExtractor:
|
|
|
"""bbox 提取器"""
|
|
|
|
|
|
@staticmethod
|
|
|
- def extract_paddle_text_boxes(paddle_data: Dict) -> List[Dict]:
|
|
|
+ def extract_paddle_text_boxes(paddle_data: Dict) -> Tuple[List[Dict], float, Tuple[int, int]]:
|
|
|
"""
|
|
|
提取 PaddleOCR 的文字框信息
|
|
|
|
|
|
@@ -19,12 +19,14 @@ class BBoxExtractor:
|
|
|
paddle_data: PaddleOCR 输出的数据
|
|
|
|
|
|
Returns:
|
|
|
- 文字框列表(坐标已转换为 angle=0 时的坐标)
|
|
|
+ 文字框列表(保持旋转后的angle角度)和旋转角度
|
|
|
"""
|
|
|
text_boxes = []
|
|
|
+ rotation_angle = 0.0
|
|
|
+ orig_image_size = (0,0)
|
|
|
|
|
|
if 'overall_ocr_res' not in paddle_data:
|
|
|
- return text_boxes
|
|
|
+ return text_boxes, rotation_angle, orig_image_size
|
|
|
|
|
|
ocr_res = paddle_data['overall_ocr_res']
|
|
|
rec_texts = ocr_res.get('rec_texts', [])
|
|
|
@@ -33,9 +35,52 @@ class BBoxExtractor:
|
|
|
|
|
|
# 🎯 获取旋转角度
|
|
|
rotation_angle = BBoxExtractor._get_rotation_angle(paddle_data)
|
|
|
+ if rotation_angle != 0:
|
|
|
+ orig_image_size = BBoxExtractor._get_original_image_size(paddle_data)
|
|
|
+ print(f"🔄 检测到旋转角度: {rotation_angle}°")
|
|
|
+ print(f"📐 原始图像尺寸: {orig_image_size[0]} x {orig_image_size[1]}")
|
|
|
+
|
|
|
+ for i, (text, poly, score) in enumerate(zip(rec_texts, rec_polys, rec_scores)):
|
|
|
+ if text and text.strip():
|
|
|
+ # 计算 bbox (x_min, y_min, x_max, y_max)
|
|
|
+ bbox = BBoxExtractor._poly_to_bbox(poly)
|
|
|
+
|
|
|
+ text_boxes.append({
|
|
|
+ 'text': text,
|
|
|
+ 'bbox': bbox,
|
|
|
+ 'poly': poly,
|
|
|
+ 'score': score,
|
|
|
+ 'paddle_bbox_index': i,
|
|
|
+ 'used': False
|
|
|
+ })
|
|
|
|
|
|
- # 🎯 如果有旋转,需要获取原始图像尺寸
|
|
|
- orig_image_size = None
|
|
|
+ return text_boxes, rotation_angle, orig_image_size
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def extract_paddle_text_boxes_inverse_rotate(paddle_data: Dict) -> Tuple[List[Dict], float, Tuple[int, int]]:
|
|
|
+ """
|
|
|
+ 提取 PaddleOCR 的文字框信息
|
|
|
+
|
|
|
+ Args:
|
|
|
+ paddle_data: PaddleOCR 输出的数据
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 文字框列表(坐标已转换为 angle=0 时的坐标)
|
|
|
+ """
|
|
|
+ text_boxes = []
|
|
|
+ rotation_angle = 0.0
|
|
|
+ orig_image_size = (0,0)
|
|
|
+
|
|
|
+ if 'overall_ocr_res' not in paddle_data:
|
|
|
+ return text_boxes, rotation_angle, orig_image_size
|
|
|
+
|
|
|
+ ocr_res = paddle_data['overall_ocr_res']
|
|
|
+ rec_texts = ocr_res.get('rec_texts', [])
|
|
|
+ rec_polys = ocr_res.get('rec_polys', [])
|
|
|
+ rec_scores = ocr_res.get('rec_scores', [])
|
|
|
+
|
|
|
+ # 🎯 获取旋转角度
|
|
|
+ rotation_angle = BBoxExtractor._get_rotation_angle(paddle_data)
|
|
|
|
|
|
if rotation_angle != 0:
|
|
|
orig_image_size = BBoxExtractor._get_original_image_size(paddle_data)
|
|
|
@@ -62,7 +107,7 @@ class BBoxExtractor:
|
|
|
'used': False
|
|
|
})
|
|
|
|
|
|
- return text_boxes
|
|
|
+ return text_boxes, rotation_angle, orig_image_size
|
|
|
|
|
|
@staticmethod
|
|
|
def _get_rotation_angle(paddle_data: Dict) -> float:
|
|
|
@@ -135,6 +180,53 @@ class BBoxExtractor:
|
|
|
return (2480, 3508)
|
|
|
|
|
|
@staticmethod
|
|
|
def rotate_box_coordinates(bbox: List[float],
                           angle: float,
                           orig_image_size: tuple) -> List[float]:
    """
    Rotate an axis-aligned bbox by a right-angle step.

    The box is expanded to its four corner points with
    ``BBoxExtractor._bbox_to_poly``, the corners are mapped by
    ``BBoxExtractor._rotate_coordinates``, and the result is collapsed
    back into a bbox with ``BBoxExtractor._poly_to_bbox``.

    Per the original author's note, this follows the operation in
    ``ocr_validator_utils.rotate_image_and_coordinates`` (not visible from
    this block), with these angle meanings:

    - 0:   no rotation
    - 90:  counter-clockwise 90 degrees
    - 180: rotate 180 degrees
    - 270: clockwise 90 degrees (i.e. counter-clockwise 270 degrees)

    Args:
        bbox: Box on the original image, [x_min, y_min, x_max, y_max].
        angle: Rotation angle (0, 90, 180 or 270).
        orig_image_size: Original image size as (width, height).

    Returns:
        Whatever ``BBoxExtractor._poly_to_bbox`` yields for the rotated
        corner points.
    """
    corner_points = BBoxExtractor._bbox_to_poly(bbox)
    return BBoxExtractor._poly_to_bbox(
        BBoxExtractor._rotate_coordinates(corner_points, angle, orig_image_size)
    )
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
def inverse_rotate_box_coordinates(bbox: List[float],
                                   angle: float,
                                   orig_image_size: tuple) -> List[float]:
    """
    Map a bbox from the rotated image back to the original image.

    The box is expanded to its four corner points with
    ``BBoxExtractor._bbox_to_poly``, the corners are mapped by
    ``BBoxExtractor._inverse_rotate_coordinates``, and the result is
    collapsed back into a bbox with ``BBoxExtractor._poly_to_bbox``.

    Per the original author's note, this is the inverse of
    ``ocr_validator_utils.rotate_image_and_coordinates`` (not visible from
    this block): PaddleOCR recognises text on the rotated image, so its
    coordinates are in the rotated frame and must be converted back to the
    un-rotated original image.

    Args:
        bbox: Box on the rotated image, [x_min, y_min, x_max, y_max].
        angle: Rotation angle in degrees (the angle reported by PaddleX,
            according to the original note).
        orig_image_size: Original image size as (width, height).

    Returns:
        Whatever ``BBoxExtractor._poly_to_bbox`` yields for the
        inverse-rotated corner points.
    """
    corner_points = BBoxExtractor._bbox_to_poly(bbox)
    return BBoxExtractor._poly_to_bbox(
        BBoxExtractor._inverse_rotate_coordinates(corner_points, angle, orig_image_size)
    )
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
def _inverse_rotate_coordinates(poly: List[List[float]],
|
|
|
angle: float,
|
|
|
orig_image_size: tuple) -> List[List[float]]:
|
|
|
@@ -207,6 +299,109 @@ class BBoxExtractor:
|
|
|
return inverse_poly
|
|
|
|
|
|
@staticmethod
|
|
|
+ def _rotate_coordinates(poly: List[List[float]],
|
|
|
+ angle: float,
|
|
|
+ orig_image_size: tuple) -> List[List[float]]:
|
|
|
+ """
|
|
|
+ 旋转多边形坐标(与图像旋转保持一致)
|
|
|
+
|
|
|
+ 参考 ocr_validator_utils.rotate_image_and_coordinates 的操作
|
|
|
+
|
|
|
+ 旋转逻辑:
|
|
|
+ - 0°: 不旋转
|
|
|
+ - 90°: 逆时针旋转 90°
|
|
|
+ - 180°: 旋转 180°
|
|
|
+ - 270°: 顺时针旋转 90°(或逆时针 270°)
|
|
|
+
|
|
|
+ Args:
|
|
|
+ poly: 原图像上的多边形坐标 [[x', y'], ...]
|
|
|
+ angle: 旋转角度(0, 90, 180, 270)
|
|
|
+ orig_image_size: 原始图像尺寸 (width, height)
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 旋转后的多边形坐标 [[x, y], ...]
|
|
|
+
|
|
|
+ Example:
|
|
|
+ >>> poly = [[100, 200], [150, 200], [150, 250], [100, 250]]
|
|
|
+ >>> rotated = rotate_coordinates(poly, 90, (1000, 800))
|
|
|
+ >>> print(rotated)
|
|
|
+ [[200, 900], [200, 850], [250, 850], [250, 900]]
|
|
|
+ """
|
|
|
+ if not poly or angle == 0:
|
|
|
+ return poly
|
|
|
+
|
|
|
+ orig_width, orig_height = orig_image_size
|
|
|
+ rotated_poly = []
|
|
|
+
|
|
|
+ for point in poly:
|
|
|
+ x, y = point[0], point[1]
|
|
|
+
|
|
|
+ if angle == 90:
|
|
|
+ # 逆时针旋转 90°
|
|
|
+ # 新坐标系: 宽度=原高度, 高度=原宽度
|
|
|
+ # x_new = y_old
|
|
|
+ # y_new = 原宽度 - x_old
|
|
|
+ new_x = y
|
|
|
+ new_y = orig_width - x
|
|
|
+
|
|
|
+ elif angle == 180:
|
|
|
+ # 旋转 180°
|
|
|
+ # 新坐标系: 宽度=原宽度, 高度=原高度
|
|
|
+ # x_new = 原宽度 - x_old
|
|
|
+ # y_new = 原高度 - y_old
|
|
|
+ new_x = orig_width - x
|
|
|
+ new_y = orig_height - y
|
|
|
+
|
|
|
+ elif angle == 270:
|
|
|
+ # 顺时针旋转 90°(或逆时针 270°)
|
|
|
+ # 新坐标系: 宽度=原高度, 高度=原宽度
|
|
|
+ # x_new = 原高度 - y_old
|
|
|
+ # y_new = x_old
|
|
|
+ new_x = orig_height - y
|
|
|
+ new_y = x
|
|
|
+
|
|
|
+ else:
|
|
|
+ # 不支持的角度,保持原坐标
|
|
|
+ new_x, new_y = x, y
|
|
|
+
|
|
|
+ rotated_poly.append([new_x, new_y])
|
|
|
+
|
|
|
+ return rotated_poly
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def _bbox_to_poly(bbox: List[float]) -> List[List[float]]:
|
|
|
+ """
|
|
|
+ 将 bbox 转换为多边形(4个角点,逆时针顺序)
|
|
|
+
|
|
|
+ Args:
|
|
|
+ bbox: 边界框 [x_min, y_min, x_max, y_max]
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 多边形坐标 [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
|
|
|
+ 顺序:左上 -> 右上 -> 右下 -> 左下(逆时针)
|
|
|
+
|
|
|
+ Example:
|
|
|
+ >>> bbox = [100, 200, 150, 250]
|
|
|
+ >>> poly = BBoxExtractor._bbox_to_poly(bbox)
|
|
|
+ >>> print(poly)
|
|
|
+ [[100, 200], [150, 200], [150, 250], [100, 250]]
|
|
|
+ """
|
|
|
+ if not bbox or len(bbox) < 4:
|
|
|
+ return []
|
|
|
+
|
|
|
+ x_min, y_min, x_max, y_max = bbox[:4]
|
|
|
+
|
|
|
+ # 🎯 4个角点(逆时针顺序)
|
|
|
+ poly = [
|
|
|
+ [x_min, y_min], # 左上角
|
|
|
+ [x_max, y_min], # 右上角
|
|
|
+ [x_max, y_max], # 右下角
|
|
|
+ [x_min, y_max] # 左下角
|
|
|
+ ]
|
|
|
+
|
|
|
+ return poly
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
def _poly_to_bbox(poly: List[List[float]]) -> List[float]:
|
|
|
"""将多边形转换为 bbox [x_min, y_min, x_max, y_max]"""
|
|
|
xs = [p[0] for p in poly]
|