|
|
@@ -3,6 +3,8 @@ bbox 提取模块
|
|
|
负责从 PaddleOCR 结果中提取文字框信息
|
|
|
"""
|
|
|
from typing import List, Dict
|
|
|
+import numpy as np
|
|
|
+from pathlib import Path
|
|
|
|
|
|
|
|
|
class BBoxExtractor:
|
|
|
@@ -17,7 +19,7 @@ class BBoxExtractor:
|
|
|
paddle_data: PaddleOCR 输出的数据
|
|
|
|
|
|
Returns:
|
|
|
- 文字框列表
|
|
|
+ 文字框列表(坐标已转换为 angle=0 时的坐标)
|
|
|
"""
|
|
|
text_boxes = []
|
|
|
|
|
|
@@ -28,9 +30,26 @@ class BBoxExtractor:
|
|
|
rec_texts = ocr_res.get('rec_texts', [])
|
|
|
rec_polys = ocr_res.get('rec_polys', [])
|
|
|
rec_scores = ocr_res.get('rec_scores', [])
|
|
|
-
|
|
|
+
|
|
|
+ # 🎯 获取旋转角度
|
|
|
+ rotation_angle = BBoxExtractor._get_rotation_angle(paddle_data)
|
|
|
+
|
|
|
+ # 🎯 如果有旋转,需要获取原始图像尺寸
|
|
|
+ orig_image_size = None
|
|
|
+
|
|
|
+ if rotation_angle != 0:
|
|
|
+ orig_image_size = BBoxExtractor._get_original_image_size(paddle_data)
|
|
|
+ print(f"🔄 检测到旋转角度: {rotation_angle}°")
|
|
|
+ print(f"📐 原始图像尺寸: {orig_image_size[0]} x {orig_image_size[1]}")
|
|
|
+
|
|
|
for i, (text, poly, score) in enumerate(zip(rec_texts, rec_polys, rec_scores)):
|
|
|
if text and text.strip():
|
|
|
+ # 🎯 如果有旋转角度,转换坐标
|
|
|
+ if rotation_angle != 0 and orig_image_size:
|
|
|
+ poly = BBoxExtractor._inverse_rotate_coordinates(
|
|
|
+ poly, rotation_angle, orig_image_size
|
|
|
+ )
|
|
|
+
|
|
|
# 计算 bbox (x_min, y_min, x_max, y_max)
|
|
|
bbox = BBoxExtractor._poly_to_bbox(poly)
|
|
|
|
|
|
@@ -42,12 +61,154 @@ class BBoxExtractor:
|
|
|
'paddle_bbox_index': i,
|
|
|
'used': False
|
|
|
})
|
|
|
-
|
|
|
+
|
|
|
return text_boxes
|
|
|
|
|
|
@staticmethod
def _get_rotation_angle(paddle_data: Dict) -> float:
    """
    Return the document rotation angle reported by the preprocessor.

    The value is read from ``paddle_data['doc_preprocessor_res']['angle']``.

    Args:
        paddle_data: PaddleOCR output data.

    Returns:
        The angle in degrees as a float. ``0.0`` is returned when the
        preprocessor result is missing or is not a dict, when the angle
        is missing, ``None`` or not numeric, and when the angle is
        negative (treated as a "no orientation detected" sentinel such
        as -1 rather than as a real rotation).
    """
    doc_res = paddle_data.get('doc_preprocessor_res')
    if not isinstance(doc_res, dict):
        return 0.0

    try:
        angle = float(doc_res.get('angle'))
    except (TypeError, ValueError):
        # Missing, None or non-numeric angle: behave as "not rotated"
        # instead of crashing the whole extraction.
        return 0.0

    # Negative values (e.g. the -1 sentinel) and NaN do not describe a
    # rotation; reporting them would trigger a pointless image read and a
    # misleading log line in the caller.
    if angle < 0 or angle != angle:
        return 0.0

    return angle
|
|
|
+
|
|
|
@staticmethod
def _get_original_image_size(paddle_data: Dict) -> tuple:
    """
    Determine the size of the original (un-rotated) input image.

    Sources are tried in order:

    1. The image file at ``paddle_data['input_path']``, if it exists and
       can be opened.
    2. The largest ``coordinate`` x2/y2 among
       ``paddle_data['layout_det_res']['boxes']``, plus a 50 px margin.
    3. The largest x/y among
       ``paddle_data['overall_ocr_res']['rec_polys']``, plus a 50 px margin.
    4. A hard-coded default of ``(2480, 3508)``.

    Args:
        paddle_data: PaddleOCR output data.

    Returns:
        A ``(width, height)`` tuple.
    """
    margin = 50  # padding added to sizes inferred from detected boxes

    # 1) Read the real size from the image file.
    input_path = paddle_data.get('input_path')
    if input_path and Path(input_path).exists():
        try:
            # Imported lazily so that the fallbacks below still work when
            # Pillow is not installed or no image file is available.
            from PIL import Image

            with Image.open(input_path) as img:
                return img.size  # (width, height)
        except Exception as e:
            # Best-effort: any failure here falls through to the
            # inference-based fallbacks below.
            print(f"⚠️ 无法读取图像文件 {input_path}: {e}")

    # 2) Fallback: infer from the layout detection boxes.
    layout_res = paddle_data.get('layout_det_res')
    boxes = layout_res.get('boxes') if isinstance(layout_res, dict) else None
    if boxes:
        max_x = 0
        max_y = 0
        for box in boxes:
            coord = box.get('coordinate', [])
            if len(coord) >= 4:
                max_x = max(max_x, coord[2])
                max_y = max(max_y, coord[3])

        if max_x > 0 and max_y > 0:
            return (int(max_x) + margin, int(max_y) + margin)

    # 3) Last fallback: infer from the OCR polygons.
    ocr_res = paddle_data.get('overall_ocr_res')
    rec_polys = ocr_res.get('rec_polys') if isinstance(ocr_res, dict) else None
    # len() instead of truthiness so array-like containers are accepted too.
    if rec_polys is not None and len(rec_polys) > 0:
        max_x = 0
        max_y = 0
        for poly in rec_polys:
            for point in poly:
                max_x = max(max_x, point[0])
                max_y = max(max_y, point[1])

        if max_x > 0 and max_y > 0:
            return (int(max_x) + margin, int(max_y) + margin)

    # 4) Nothing usable: fall back to the default size.
    print("⚠️ 无法确定原始图像尺寸,使用默认值")
    return (2480, 3508)
|
|
|
+
|
|
|
@staticmethod
def _inverse_rotate_coordinates(poly: List[List[float]],
                                angle: float,
                                orig_image_size: tuple) -> List[List[float]]:
    """
    Map polygon points from the rotated image back onto the original image.

    OCR runs on the rotated image, so its coordinates live in the rotated
    frame. This undoes the rotation for the three right-angle cases. The
    forward transforms assumed here (W, H = original width, height) are:

    * 90  (``image.rotate(90, expand=True)``):  x_rot = y,     y_rot = W - x
    * 270 (``image.rotate(-90, expand=True)``): x_rot = H - y, y_rot = x
    * 180 (``image.rotate(180)``):              x_rot = W - x, y_rot = H - y

    NOTE(review): these are meant to be the inverse of
    ``ocr_validator_utils.rotate_image_and_coordinates``, which is not
    visible from here — confirm the direction convention against it.

    Args:
        poly: Polygon points on the rotated image, ``[[x', y'], ...]``.
        angle: Rotation angle in degrees.
        orig_image_size: Original image size as ``(width, height)``.

    Returns:
        Polygon points on the original image, ``[[x, y], ...]``. For any
        angle other than 90, 180 or 270 the points are returned unchanged.
    """
    orig_width, orig_height = orig_image_size

    inverse_poly = []

    for point in poly:
        x_rot, y_rot = point[0], point[1]  # coordinates in the rotated frame

        if angle == 90:
            # y_rot spans the rotated image's height, which equals the
            # original width, so x must be measured against orig_width.
            # Using orig_height here pushes x outside the original image
            # whenever the image is not square.
            x_orig = orig_width - y_rot
            y_orig = x_rot

        elif angle == 270:
            # x_rot spans the rotated image's width, which equals the
            # original height.
            x_orig = y_rot
            y_orig = orig_height - x_rot

        elif angle == 180:
            x_orig = orig_width - x_rot
            y_orig = orig_height - y_rot

        else:
            # 0 degrees or an unsupported angle: leave the point as is.
            x_orig = x_rot
            y_orig = y_rot

        inverse_poly.append([x_orig, y_orig])

    return inverse_poly
|
|
|
+
|
|
|
@staticmethod
def _poly_to_bbox(poly: List[List[float]]) -> List[float]:
    """
    Compute the axis-aligned bounding box of a polygon.

    Args:
        poly: Polygon points; index 0 of each point is x, index 1 is y.

    Returns:
        ``[x_min, y_min, x_max, y_max]``.
    """
    x_min = min(pt[0] for pt in poly)
    y_min = min(pt[1] for pt in poly)
    x_max = max(pt[0] for pt in poly)
    y_max = max(pt[1] for pt in poly)
    return [x_min, y_min, x_max, y_max]
|