فهرست منبع

fix: Update bbox rotation logic and improve poly handling for skew correction

zhch158_admin 6 ساعت پیش
والد
کامیت
843cbc19fd
1 فایل تغییر یافته به همراه 26 افزوده شده و 13 حذف شده
  1. 26 13
      merger/bbox_extractor.py

+ 26 - 13
merger/bbox_extractor.py

@@ -536,7 +536,7 @@ class BBoxExtractor:
                     angle_deg: float, 
                     center: Tuple[float, float] = (0, 0)) -> Tuple[float, float]:
         """
-        旋转点坐标
+        旋转点坐标 (图像坐标系:Y轴向下)
         
         Args:
             point: 原始点 (x, y)
@@ -554,8 +554,11 @@ class BBoxExtractor:
         x -= cx
         y -= cy
         
-        x_new = x * np.cos(angle_rad) - y * np.sin(angle_rad)
-        y_new = x * np.sin(angle_rad) + y * np.cos(angle_rad)
+        # 图像坐标系(Y轴向下)下的逆时针旋转公式
+        # x' = x cosθ + y sinθ
+        # y' = -x sinθ + y cosθ
+        x_new = x * np.cos(angle_rad) + y * np.sin(angle_rad)
+        y_new = -x * np.sin(angle_rad) + y * np.cos(angle_rad)
         
         x_new += cx
         y_new += cy
@@ -564,20 +567,21 @@ class BBoxExtractor:
     
     @staticmethod
     def correct_boxes_skew(paddle_boxes: List[Dict], 
-                          rotation_angle: float,
+                          correction_angle: float,
                           image_size: Tuple[int, int]) -> List[Dict]:
         """
         校正文本框的倾斜
         
         Args:
             paddle_boxes: Paddle OCR 结果
-            rotation_angle: 倾斜角度(度数)
+            correction_angle: 校正旋转角度(度数,正值=逆时针,负值=顺时针)
+                              注意:这里直接传入需要旋转的角度,不再自动取反
             image_size: 图像尺寸 (width, height)
         
         Returns:
             校正后的文本框列表
         """
-        if abs(rotation_angle) < 0.1:
+        if abs(correction_angle) < 0.01:
             return paddle_boxes
         
         width, height = image_size
@@ -587,15 +591,24 @@ class BBoxExtractor:
         
         for box in paddle_boxes:
             poly = box.get('poly', [])
-            if len(poly) < 4:
-                corrected_boxes.append(box)
-                continue
+            
+            # 🆕 修复:如果没有 poly,尝试从 bbox 生成
+            # 这是为了兼容 MinerU 或其他没有 poly 的数据源
+            if not poly or len(poly) < 4:
+                if 'bbox' in box and len(box['bbox']) == 4:
+                    poly = BBoxExtractor._bbox_to_poly(box['bbox'])
+                else:
+                    corrected_boxes.append(box)
+                    continue
             
             # 旋转多边形
-            rotated_poly = [
-                BBoxExtractor.rotate_point(point, -rotation_angle, center)
-                for point in poly
-            ]
+            rotated_poly = []
+            for point in poly:
+                # 确保点是 tuple 或 list,并只有 2 个坐标
+                p = (point[0], point[1]) if isinstance(point, (list, tuple)) and len(point) >= 2 else (0.0, 0.0)
+                # 直接使用 correction_angle 进行旋转
+                rotated_point = BBoxExtractor.rotate_point(p, correction_angle, center)
+                rotated_poly.append([rotated_point[0], rotated_point[1]]) # 转换回 list 以匹配 _poly_to_bbox 类型
             
             # 重新计算 bbox
             corrected_bbox = BBoxExtractor._poly_to_bbox(rotated_poly)