Преглед изворни кода

优化旋转角度获取逻辑,支持手动设置优先级,增强图像和坐标旋转处理

zhch158_admin пре 1 месец
родитељ
комит
f12a31556a
1 измењених фајлова са 49 додато и 40 уклоњено
  1. 49 40
      ocr_validator_layout.py

+ 49 - 40
ocr_validator_layout.py

@@ -74,7 +74,11 @@ class OCRLayoutManager:
     
     def get_rotation_angle(self) -> float:
         """获取旋转角度 - 增强版本支持自动检测"""
-        # 首先尝试从OCR数据中获取(PPStructV3等)
+        # A manually set rotation angle (non-zero) takes priority over auto-detection
+        if hasattr(self, 'rotated_angle') and self.rotated_angle != 0:
+            return self.rotated_angle
+
+        # 尝试从OCR数据中获取(PPStructV3等)
         if self.validator.ocr_data:
             for item in self.validator.ocr_data:
                 if isinstance(item, dict) and 'rotation_angle' in item:
@@ -111,51 +115,56 @@ class OCRLayoutManager:
                 
                 # 判断是否需要旋转坐标
                 if rotation_config['coordinates_are_pre_rotated']:
+                    # 图片的角度与坐标的角度不一致,比如PPStructV3,图片0度,坐标已旋转270度
+                    # 这种情况下,只需要旋转图片,坐标不变
                     # PPStructV3: 坐标已经是旋转后的,只旋转图像
-                    if rotation_angle == 270:
+                    img_rotation_angle = (rotation_angle + self.rotated_angle) % 360
+                    if img_rotation_angle == 270:
                         rotated_image = image.rotate(-90, expand=True)  # 顺时针90度
-                    elif rotation_angle == 90:
+                    elif img_rotation_angle == 90:
                         rotated_image = image.rotate(90, expand=True)   # 逆时针90度
-                    elif rotation_angle == 180:
+                    elif img_rotation_angle == 180:
                         rotated_image = image.rotate(180, expand=True)  # 180度
                     else:
-                        rotated_image = image.rotate(-rotation_angle, expand=True)
-                    
-                    # 坐标不需要变换,因为JSON中已经是正确的坐标
-                    self._rotated_image_cache[cache_key] = rotated_image
-                    self._manage_cache_size()
-                    return rotated_image
-                    
-                else:
-                    # Dots OCR: 需要同时旋转图像和坐标
-                    # 收集所有bbox坐标
-                    all_bboxes = []
-                    text_to_bbox_map = {}  # 记录文本到bbox索引的映射
-                    
-                    bbox_index = 0
-                    for text, info_list in self.validator.text_bbox_mapping.items():
-                        text_to_bbox_map[text] = []
-                        for info in info_list:
-                            all_bboxes.append(info['bbox'])
-                            text_to_bbox_map[text].append(bbox_index)
-                            bbox_index += 1
-                    
-                    # 旋转图像和坐标
-                    rotated_image, rotated_bboxes = rotate_image_and_coordinates(
-                        image, rotation_angle, all_bboxes, 
-                        rotate_coordinates=not rotation_config['coordinates_are_pre_rotated']
-                    )
-                    
-                    # 更新bbox映射 - 使用映射关系确保正确对应
-                    for text, bbox_indices in text_to_bbox_map.items():
-                        for i, bbox_idx in enumerate(bbox_indices):
-                            if bbox_idx < len(rotated_bboxes) and i < len(self.validator.text_bbox_mapping[text]):
-                                self.validator.text_bbox_mapping[text][i]['bbox'] = rotated_bboxes[bbox_idx]
+                        rotated_image = image.rotate(-img_rotation_angle, expand=True)
                     
-                    # 缓存结果
-                    self._rotated_image_cache[cache_key] = rotated_image
-                    self._manage_cache_size()
-                    return rotated_image
+                    if self.rotated_angle == 0:
+                        # 坐标不需要变换,因为JSON中已经是正确的坐标
+                        self._rotated_image_cache[cache_key] = rotated_image
+                        self._manage_cache_size()
+                        return rotated_image
+
+                    image = rotated_image  # 继续使用旋转后的图像进行后续处理
+                
+                # Dots OCR: 需要同时旋转图像和坐标
+                # 收集所有bbox坐标
+                all_bboxes = []
+                text_to_bbox_map = {}  # 记录文本到bbox索引的映射
+                
+                bbox_index = 0
+                for text, info_list in self.validator.text_bbox_mapping.items():
+                    text_to_bbox_map[text] = []
+                    for info in info_list:
+                        all_bboxes.append(info['bbox'])
+                        text_to_bbox_map[text].append(bbox_index)
+                        bbox_index += 1
+                
+                # 旋转图像和坐标
+                rotated_image, rotated_bboxes = rotate_image_and_coordinates(
+                    image, rotation_angle, all_bboxes, 
+                    rotate_coordinates=not rotation_config['coordinates_are_pre_rotated']
+                )
+                
+                # 更新bbox映射 - 使用映射关系确保正确对应
+                for text, bbox_indices in text_to_bbox_map.items():
+                    for i, bbox_idx in enumerate(bbox_indices):
+                        if bbox_idx < len(rotated_bboxes) and i < len(self.validator.text_bbox_mapping[text]):
+                            self.validator.text_bbox_mapping[text][i]['bbox'] = rotated_bboxes[bbox_idx]
+                
+                # 缓存结果
+                self._rotated_image_cache[cache_key] = rotated_image
+                self._manage_cache_size()
+                return rotated_image
                     
             else:
                 # 无需旋转,直接缓存原图