SHA1
--- a/merger/bbox_extractor.py
+++ b/merger/bbox_extractor.py
@@ -3,6 +3,8 @@ bbox 提取模块
 
				 负责从 PaddleOCR 结果中提取文字框信息
			
 
				 """
			
 
				 from typing import List, Dict
			
 
				+import numpy as np
			
 
				+from pathlib import Path
			
 
				 
			
 
				 
			
 
				 class BBoxExtractor:
			
@@ -17,7 +19,7 @@ class BBoxExtractor:
 
				             paddle_data: PaddleOCR 输出的数据
			
 
				         
			
 
				         Returns:
			
 
				-            文字框列表
			
 
				+            文字框列表（坐标已转换为 angle=0 时的坐标）
			
 
				         """
			
 
				         text_boxes = []
			
 
				         
			
@@ -28,9 +30,26 @@ class BBoxExtractor:
 
				         rec_texts = ocr_res.get('rec_texts', [])
			
 
				         rec_polys = ocr_res.get('rec_polys', [])
			
 
				         rec_scores = ocr_res.get('rec_scores', [])
			
 
				-
			
 
				+        
			
 
				+        # 🎯 获取旋转角度
			
 
				+        rotation_angle = BBoxExtractor._get_rotation_angle(paddle_data)
			
 
				+        
			
 
				+        # 🎯 如果有旋转，需要获取原始图像尺寸
			
 
				+        orig_image_size = None
			
 
				+        
			
 
				+        if rotation_angle != 0:
			
 
				+            orig_image_size = BBoxExtractor._get_original_image_size(paddle_data)
			
 
				+            print(f"🔄 检测到旋转角度: {rotation_angle}°")
			
 
				+            print(f"📐 原始图像尺寸: {orig_image_size[0]} x {orig_image_size[1]}")
			
 
				+        
			
 
				         for i, (text, poly, score) in enumerate(zip(rec_texts, rec_polys, rec_scores)):
			
 
				             if text and text.strip():
			
 
				+                # 🎯 如果有旋转角度，转换坐标
			
 
				+                if rotation_angle != 0 and orig_image_size:
			
 
				+                    poly = BBoxExtractor._inverse_rotate_coordinates(
			
 
				+                        poly, rotation_angle, orig_image_size
			
 
				+                    )
			
 
				+                
			
 
				                 # 计算 bbox (x_min, y_min, x_max, y_max)
			
 
				                 bbox = BBoxExtractor._poly_to_bbox(poly)
			
 
				                 
			
@@ -42,12 +61,154 @@ class BBoxExtractor:
 
				                     'paddle_bbox_index': i,
			
 
				                     'used': False
			
 
				                 })
			
 
				-
			
 
				+        
			
 
				         return text_boxes
			
 
				     
			
 
				     @staticmethod
			
 
				+    def _get_rotation_angle(paddle_data: Dict) -> float:
			
 
				+        """获取旋转角度"""
			
 
				+        if 'doc_preprocessor_res' not in paddle_data:
			
 
				+            return 0.0
			
 
				+        
			
 
				+        doc_res = paddle_data['doc_preprocessor_res']
			
 
				+        if isinstance(doc_res, dict) and 'angle' in doc_res:
			
 
				+            return float(doc_res['angle'])
			
 
				+        
			
 
				+        return 0.0
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    def _get_original_image_size(paddle_data: Dict) -> tuple:
			
 
				+        """
			
 
				+        获取原始图像尺寸（从图片文件读取）
			
 
				+        
			
 
				+        Args:
			
 
				+            paddle_data: PaddleOCR 数据
			
 
				+        
			
 
				+        Returns:
			
 
				+            (width, height) 元组
			
 
				+        """
			
 
				+        from PIL import Image
			
 
				+        
			
 
				+        # 🎯 从 input_path 读取图像
			
 
				+        input_path = paddle_data.get('input_path')
			
 
				+        
			
 
				+        if input_path and Path(input_path).exists():
			
 
				+            try:
			
 
				+                with Image.open(input_path) as img:
			
 
				+                    # 返回原始图像尺寸
			
 
				+                    return img.size  # (width, height)
			
 
				+            except Exception as e:
			
 
				+                print(f"⚠️ 无法读取图像文件 {input_path}: {e}")
			
 
				+        
			
 
				+        # 🎯 降级方案：从 layout_det_res 推断
			
 
				+        if 'layout_det_res' in paddle_data:
			
 
				+            layout_res = paddle_data['layout_det_res']
			
 
				+            if 'boxes' in layout_res and layout_res['boxes']:
			
 
				+                max_x = 0
			
 
				+                max_y = 0
			
 
				+                for box in layout_res['boxes']:
			
 
				+                    coord = box.get('coordinate', [])
			
 
				+                    if len(coord) >= 4:
			
 
				+                        max_x = max(max_x, coord[2])
			
 
				+                        max_y = max(max_y, coord[3])
			
 
				+                
			
 
				+                if max_x > 0 and max_y > 0:
			
 
				+                    return (int(max_x) + 50, int(max_y) + 50)
			
 
				+        
			
 
				+        # 🎯 最后降级：从 overall_ocr_res 推断
			
 
				+        if 'overall_ocr_res' in paddle_data:
			
 
				+            ocr_res = paddle_data['overall_ocr_res']
			
 
				+            rec_polys = ocr_res.get('rec_polys', [])
			
 
				+            if rec_polys:
			
 
				+                max_x = 0
			
 
				+                max_y = 0
			
 
				+                for poly in rec_polys:
			
 
				+                    for point in poly:
			
 
				+                        max_x = max(max_x, point[0])
			
 
				+                        max_y = max(max_y, point[1])
			
 
				+                
			
 
				+                if max_x > 0 and max_y > 0:
			
 
				+                    return (int(max_x) + 50, int(max_y) + 50)
			
 
				+        
			
 
				+        # 🎯 默认 A4 尺寸
			
 
				+        print("⚠️ 无法确定原始图像尺寸，使用默认值")
			
 
				+        return (2480, 3508)
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    def _inverse_rotate_coordinates(poly: List[List[float]], 
			
 
				+                                    angle: float,
			
 
				+                                    orig_image_size: tuple) -> List[List[float]]:
			
 
				+        """
			
 
				+        反向旋转坐标
			
 
				+        
			
 
				+        参考 ocr_validator_utils.rotate_image_and_coordinates 的逆操作
			
 
				+        
			
 
				+        PaddleOCR 在旋转后的图像上识别，坐标是旋转后的
			
 
				+        我们需要将坐标转换回原始图像（未旋转）
			
 
				+        
			
 
				+        Args:
			
 
				+            poly: 旋转后图像上的多边形坐标 [[x',y'], ...]
			
 
				+            angle: 旋转角度（度数，PaddleX 使用的角度）
			
 
				+            orig_image_size: 原始图像尺寸 (width, height)
			
 
				+        
			
 
				+        Returns:
			
 
				+            原始图像上的多边形坐标 [[x,y], ...]
			
 
				+        """
			
 
				+        orig_width, orig_height = orig_image_size
			
 
				+        
			
 
				+        # 🎯 根据旋转角度计算旋转后的图像尺寸
			
 
				+        if angle == 90:
			
 
				+            rotated_width, rotated_height = orig_height, orig_width
			
 
				+        elif angle == 270:
			
 
				+            rotated_width, rotated_height = orig_height, orig_width
			
 
				+        else:
			
 
				+            rotated_width, rotated_height = orig_width, orig_height
			
 
				+        
			
 
				+        inverse_poly = []
			
 
				+        
			
 
				+        for point in poly:
			
 
				+            x_rot, y_rot = point[0], point[1]  # 旋转后的坐标
			
 
				+            
			
 
				+            # 🎯 反向旋转（参考 rotate_image_and_coordinates 的逆操作）
			
 
				+            if angle == 90:
			
 
				+                # 正向: rotated = image.rotate(90, expand=True)
			
 
				+                #      x_rot = y_orig
			
 
				+                #      y_rot = rotated_width - x_orig = orig_height - x_orig
			
 
				+                # 反向: x_orig = rotated_width - y_rot = orig_height - y_rot
			
 
				+                #      y_orig = x_rot
			
 
				+                x_orig = rotated_width - y_rot
			
 
				+                y_orig = x_rot
			
 
				+                
			
 
				+            elif angle == 270:
			
 
				+                # 正向: rotated = image.rotate(-90, expand=True)
			
 
				+                #      x_rot = rotated_width - y_orig = orig_height - y_orig
			
 
				+                #      y_rot = x_orig
			
 
				+                # 反向: y_orig = rotated_width - x_rot = orig_height - x_rot
			
 
				+                #      x_orig = y_rot
			
 
				+                x_orig = y_rot
			
 
				+                y_orig = rotated_width - x_rot
			
 
				+                
			
 
				+            elif angle == 180:
			
 
				+                # 正向: rotated = image.rotate(180)
			
 
				+                #      x_rot = orig_width - x_orig
			
 
				+                #      y_rot = orig_height - y_orig
			
 
				+                # 反向: x_orig = orig_width - x_rot
			
 
				+                #      y_orig = orig_height - y_rot
			
 
				+                x_orig = orig_width - x_rot
			
 
				+                y_orig = orig_height - y_rot
			
 
				+                
			
 
				+            else:
			
 
				+                # 其他角度或0度，不转换
			
 
				+                x_orig = x_rot
			
 
				+                y_orig = y_rot
			
 
				+            
			
 
				+            inverse_poly.append([x_orig, y_orig])
			
 
				+        
			
 
				+        return inverse_poly
			
 
				+    
			
 
				+    @staticmethod
			
 
				     def _poly_to_bbox(poly: List[List[float]]) -> List[float]:
			
 
				-        """将多边形转换为 bbox"""
			
 
				+        """将多边形转换为 bbox [x_min, y_min, x_max, y_max]"""
			
 
				         xs = [p[0] for p in poly]
			
 
				         ys = [p[1] for p in poly]
			
 
				         return [min(xs), min(ys), max(xs), max(ys)]
			
--- a/merger/data_processor.py
+++ b/merger/data_processor.py
@@ -7,8 +7,10 @@ from bs4 import BeautifulSoup
 
				 
			
 
				 try:
			
 
				     from .text_matcher import TextMatcher
			
 
				+    from .bbox_extractor import BBoxExtractor
			
 
				 except ImportError:
			
 
				     from text_matcher import TextMatcher
			
 
				+    from bbox_extractor import BBoxExtractor
			
 
				 
			
 
				 
			
 
				 class DataProcessor:
			
@@ -212,28 +214,41 @@ class DataProcessor:
 
				                                   paddle_text_boxes: List[Dict]) -> List[Dict]:
			
 
				         """
			
 
				         处理 PaddleOCR_VL 数据，添加 bbox 信息
			
 
				-    
			
 
				+        
			
 
				         Args:
			
 
				             paddleocr_vl_data: PaddleOCR_VL 数据 (JSON 对象)
			
 
				             paddle_text_boxes: PaddleOCR 文字框列表
			
 
				-    
			
 
				+        
			
 
				         Returns:
			
 
				-            MinerU 格式的合并数据（统一输出格式）
			
 
				+            🎯 MinerU 格式的合并数据（统一输出格式）
			
 
				         """
			
 
				         merged_data = []
			
 
				         paddle_pointer = 0
			
 
				         last_matched_index = 0
			
 
				-    
			
 
				+        
			
 
				+        # 🎯 获取旋转角度和原始图像尺寸
			
 
				+        rotation_angle = self._get_rotation_angle_from_vl(paddleocr_vl_data)
			
 
				+        orig_image_size = None
			
 
				+        
			
 
				+        if rotation_angle != 0:
			
 
				+            orig_image_size = self._get_original_image_size_from_vl(paddleocr_vl_data)
			
 
				+            print(f"🔄 PaddleOCR_VL 检测到旋转角度: {rotation_angle}°")
			
 
				+            print(f"📐 原始图像尺寸: {orig_image_size[0]} x {orig_image_size[1]}")
			
 
				+        
			
 
				         # 提取 parsing_res_list
			
 
				         parsing_res_list = paddleocr_vl_data.get('parsing_res_list', [])
			
 
				-    
			
 
				+        
			
 
				         # 按 bbox 排序
			
 
				         parsing_res_list.sort(
			
 
				             key=lambda x: (x['block_bbox'][1], x['block_bbox'][0])
			
 
				             if 'block_bbox' in x else (float('inf'), float('inf'))
			
 
				         )
			
 
				-    
			
 
				+        
			
 
				         for item in parsing_res_list:
			
 
				+            # 🎯 先转换 bbox 坐标（如果需要）
			
 
				+            if rotation_angle != 0 and orig_image_size:
			
 
				+                item = self._transform_vl_block_bbox(item, rotation_angle, orig_image_size)
			
 
				+            
			
 
				             # 🎯 统一转换为 MinerU 格式
			
 
				             mineru_item = self._convert_paddleocr_vl_to_mineru(item)
			
 
				             item_type = mineru_item.get('type', '')
			
@@ -244,56 +259,94 @@ class DataProcessor:
 
				                     mineru_item, paddle_text_boxes, paddle_pointer
			
 
				                 )
			
 
				                 merged_data.append(merged_item)
			
 
				-        
			
 
				+            
			
 
				             elif item_type in ['text', 'title', 'header', 'footer', 'equation']:
			
 
				                 merged_item, paddle_pointer, last_matched_index = self._process_text(
			
 
				                     mineru_item, paddle_text_boxes, paddle_pointer, last_matched_index
			
 
				                 )
			
 
				                 merged_data.append(merged_item)
			
 
				-        
			
 
				+            
			
 
				             elif item_type == 'list':
			
 
				                 merged_item, paddle_pointer, last_matched_index = self._process_list(
			
 
				                     mineru_item, paddle_text_boxes, paddle_pointer, last_matched_index
			
 
				                 )
			
 
				                 merged_data.append(merged_item)
			
 
				-        
			
 
				+            
			
 
				             else:
			
 
				-                # 其他类型（image, equation 等）直接添加
			
 
				+                # 其他类型（image 等）直接添加
			
 
				                 merged_data.append(mineru_item)
			
 
				-    
			
 
				+        
			
 
				         return merged_data
			
 
				     
			
 
    def _get_rotation_angle_from_vl(self, paddleocr_vl_data: Dict) -> float:
        """Return the rotation angle recorded in a PaddleOCR_VL result.

        Delegates to ``BBoxExtractor._get_rotation_angle``.

        Args:
            paddleocr_vl_data: PaddleOCR_VL result dictionary.

        Returns:
            Whatever ``BBoxExtractor._get_rotation_angle`` returns for it.
        """
        angle = BBoxExtractor._get_rotation_angle(paddleocr_vl_data)
        return angle
			
 
				+    
			
 
    def _get_original_image_size_from_vl(self, paddleocr_vl_data: Dict) -> tuple:
        """Return the original image size for a PaddleOCR_VL result.

        Delegates to ``BBoxExtractor._get_original_image_size``.

        Args:
            paddleocr_vl_data: PaddleOCR_VL result dictionary.

        Returns:
            Whatever ``BBoxExtractor._get_original_image_size`` returns for it.
        """
        image_size = BBoxExtractor._get_original_image_size(paddleocr_vl_data)
        return image_size
			
 
				+    
			
 
    def _transform_vl_block_bbox(self, item: Dict, angle: float,
                                 orig_image_size: tuple) -> Dict:
        """Return a copy of a PaddleOCR_VL block with its ``block_bbox`` un-rotated.

        The ``[x1, y1, x2, y2]`` box is expanded to its four corners, the
        corners are passed through
        ``BBoxExtractor._inverse_rotate_coordinates``, and the axis-aligned
        box around the results is stored back under ``block_bbox``.

        Args:
            item: PaddleOCR_VL block dictionary.
            angle: Rotation angle passed on to the coordinate transform.
            orig_image_size: Original image size passed on to the transform.

        Returns:
            A shallow copy of ``item``. It is returned unmodified when
            ``block_bbox`` is absent or has fewer than four values.
        """
        result = item.copy()

        # Nothing to transform without a four-value block_bbox.
        if 'block_bbox' not in item or len(item['block_bbox']) < 4:
            return result

        box = item['block_bbox']
        x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

        # Corner order: top-left, top-right, bottom-right, bottom-left.
        corners = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
        restored = BBoxExtractor._inverse_rotate_coordinates(
            corners, angle, orig_image_size
        )

        # Collapse the transformed corners back into [x_min, y_min, x_max, y_max].
        xs = [corner[0] for corner in restored]
        ys = [corner[1] for corner in restored]
        result['block_bbox'] = [min(xs), min(ys), max(xs), max(ys)]

        return result
			
 
				+    
			
 
				     def _convert_paddleocr_vl_to_mineru(self, paddleocr_vl_item: Dict) -> Dict:
			
 
				         """
			
 
				         🎯 将 PaddleOCR_VL 格式转换为 MinerU 格式
			
 
				         
			
 
				-        PaddleOCR_VL (PP-DocLayout_plus-L):
			
 
				-        {
			
 
				-            "block_label": "paragraph_title",  # 或 "doc_title", "text" 等
			
 
				-            "block_bbox": [172, 151, 547, 184],
			
 
				-            "block_content": "...",
			
 
				-            "block_id": 0
			
 
				-        }
			
 
				-        
			
 
				-        MinerU:
			
 
				-        {
			
 
				-            "type": "title",
			
 
				-            "bbox": [172, 151, 547, 184],
			
 
				-            "text": "...",
			
 
				-            "text_level": 1,
			
 
				-            "page_idx": 0
			
 
				-        }
			
 
				+        基于 PP-DocLayout_plus-L 的 20 种类别
			
 
				         """
			
 
				         block_label = paddleocr_vl_item.get('block_label', '')
			
 
				         
			
 
				-        # 🎯 PP-DocLayout_plus-L 类别映射
			
 
				+        # 🎯 PP-DocLayout_plus-L 类别映射（共 20 种）
			
 
				         label_map = {
			
 
				-            # 标题类
			
 
				-            'paragraph_title': 'title',      # 段落标题 → title (level 2)
			
 
				-            'doc_title': 'title',            # 文档标题 → title (level 1)
			
 
				-            'figure_table_chart_title': 'title',  # 图表标题 → title (level 3)
			
 
				+            # 标题类（3种）
			
 
				+            'paragraph_title': 'title',
			
 
				+            'doc_title': 'title',
			
 
				+            'figure_table_chart_title': 'title',
			
 
				             
			
 
				-            # 文本类
			
 
				+            # 文本类（9种）
			
 
				             'text': 'text',
			
 
				             'number': 'text',
			
 
				             'content': 'text',
			
@@ -301,57 +354,49 @@ class DataProcessor:
 
				             'footnote': 'text',
			
 
				             'aside_text': 'text',
			
 
				             'algorithm': 'text',
			
 
				-            
			
 
				-            # 参考文献
			
 
				             'reference': 'text',
			
 
				             'reference_content': 'text',
			
 
				             
			
 
				-            # 页眉页脚
			
 
				+            # 页眉页脚（2种）
			
 
				             'header': 'header',
			
 
				             'footer': 'footer',
			
 
				             
			
 
				-            # 表格
			
 
				+            # 表格（1种）
			
 
				             'table': 'table',
			
 
				             
			
 
				-            # 图片
			
 
				+            # 图片/图表（3种）
			
 
				             'image': 'image',
			
 
				             'chart': 'image',
			
 
				+            'seal': 'image',
			
 
				             
			
 
				-            # 公式
			
 
				+            # 公式（2种）
			
 
				             'formula': 'equation',
			
 
				-            'formula_number': 'equation',
			
 
				-            
			
 
				-            # 印章
			
 
				-            'seal': 'image'
			
 
				+            'formula_number': 'equation'
			
 
				         }
			
 
				         
			
 
				         mineru_type = label_map.get(block_label, 'text')
			
 
				         
			
 
				-        # 🎯 基础转换
			
 
				         mineru_item = {
			
 
				             'type': mineru_type,
			
 
				             'bbox': paddleocr_vl_item.get('block_bbox', []),
			
 
				             'page_idx': 0
			
 
				         }
			
 
				         
			
 
				-        # 🎯 处理文本内容
			
 
				         content = paddleocr_vl_item.get('block_content', '')
			
 
				         
			
 
				         if mineru_type == 'table':
			
 
				-            # 表格：block_content -> table_body
			
 
				             mineru_item['table_body'] = content
			
 
				         else:
			
 
				-            # 其他类型：block_content -> text
			
 
				             mineru_item['text'] = content
			
 
				             
			
 
				-            # 🎯 处理标题级别（基于实际的类别）
			
 
				+            # 标题级别
			
 
				             if block_label == 'doc_title':
			
 
				-                mineru_item['text_level'] = 1  # 文档标题 - 一级
			
 
				+                mineru_item['text_level'] = 1
			
 
				             elif block_label == 'paragraph_title':
			
 
				-                mineru_item['text_level'] = 2  # 段落标题 - 二级
			
 
				+                mineru_item['text_level'] = 2
			
 
				             elif block_label == 'figure_table_chart_title':
			
 
				-                mineru_item['text_level'] = 3  # 图表标题 - 三级
			
 
				-    
			
 
				+                mineru_item['text_level'] = 3
			
 
				+        
			
 
				         return mineru_item
			
 
				     
			
 
				     def _process_table(self, item: Dict, paddle_text_boxes: List[Dict],
			
--- a/merger/merge_paddleocr_vl_paddleocr.py
+++ b/merger/merge_paddleocr_vl_paddleocr.py
@@ -285,9 +285,9 @@ if __name__ == "__main__":
 
				     if len(sys.argv) == 1:
			
 
				         # 默认配置
			
 
				         default_config = {
			
 
				-            "paddleocr-vl-file": "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/PaddleOCR_VL_Results/对公_招商银行图_page_001.json",
			
 
				-            "paddle-file": "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_PPStructureV3_Results/对公_招商银行图_page_001.json",
			
 
				-            "output-dir": "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/PaddleOCR_VL_Results_cell_bbox",
			
 
				+            "paddleocr-vl-file": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/paddleocr_vl_results/2023年度报告母公司_page_003.json",
			
 
				+            "paddle-file": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/ppstructurev3_client_results/2023年度报告母公司_page_003.json",
			
 
				+            "output-dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/paddleocr_vl_results_cell_bbox",
			
 
				             "output-type": "both",
			
 
				             "window": "15",
			
 
				             "threshold": "85"
			
--- a/merger/paddleocr_vl_merger.py
+++ b/merger/paddleocr_vl_merger.py
@@ -61,16 +61,17 @@ class PaddleOCRVLMerger:
 
				         # 提取 PaddleOCR 的文字框信息
			
 
				         paddle_text_boxes = self.bbox_extractor.extract_paddle_text_boxes(paddle_data)
			
 
				         
			
 
				-        # 处理 PaddleOCR_VL 的数据
			
 
				+        # 处理 PaddleOCR_VL 的数据, merge后已是minerU json格式
			
 
				         merged_data = self.data_processor.process_paddleocr_vl_data(
			
 
				             paddleocr_vl_data, paddle_text_boxes
			
 
				         )
			
 
				         
			
 
				+        # 不用再转换，
			
 
				         # 转换为指定格式
			
 
				-        if data_format == 'mineru':
			
 
				-            merged_data = self.output_converter.convert_to_mineru_format(
			
 
				-                merged_data, data_source='paddleocr_vl'
			
 
				-            )
			
 
				+        # if data_format == 'mineru':
			
 
				+        #     merged_data = self.output_converter.convert_to_mineru_format(
			
 
				+        #         merged_data, data_source='paddleocr_vl'
			
 
				+        #     )
			
 
				         
			
 
				         return merged_data
Szerző	SHA1 Üzenet	Dátum
zhch158_admin	038666f9ed feat: 优化 PaddleOCR_VL 数据处理逻辑，移除不必要的格式转换	1 hete
zhch158_admin	6e15bf3df4 feat: 更新默认配置文件路径，指向新的数据集位置	1 hete
zhch158_admin	7930c6cd71 feat: 添加对 PaddleOCR_VL 数据的旋转角度和原始图像尺寸处理，优化 bbox 坐标转换	1 hete
zhch158_admin	2ec53f5194 feat: 添加旋转角度处理和原始图像尺寸获取功能，支持坐标反向旋转	1 hete