|
|
@@ -225,17 +225,13 @@ class DataProcessor:
|
|
|
Returns:
|
|
|
🎯 MinerU 格式的合并数据(统一输出格式)
|
|
|
"""
|
|
|
- merged_data = []
|
|
|
- paddle_pointer = 0
|
|
|
- last_matched_index = 0
|
|
|
-
|
|
|
# 🎯 获取旋转角度和原始图像尺寸
|
|
|
- rotation_angle = self._get_rotation_angle_from_vl(paddleocr_vl_data)
|
|
|
- vl_orig_image_size = None
|
|
|
+ vl_rotation_angle = self._get_rotation_angle_from_vl(paddleocr_vl_data)
|
|
|
+ vl_orig_image_size = (0,0)
|
|
|
|
|
|
- if rotation_angle != 0:
|
|
|
+ if vl_rotation_angle != 0:
|
|
|
vl_orig_image_size = self._get_original_image_size_from_vl(paddleocr_vl_data)
|
|
|
- print(f"🔄 PaddleOCR_VL 检测到旋转角度: {rotation_angle}°")
|
|
|
+ print(f"🔄 PaddleOCR_VL 检测到旋转角度: {vl_rotation_angle}°")
|
|
|
print(f"📐 原始图像尺寸: {vl_orig_image_size[0]} x {vl_orig_image_size[1]}")
|
|
|
|
|
|
# 提取 parsing_res_list
|
|
|
@@ -250,8 +246,8 @@ class DataProcessor:
|
|
|
|
|
|
for item in parsing_res_list:
|
|
|
# 🎯 先转换 bbox 坐标(如果需要)
|
|
|
- if rotation_angle != 0 and orig_image_size:
|
|
|
- item = self._transform_vl_block_bbox(item, rotation_angle, orig_image_size)
|
|
|
+ if vl_rotation_angle != 0 and orig_image_size:
|
|
|
+ item = self._transform_vl_block_bbox(item, vl_rotation_angle, vl_orig_image_size)
|
|
|
converted_item = self._convert_paddleocr_vl_to_mineru(item)
|
|
|
if converted_item:
|
|
|
mineru_format_data.append(converted_item)
|