|
@@ -219,16 +219,19 @@ class JSONFormatters:
|
|
|
else:
|
|
else:
|
|
|
page_name = doc_name
|
|
page_name = doc_name
|
|
|
|
|
|
|
|
|
|
+ # 获取页面旋转角度
|
|
|
|
|
+ page_rotation_angle = float(page.get('angle', 0))
|
|
|
|
|
+
|
|
|
# 转换为 mineru_vllm_results_cell_bbox 格式
|
|
# 转换为 mineru_vllm_results_cell_bbox 格式
|
|
|
page_elements = []
|
|
page_elements = []
|
|
|
for element in page.get('elements', []):
|
|
for element in page.get('elements', []):
|
|
|
- converted = JSONFormatters._element_to_cell_bbox_format(element, page_idx)
|
|
|
|
|
|
|
+ converted = JSONFormatters._element_to_cell_bbox_format(element, page_idx, page_rotation_angle)
|
|
|
if converted:
|
|
if converted:
|
|
|
page_elements.append(converted)
|
|
page_elements.append(converted)
|
|
|
|
|
|
|
|
# 添加丢弃元素
|
|
# 添加丢弃元素
|
|
|
for element in page.get('discarded_blocks', []):
|
|
for element in page.get('discarded_blocks', []):
|
|
|
- converted = JSONFormatters._element_to_cell_bbox_format(element, page_idx)
|
|
|
|
|
|
|
+ converted = JSONFormatters._element_to_cell_bbox_format(element, page_idx, page_rotation_angle)
|
|
|
if converted:
|
|
if converted:
|
|
|
page_elements.append(converted)
|
|
page_elements.append(converted)
|
|
|
|
|
|
|
@@ -262,10 +265,16 @@ class JSONFormatters:
|
|
|
@staticmethod
|
|
@staticmethod
|
|
|
def _element_to_cell_bbox_format(
|
|
def _element_to_cell_bbox_format(
|
|
|
element: Dict[str, Any],
|
|
element: Dict[str, Any],
|
|
|
- page_idx: int
|
|
|
|
|
|
|
+ page_idx: int,
|
|
|
|
|
+ page_rotation_angle: float = 0.0
|
|
|
) -> Optional[Dict[str, Any]]:
|
|
) -> Optional[Dict[str, Any]]:
|
|
|
"""
|
|
"""
|
|
|
将元素转换为 mineru_vllm_results_cell_bbox 格式
|
|
将元素转换为 mineru_vllm_results_cell_bbox 格式
|
|
|
|
|
+
|
|
|
|
|
+ Args:
|
|
|
|
|
+ element: 元素字典
|
|
|
|
|
+ page_idx: 页面索引
|
|
|
|
|
+ page_rotation_angle: 页面旋转角度(0, 90, 180, 270)
|
|
|
"""
|
|
"""
|
|
|
elem_type = element.get('type', '')
|
|
elem_type = element.get('type', '')
|
|
|
bbox = element.get('bbox', [0, 0, 0, 0])
|
|
bbox = element.get('bbox', [0, 0, 0, 0])
|
|
@@ -277,6 +286,7 @@ class JSONFormatters:
|
|
|
result = {
|
|
result = {
|
|
|
'bbox': bbox,
|
|
'bbox': bbox,
|
|
|
'page_idx': page_idx,
|
|
'page_idx': page_idx,
|
|
|
|
|
+ 'page_rotation_angle': page_rotation_angle,
|
|
|
'reading_order': element.get('reading_order', 0)
|
|
'reading_order': element.get('reading_order', 0)
|
|
|
}
|
|
}
|
|
|
|
|
|