|
@@ -1,307 +1,38 @@
|
|
|
"""
|
|
"""
|
|
|
-坐标转换工具模块
|
|
|
|
|
|
|
+表格坐标转换工具模块
|
|
|
|
|
|
|
|
-提供各种坐标转换功能:
|
|
|
|
|
-- 底层坐标计算(IoU、重叠比例)
|
|
|
|
|
-- 多边形/bbox 格式转换
|
|
|
|
|
-- 相对坐标 → 绝对坐标转换
|
|
|
|
|
-- OCR 格式转换
|
|
|
|
|
-- 旋转坐标逆变换
|
|
|
|
|
|
|
+提供表格处理相关的坐标转换功能:
|
|
|
|
|
+- OCR 结果转换为 TableCellMatcher 格式
|
|
|
|
|
+- 表格旋转坐标逆变换
|
|
|
|
|
+- 表格偏移量处理
|
|
|
- HTML data-bbox 坐标转换
|
|
- HTML data-bbox 坐标转换
|
|
|
|
|
+- 元素坐标转换
|
|
|
|
|
+
|
|
|
|
|
+此模块包含特定于 universal_doc_parser 表格处理的坐标转换方法。
|
|
|
"""
|
|
"""
|
|
|
import re
|
|
import re
|
|
|
import json
|
|
import json
|
|
|
-from typing import Dict, List, Any, Optional, Tuple, Union
|
|
|
|
|
-import numpy as np
|
|
|
|
|
|
|
+from typing import Dict, List, Any, Optional, Tuple
|
|
|
from loguru import logger
|
|
from loguru import logger
|
|
|
|
|
|
|
|
-# 从 ocr_utils 导入 BBoxExtractor
|
|
|
|
|
|
|
+# 从 ocr_utils 导入通用坐标工具和 BBoxExtractor
|
|
|
try:
|
|
try:
|
|
|
|
|
+ from ocr_utils.coordinate_utils import CoordinateUtils
|
|
|
from ocr_utils import BBoxExtractor
|
|
from ocr_utils import BBoxExtractor
|
|
|
MERGER_AVAILABLE = True
|
|
MERGER_AVAILABLE = True
|
|
|
except ImportError:
|
|
except ImportError:
|
|
|
- MERGER_AVAILABLE = False
|
|
|
|
|
- BBoxExtractor = None
|
|
|
|
|
-
|
|
|
|
|
-# 导入 MinerU 组件(用于 IoU 计算)
|
|
|
|
|
-try:
|
|
|
|
|
- from mineru.utils.boxbase import calculate_iou as mineru_calculate_iou
|
|
|
|
|
- from mineru.utils.boxbase import calculate_overlap_area_2_minbox_area_ratio
|
|
|
|
|
- MINERU_BOXBASE_AVAILABLE = True
|
|
|
|
|
-except ImportError:
|
|
|
|
|
- MINERU_BOXBASE_AVAILABLE = False
|
|
|
|
|
- mineru_calculate_iou = None
|
|
|
|
|
- calculate_overlap_area_2_minbox_area_ratio = None
|
|
|
|
|
|
|
+ try:
|
|
|
|
|
+ from ocr_utils import CoordinateUtils
|
|
|
|
|
+ from ocr_utils import BBoxExtractor
|
|
|
|
|
+ MERGER_AVAILABLE = True
|
|
|
|
|
+ except ImportError:
|
|
|
|
|
+ MERGER_AVAILABLE = False
|
|
|
|
|
+ BBoxExtractor = None
|
|
|
|
|
+ CoordinateUtils = None
|
|
|
|
|
|
|
|
|
|
|
|
|
-class CoordinateUtils:
|
|
|
|
|
- """坐标转换工具类"""
|
|
|
|
|
-
|
|
|
|
|
- # ==================== 底层坐标计算方法 ====================
|
|
|
|
|
-
|
|
|
|
|
- @staticmethod
|
|
|
|
|
- def calculate_iou(bbox1: List[float], bbox2: List[float]) -> float:
|
|
|
|
|
- """
|
|
|
|
|
- 计算两个 bbox 的 IoU(交并比)
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- bbox1: 第一个 bbox [x1, y1, x2, y2]
|
|
|
|
|
- bbox2: 第二个 bbox [x1, y1, x2, y2]
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- IoU 值
|
|
|
|
|
- """
|
|
|
|
|
- if MINERU_BOXBASE_AVAILABLE and mineru_calculate_iou is not None:
|
|
|
|
|
- return mineru_calculate_iou(bbox1, bbox2)
|
|
|
|
|
-
|
|
|
|
|
- # 备用实现
|
|
|
|
|
- x_left = max(bbox1[0], bbox2[0])
|
|
|
|
|
- y_top = max(bbox1[1], bbox2[1])
|
|
|
|
|
- x_right = min(bbox1[2], bbox2[2])
|
|
|
|
|
- y_bottom = min(bbox1[3], bbox2[3])
|
|
|
|
|
-
|
|
|
|
|
- if x_right < x_left or y_bottom < y_top:
|
|
|
|
|
- return 0.0
|
|
|
|
|
-
|
|
|
|
|
- intersection_area = (x_right - x_left) * (y_bottom - y_top)
|
|
|
|
|
- bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
|
|
|
|
|
- bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
|
|
|
|
|
-
|
|
|
|
|
- if bbox1_area == 0 or bbox2_area == 0:
|
|
|
|
|
- return 0.0
|
|
|
|
|
-
|
|
|
|
|
- return intersection_area / float(bbox1_area + bbox2_area - intersection_area)
|
|
|
|
|
-
|
|
|
|
|
- @staticmethod
|
|
|
|
|
- def calculate_overlap_ratio(bbox1: List[float], bbox2: List[float]) -> float:
|
|
|
|
|
- """
|
|
|
|
|
- 计算重叠面积占小框面积的比例
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- bbox1: 第一个 bbox [x1, y1, x2, y2]
|
|
|
|
|
- bbox2: 第二个 bbox [x1, y1, x2, y2]
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- 重叠比例
|
|
|
|
|
- """
|
|
|
|
|
- if MINERU_BOXBASE_AVAILABLE and calculate_overlap_area_2_minbox_area_ratio is not None:
|
|
|
|
|
- return calculate_overlap_area_2_minbox_area_ratio(bbox1, bbox2)
|
|
|
|
|
-
|
|
|
|
|
- # 备用实现
|
|
|
|
|
- x_left = max(bbox1[0], bbox2[0])
|
|
|
|
|
- y_top = max(bbox1[1], bbox2[1])
|
|
|
|
|
- x_right = min(bbox1[2], bbox2[2])
|
|
|
|
|
- y_bottom = min(bbox1[3], bbox2[3])
|
|
|
|
|
-
|
|
|
|
|
- if x_right < x_left or y_bottom < y_top:
|
|
|
|
|
- return 0.0
|
|
|
|
|
-
|
|
|
|
|
- intersection_area = (x_right - x_left) * (y_bottom - y_top)
|
|
|
|
|
- area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
|
|
|
|
|
- area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
|
|
|
|
|
- min_area = min(area1, area2)
|
|
|
|
|
-
|
|
|
|
|
- if min_area == 0:
|
|
|
|
|
- return 0.0
|
|
|
|
|
-
|
|
|
|
|
- return intersection_area / min_area
|
|
|
|
|
-
|
|
|
|
|
- @staticmethod
|
|
|
|
|
- def calculate_overlap_in_bbox1_ratio(
|
|
|
|
|
- bbox1: List[float],
|
|
|
|
|
- bbox2: List[float]
|
|
|
|
|
- ) -> float:
|
|
|
|
|
- """
|
|
|
|
|
- 计算 bbox1 被 bbox2 覆盖的面积比例
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- bbox1: 第一个 bbox [x1, y1, x2, y2]
|
|
|
|
|
- bbox2: 第二个 bbox [x1, y1, x2, y2]
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- bbox1 被覆盖的比例
|
|
|
|
|
- """
|
|
|
|
|
- x_left = max(bbox1[0], bbox2[0])
|
|
|
|
|
- y_top = max(bbox1[1], bbox2[1])
|
|
|
|
|
- x_right = min(bbox1[2], bbox2[2])
|
|
|
|
|
- y_bottom = min(bbox1[3], bbox2[3])
|
|
|
|
|
-
|
|
|
|
|
- if x_right < x_left or y_bottom < y_top:
|
|
|
|
|
- return 0.0
|
|
|
|
|
-
|
|
|
|
|
- intersection_area = (x_right - x_left) * (y_bottom - y_top)
|
|
|
|
|
- bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
|
|
|
|
|
-
|
|
|
|
|
- if bbox1_area == 0:
|
|
|
|
|
- return 0.0
|
|
|
|
|
-
|
|
|
|
|
- return intersection_area / bbox1_area
|
|
|
|
|
-
|
|
|
|
|
- @staticmethod
|
|
|
|
|
- def poly_to_bbox(poly: Union[List, None]) -> List[float]:
|
|
|
|
|
- """
|
|
|
|
|
- 将多边形坐标转换为 bbox 格式
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- poly: 多边形坐标,支持以下格式:
|
|
|
|
|
- - [[x1,y1], [x2,y1], [x2,y2], [x1,y2]] (4个点)
|
|
|
|
|
- - [x1, y1, x2, y1, x2, y2, x1, y2] (8个值)
|
|
|
|
|
- - [x1, y1, x2, y2] (4个值,已是bbox)
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- bbox [x1, y1, x2, y2]
|
|
|
|
|
- """
|
|
|
|
|
- if not poly:
|
|
|
|
|
- return [0, 0, 0, 0]
|
|
|
|
|
-
|
|
|
|
|
- # 处理嵌套列表格式 [[x1,y1], [x2,y1], ...]
|
|
|
|
|
- if isinstance(poly[0], (list, tuple)):
|
|
|
|
|
- xs = [p[0] for p in poly]
|
|
|
|
|
- ys = [p[1] for p in poly]
|
|
|
|
|
- return [min(xs), min(ys), max(xs), max(ys)]
|
|
|
|
|
-
|
|
|
|
|
- # 处理平面列表格式
|
|
|
|
|
- if len(poly) == 4:
|
|
|
|
|
- # 已经是 bbox 格式
|
|
|
|
|
- return list(poly)
|
|
|
|
|
- elif len(poly) >= 8:
|
|
|
|
|
- # 8点格式:[x1, y1, x2, y1, x2, y2, x1, y2]
|
|
|
|
|
- xs = [poly[i] for i in range(0, len(poly), 2)]
|
|
|
|
|
- ys = [poly[i] for i in range(1, len(poly), 2)]
|
|
|
|
|
- return [min(xs), min(ys), max(xs), max(ys)]
|
|
|
|
|
-
|
|
|
|
|
- return [0, 0, 0, 0]
|
|
|
|
|
-
|
|
|
|
|
- @staticmethod
|
|
|
|
|
- def bbox_to_poly(bbox: List[float]) -> List[List[float]]:
|
|
|
|
|
- """
|
|
|
|
|
- 将 bbox 转换为多边形坐标
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- bbox: [x1, y1, x2, y2]
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- [[x1,y1], [x2,y1], [x2,y2], [x1,y2]]
|
|
|
|
|
- """
|
|
|
|
|
- if not bbox or len(bbox) < 4:
|
|
|
|
|
- return [[0, 0], [0, 0], [0, 0], [0, 0]]
|
|
|
|
|
-
|
|
|
|
|
- x1, y1, x2, y2 = bbox[:4]
|
|
|
|
|
- return [
|
|
|
|
|
- [float(x1), float(y1)],
|
|
|
|
|
- [float(x2), float(y1)],
|
|
|
|
|
- [float(x2), float(y2)],
|
|
|
|
|
- [float(x1), float(y2)]
|
|
|
|
|
- ]
|
|
|
|
|
-
|
|
|
|
|
- # ==================== 图像裁剪 ====================
|
|
|
|
|
-
|
|
|
|
|
- @staticmethod
|
|
|
|
|
- def crop_region(image: np.ndarray, bbox: List[float], padding: int = 0) -> np.ndarray:
|
|
|
|
|
- """
|
|
|
|
|
- 裁剪图像区域
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- image: 原始图像
|
|
|
|
|
- bbox: 裁剪区域 [x1, y1, x2, y2]
|
|
|
|
|
- padding: 边缘padding(像素),可以为正数(扩展裁剪区域)或负数(收缩裁剪区域)
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- 裁剪后的图像
|
|
|
|
|
- """
|
|
|
|
|
- if len(bbox) < 4:
|
|
|
|
|
- return image
|
|
|
|
|
-
|
|
|
|
|
- h, w = image.shape[:2]
|
|
|
|
|
-
|
|
|
|
|
- # 解析padding(支持单个值或四个值)
|
|
|
|
|
- if isinstance(padding, (int, float)):
|
|
|
|
|
- pad_left = pad_right = pad_top = pad_bottom = int(padding)
|
|
|
|
|
- else:
|
|
|
|
|
- # 假设是长度为4的元组/列表 [left, top, right, bottom]
|
|
|
|
|
- if len(padding) >= 4:
|
|
|
|
|
- pad_left, pad_top, pad_right, pad_bottom = [int(p) for p in padding[:4]]
|
|
|
|
|
- else:
|
|
|
|
|
- pad_left = pad_top = pad_right = pad_bottom = 0
|
|
|
|
|
-
|
|
|
|
|
- x1 = max(0 - pad_left, int(bbox[0]) - pad_left)
|
|
|
|
|
- y1 = max(0 - pad_top, int(bbox[1]) - pad_top)
|
|
|
|
|
- x2 = min(w + pad_right, int(bbox[2]) + pad_right)
|
|
|
|
|
- y2 = min(h + pad_bottom, int(bbox[3]) + pad_bottom)
|
|
|
|
|
-
|
|
|
|
|
- # 确保坐标有效
|
|
|
|
|
- x1 = max(0, x1)
|
|
|
|
|
- y1 = max(0, y1)
|
|
|
|
|
- x2 = min(w, x2)
|
|
|
|
|
- y2 = min(h, y2)
|
|
|
|
|
-
|
|
|
|
|
- # 检查是否有效区域
|
|
|
|
|
- if x2 <= x1 or y2 <= y1:
|
|
|
|
|
- return image
|
|
|
|
|
-
|
|
|
|
|
- return image[y1:y2, x1:x2]
|
|
|
|
|
-
|
|
|
|
|
- @staticmethod
|
|
|
|
|
- def bbox_overlap(bbox1: List[float], bbox2: List[float]) -> bool:
|
|
|
|
|
- """
|
|
|
|
|
- 检查两个 bbox 是否重叠
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- bbox1: 第一个 bbox [x1, y1, x2, y2]
|
|
|
|
|
- bbox2: 第二个 bbox [x1, y1, x2, y2]
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- 是否重叠
|
|
|
|
|
- """
|
|
|
|
|
- if len(bbox1) < 4 or len(bbox2) < 4:
|
|
|
|
|
- return False
|
|
|
|
|
-
|
|
|
|
|
- x1_1, y1_1, x2_1, y2_1 = bbox1[:4]
|
|
|
|
|
- x1_2, y1_2, x2_2, y2_2 = bbox2[:4]
|
|
|
|
|
-
|
|
|
|
|
- if x2_1 < x1_2 or x2_2 < x1_1:
|
|
|
|
|
- return False
|
|
|
|
|
- if y2_1 < y1_2 or y2_2 < y1_1:
|
|
|
|
|
- return False
|
|
|
|
|
-
|
|
|
|
|
- return True
|
|
|
|
|
-
|
|
|
|
|
- @staticmethod
|
|
|
|
|
- def convert_to_absolute_coords(
|
|
|
|
|
- relative_bbox: List,
|
|
|
|
|
- region_bbox: List[float]
|
|
|
|
|
- ) -> List:
|
|
|
|
|
- """
|
|
|
|
|
- 将相对坐标转换为绝对坐标
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- relative_bbox: 相对坐标
|
|
|
|
|
- region_bbox: 区域的绝对坐标 [x1, y1, x2, y2]
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- 绝对坐标
|
|
|
|
|
- """
|
|
|
|
|
- if not relative_bbox or len(region_bbox) < 4:
|
|
|
|
|
- return relative_bbox
|
|
|
|
|
-
|
|
|
|
|
- bx1, by1 = region_bbox[0], region_bbox[1]
|
|
|
|
|
-
|
|
|
|
|
- # 处理4点坐标格式 [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
|
|
|
|
|
- if isinstance(relative_bbox[0], (list, tuple)):
|
|
|
|
|
- return [
|
|
|
|
|
- [p[0] + bx1, p[1] + by1] for p in relative_bbox
|
|
|
|
|
- ]
|
|
|
|
|
-
|
|
|
|
|
- # 处理4值坐标格式 [x1, y1, x2, y2]
|
|
|
|
|
- if len(relative_bbox) >= 4:
|
|
|
|
|
- return [
|
|
|
|
|
- relative_bbox[0] + bx1,
|
|
|
|
|
- relative_bbox[1] + by1,
|
|
|
|
|
- relative_bbox[2] + bx1,
|
|
|
|
|
- relative_bbox[3] + by1
|
|
|
|
|
- ]
|
|
|
|
|
-
|
|
|
|
|
- return relative_bbox
|
|
|
|
|
|
|
+class TableCoordinateUtils:
|
|
|
|
|
+ """表格坐标转换工具类"""
|
|
|
|
|
|
|
|
@staticmethod
|
|
@staticmethod
|
|
|
def convert_ocr_to_matcher_format(
|
|
def convert_ocr_to_matcher_format(
|
|
@@ -410,8 +141,8 @@ class CoordinateUtils:
|
|
|
"""
|
|
"""
|
|
|
if not MERGER_AVAILABLE or BBoxExtractor is None:
|
|
if not MERGER_AVAILABLE or BBoxExtractor is None:
|
|
|
# 如果 merger 不可用,只添加偏移量
|
|
# 如果 merger 不可用,只添加偏移量
|
|
|
- converted_cells = CoordinateUtils.add_table_offset_to_cells(cells, table_bbox)
|
|
|
|
|
- converted_html = CoordinateUtils.add_table_offset_to_html(html, table_bbox)
|
|
|
|
|
|
|
+ converted_cells = TableCoordinateUtils.add_table_offset_to_cells(cells, table_bbox)
|
|
|
|
|
+ converted_html = TableCoordinateUtils.add_table_offset_to_html(html, table_bbox)
|
|
|
return converted_cells, converted_html
|
|
return converted_cells, converted_html
|
|
|
|
|
|
|
|
table_offset_x, table_offset_y = table_bbox[0], table_bbox[1]
|
|
table_offset_x, table_offset_y = table_bbox[0], table_bbox[1]
|
|
@@ -610,7 +341,7 @@ class CoordinateUtils:
|
|
|
return ocr_boxes
|
|
return ocr_boxes
|
|
|
|
|
|
|
|
if not MERGER_AVAILABLE or BBoxExtractor is None:
|
|
if not MERGER_AVAILABLE or BBoxExtractor is None:
|
|
|
- return CoordinateUtils.add_table_offset_to_ocr_boxes(ocr_boxes, table_bbox)
|
|
|
|
|
|
|
+ return TableCoordinateUtils.add_table_offset_to_ocr_boxes(ocr_boxes, table_bbox)
|
|
|
|
|
|
|
|
offset_x = table_bbox[0]
|
|
offset_x = table_bbox[0]
|
|
|
offset_y = table_bbox[1]
|
|
offset_y = table_bbox[1]
|
|
@@ -656,20 +387,6 @@ class CoordinateUtils:
|
|
|
return converted_boxes
|
|
return converted_boxes
|
|
|
|
|
|
|
|
@staticmethod
|
|
@staticmethod
|
|
|
- def is_poly_format(bbox: Any) -> bool:
|
|
|
|
|
- """
|
|
|
|
|
- 检测 bbox 是否为四点多边形格式
|
|
|
|
|
-
|
|
|
|
|
- 四点格式: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
|
|
|
|
|
- 矩形格式: [x_min, y_min, x_max, y_max]
|
|
|
|
|
- """
|
|
|
|
|
- if not bbox or not isinstance(bbox, list):
|
|
|
|
|
- return False
|
|
|
|
|
- if len(bbox) != 4:
|
|
|
|
|
- return False
|
|
|
|
|
- return isinstance(bbox[0], (list, tuple))
|
|
|
|
|
-
|
|
|
|
|
- @staticmethod
|
|
|
|
|
def transform_coords_to_original(
|
|
def transform_coords_to_original(
|
|
|
element: Dict[str, Any],
|
|
element: Dict[str, Any],
|
|
|
rotate_angle: int,
|
|
rotate_angle: int,
|
|
@@ -726,7 +443,7 @@ class CoordinateUtils:
|
|
|
|
|
|
|
|
# 转换 HTML 中的 data-bbox 属性
|
|
# 转换 HTML 中的 data-bbox 属性
|
|
|
if 'html' in content and content['html']:
|
|
if 'html' in content and content['html']:
|
|
|
- content['html'] = CoordinateUtils.transform_html_data_bbox(
|
|
|
|
|
|
|
+ content['html'] = TableCoordinateUtils.transform_html_data_bbox(
|
|
|
content['html'], rotate_angle, orig_image_size
|
|
content['html'], rotate_angle, orig_image_size
|
|
|
)
|
|
)
|
|
|
|
|
|
|
@@ -736,7 +453,7 @@ class CoordinateUtils:
|
|
|
if ocr_details:
|
|
if ocr_details:
|
|
|
for detail in ocr_details:
|
|
for detail in ocr_details:
|
|
|
if 'bbox' in detail and detail['bbox']:
|
|
if 'bbox' in detail and detail['bbox']:
|
|
|
- if CoordinateUtils.is_poly_format(detail['bbox']):
|
|
|
|
|
|
|
+ if CoordinateUtils and CoordinateUtils.is_poly_format(detail['bbox']):
|
|
|
detail['bbox'] = BBoxExtractor.inverse_rotate_coordinates(
|
|
detail['bbox'] = BBoxExtractor.inverse_rotate_coordinates(
|
|
|
detail['bbox'], rotate_angle, orig_image_size
|
|
detail['bbox'], rotate_angle, orig_image_size
|
|
|
)
|
|
)
|
|
@@ -782,4 +499,3 @@ class CoordinateUtils:
|
|
|
|
|
|
|
|
pattern = r'data-bbox="(\[[^\]]+\])"'
|
|
pattern = r'data-bbox="(\[[^\]]+\])"'
|
|
|
return re.sub(pattern, replace_bbox, html)
|
|
return re.sub(pattern, replace_bbox, html)
|
|
|
-
|
|
|