소스 검색

feat(table-coordinates): add table coordinate utility module for OCR processing

- Renamed `core/coordinate_utils.py` to `table_coordinate_utils.py` (and its class `CoordinateUtils` to `TableCoordinateUtils`); the module keeps the table-specific coordinate transformations, including OCR results to matcher format, inverse rotation of coordinates, and offset handling.
- Implemented methods for converting relative coordinates to absolute, adjusting HTML data-bbox attributes, and managing OCR box transformations.
- Added a new `ocr_utils/coordinate_utils.py` module (class `CoordinateUtils`) for general coordinate calculations and conversions, including IoU and bounding box manipulations.
- Updated `ocr_utils/__init__.py` to import `CoordinateUtils` lazily through the module-level `__getattr__`, avoiding circular imports and improving module dependency management.
zhch158_admin 1 개월 전
부모
커밋
71f6e7c39c
4개의 변경된 파일, 344개의 추가 및 332개의 삭제
  1. 0 22
      ocr_tools/universal_doc_parser/core/pdf_utils.py
  2. 26 310
      ocr_tools/universal_doc_parser/core/table_coordinate_utils.py
  3. 10 0
      ocr_utils/__init__.py
  4. 308 0
      ocr_utils/coordinate_utils.py

+ 0 - 22
ocr_tools/universal_doc_parser/core/pdf_utils.py

@@ -1,22 +0,0 @@
-"""
-PDF处理工具模块
-
-此模块已迁移到使用 ocr_utils.PDFUtils,保留此文件仅用于向后兼容。
-新代码应直接使用 ocr_utils.PDFUtils。
-"""
-import sys
-from pathlib import Path
-
-# 添加 ocr_platform 根目录到 Python 路径(用于导入 ocr_utils)
-ocr_platform_root = Path(__file__).parents[3]  # core -> universal_doc_parser -> ocr_tools -> ocr_platform -> repository.git
-if str(ocr_platform_root) not in sys.path:
-    sys.path.insert(0, str(ocr_platform_root))
-
-# 从 ocr_utils 导入 PDFUtils
-try:
-    from ocr_utils import PDFUtils
-except ImportError:
-    raise ImportError("ocr_utils.PDFUtils is required. Please ensure ocr_utils is available.")
-
-# 为了向后兼容,将 PDFUtils 导出(实际上就是 ocr_utils.PDFUtils)
-__all__ = ['PDFUtils']

+ 26 - 310
ocr_tools/universal_doc_parser/core/coordinate_utils.py → ocr_tools/universal_doc_parser/core/table_coordinate_utils.py

@@ -1,307 +1,38 @@
 """
-坐标转换工具模块
+表格坐标转换工具模块
 
-提供各种坐标转换功能:
-- 底层坐标计算(IoU、重叠比例)
-- 多边形/bbox 格式转换
-- 相对坐标 → 绝对坐标转换
-- OCR 格式转换
-- 旋转坐标逆变换
+提供表格处理相关的坐标转换功能:
+- OCR 结果转换为 TableCellMatcher 格式
+- 表格旋转坐标逆变换
+- 表格偏移量处理
 - HTML data-bbox 坐标转换
+- 元素坐标转换
+
+此模块包含特定于 universal_doc_parser 表格处理的坐标转换方法。
 """
 import re
 import json
-from typing import Dict, List, Any, Optional, Tuple, Union
-import numpy as np
+from typing import Dict, List, Any, Optional, Tuple
 from loguru import logger
 
-# 从 ocr_utils 导入 BBoxExtractor
+# 从 ocr_utils 导入通用坐标工具和 BBoxExtractor
 try:
+    from ocr_utils.coordinate_utils import CoordinateUtils
     from ocr_utils import BBoxExtractor
     MERGER_AVAILABLE = True
 except ImportError:
-    MERGER_AVAILABLE = False
-    BBoxExtractor = None
-
-# 导入 MinerU 组件(用于 IoU 计算)
-try:
-    from mineru.utils.boxbase import calculate_iou as mineru_calculate_iou
-    from mineru.utils.boxbase import calculate_overlap_area_2_minbox_area_ratio
-    MINERU_BOXBASE_AVAILABLE = True
-except ImportError:
-    MINERU_BOXBASE_AVAILABLE = False
-    mineru_calculate_iou = None
-    calculate_overlap_area_2_minbox_area_ratio = None
+    try:
+        from ocr_utils import CoordinateUtils
+        from ocr_utils import BBoxExtractor
+        MERGER_AVAILABLE = True
+    except ImportError:
+        MERGER_AVAILABLE = False
+        BBoxExtractor = None
+        CoordinateUtils = None
 
 
-class CoordinateUtils:
-    """坐标转换工具类"""
-    
-    # ==================== 底层坐标计算方法 ====================
-    
-    @staticmethod
-    def calculate_iou(bbox1: List[float], bbox2: List[float]) -> float:
-        """
-        计算两个 bbox 的 IoU(交并比)
-        
-        Args:
-            bbox1: 第一个 bbox [x1, y1, x2, y2]
-            bbox2: 第二个 bbox [x1, y1, x2, y2]
-            
-        Returns:
-            IoU 值
-        """
-        if MINERU_BOXBASE_AVAILABLE and mineru_calculate_iou is not None:
-            return mineru_calculate_iou(bbox1, bbox2)
-        
-        # 备用实现
-        x_left = max(bbox1[0], bbox2[0])
-        y_top = max(bbox1[1], bbox2[1])
-        x_right = min(bbox1[2], bbox2[2])
-        y_bottom = min(bbox1[3], bbox2[3])
-        
-        if x_right < x_left or y_bottom < y_top:
-            return 0.0
-        
-        intersection_area = (x_right - x_left) * (y_bottom - y_top)
-        bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
-        bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
-        
-        if bbox1_area == 0 or bbox2_area == 0:
-            return 0.0
-        
-        return intersection_area / float(bbox1_area + bbox2_area - intersection_area)
-    
-    @staticmethod
-    def calculate_overlap_ratio(bbox1: List[float], bbox2: List[float]) -> float:
-        """
-        计算重叠面积占小框面积的比例
-        
-        Args:
-            bbox1: 第一个 bbox [x1, y1, x2, y2]
-            bbox2: 第二个 bbox [x1, y1, x2, y2]
-            
-        Returns:
-            重叠比例
-        """
-        if MINERU_BOXBASE_AVAILABLE and calculate_overlap_area_2_minbox_area_ratio is not None:
-            return calculate_overlap_area_2_minbox_area_ratio(bbox1, bbox2)
-        
-        # 备用实现
-        x_left = max(bbox1[0], bbox2[0])
-        y_top = max(bbox1[1], bbox2[1])
-        x_right = min(bbox1[2], bbox2[2])
-        y_bottom = min(bbox1[3], bbox2[3])
-        
-        if x_right < x_left or y_bottom < y_top:
-            return 0.0
-        
-        intersection_area = (x_right - x_left) * (y_bottom - y_top)
-        area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
-        area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
-        min_area = min(area1, area2)
-        
-        if min_area == 0:
-            return 0.0
-        
-        return intersection_area / min_area
-    
-    @staticmethod
-    def calculate_overlap_in_bbox1_ratio(
-        bbox1: List[float], 
-        bbox2: List[float]
-    ) -> float:
-        """
-        计算 bbox1 被 bbox2 覆盖的面积比例
-        
-        Args:
-            bbox1: 第一个 bbox [x1, y1, x2, y2]
-            bbox2: 第二个 bbox [x1, y1, x2, y2]
-            
-        Returns:
-            bbox1 被覆盖的比例
-        """
-        x_left = max(bbox1[0], bbox2[0])
-        y_top = max(bbox1[1], bbox2[1])
-        x_right = min(bbox1[2], bbox2[2])
-        y_bottom = min(bbox1[3], bbox2[3])
-        
-        if x_right < x_left or y_bottom < y_top:
-            return 0.0
-        
-        intersection_area = (x_right - x_left) * (y_bottom - y_top)
-        bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
-        
-        if bbox1_area == 0:
-            return 0.0
-        
-        return intersection_area / bbox1_area
-    
-    @staticmethod
-    def poly_to_bbox(poly: Union[List, None]) -> List[float]:
-        """
-        将多边形坐标转换为 bbox 格式
-        
-        Args:
-            poly: 多边形坐标,支持以下格式:
-                - [[x1,y1], [x2,y1], [x2,y2], [x1,y2]] (4个点)
-                - [x1, y1, x2, y1, x2, y2, x1, y2] (8个值)
-                - [x1, y1, x2, y2] (4个值,已是bbox)
-                
-        Returns:
-            bbox [x1, y1, x2, y2]
-        """
-        if not poly:
-            return [0, 0, 0, 0]
-        
-        # 处理嵌套列表格式 [[x1,y1], [x2,y1], ...]
-        if isinstance(poly[0], (list, tuple)):
-            xs = [p[0] for p in poly]
-            ys = [p[1] for p in poly]
-            return [min(xs), min(ys), max(xs), max(ys)]
-        
-        # 处理平面列表格式
-        if len(poly) == 4:
-            # 已经是 bbox 格式
-            return list(poly)
-        elif len(poly) >= 8:
-            # 8点格式:[x1, y1, x2, y1, x2, y2, x1, y2]
-            xs = [poly[i] for i in range(0, len(poly), 2)]
-            ys = [poly[i] for i in range(1, len(poly), 2)]
-            return [min(xs), min(ys), max(xs), max(ys)]
-        
-        return [0, 0, 0, 0]
-    
-    @staticmethod
-    def bbox_to_poly(bbox: List[float]) -> List[List[float]]:
-        """
-        将 bbox 转换为多边形坐标
-        
-        Args:
-            bbox: [x1, y1, x2, y2]
-            
-        Returns:
-            [[x1,y1], [x2,y1], [x2,y2], [x1,y2]]
-        """
-        if not bbox or len(bbox) < 4:
-            return [[0, 0], [0, 0], [0, 0], [0, 0]]
-        
-        x1, y1, x2, y2 = bbox[:4]
-        return [
-            [float(x1), float(y1)],
-            [float(x2), float(y1)],
-            [float(x2), float(y2)],
-            [float(x1), float(y2)]
-        ]
-    
-    # ==================== 图像裁剪 ====================
-    
-    @staticmethod
-    def crop_region(image: np.ndarray, bbox: List[float], padding: int = 0) -> np.ndarray:
-        """
-        裁剪图像区域
-        
-        Args:
-            image: 原始图像
-            bbox: 裁剪区域 [x1, y1, x2, y2]
-            padding: 边缘padding(像素),可以为正数(扩展裁剪区域)或负数(收缩裁剪区域)
-            
-        Returns:
-            裁剪后的图像
-        """
-        if len(bbox) < 4:
-            return image
-        
-        h, w = image.shape[:2]
-        
-        # 解析padding(支持单个值或四个值)
-        if isinstance(padding, (int, float)):
-            pad_left = pad_right = pad_top = pad_bottom = int(padding)
-        else:
-            # 假设是长度为4的元组/列表 [left, top, right, bottom]
-            if len(padding) >= 4:
-                pad_left, pad_top, pad_right, pad_bottom = [int(p) for p in padding[:4]]
-            else:
-                pad_left = pad_top = pad_right = pad_bottom = 0
-
-        x1 = max(0 - pad_left, int(bbox[0]) - pad_left)
-        y1 = max(0 - pad_top, int(bbox[1]) - pad_top)
-        x2 = min(w + pad_right, int(bbox[2]) + pad_right)
-        y2 = min(h + pad_bottom, int(bbox[3]) + pad_bottom)
-
-        # 确保坐标有效
-        x1 = max(0, x1)
-        y1 = max(0, y1)
-        x2 = min(w, x2)
-        y2 = min(h, y2)
-
-        # 检查是否有效区域
-        if x2 <= x1 or y2 <= y1:
-            return image
-        
-        return image[y1:y2, x1:x2]
-    
-    @staticmethod
-    def bbox_overlap(bbox1: List[float], bbox2: List[float]) -> bool:
-        """
-        检查两个 bbox 是否重叠
-        
-        Args:
-            bbox1: 第一个 bbox [x1, y1, x2, y2]
-            bbox2: 第二个 bbox [x1, y1, x2, y2]
-            
-        Returns:
-            是否重叠
-        """
-        if len(bbox1) < 4 or len(bbox2) < 4:
-            return False
-        
-        x1_1, y1_1, x2_1, y2_1 = bbox1[:4]
-        x1_2, y1_2, x2_2, y2_2 = bbox2[:4]
-        
-        if x2_1 < x1_2 or x2_2 < x1_1:
-            return False
-        if y2_1 < y1_2 or y2_2 < y1_1:
-            return False
-        
-        return True
-    
-    @staticmethod
-    def convert_to_absolute_coords(
-        relative_bbox: List, 
-        region_bbox: List[float]
-    ) -> List:
-        """
-        将相对坐标转换为绝对坐标
-        
-        Args:
-            relative_bbox: 相对坐标
-            region_bbox: 区域的绝对坐标 [x1, y1, x2, y2]
-            
-        Returns:
-            绝对坐标
-        """
-        if not relative_bbox or len(region_bbox) < 4:
-            return relative_bbox
-        
-        bx1, by1 = region_bbox[0], region_bbox[1]
-        
-        # 处理4点坐标格式 [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
-        if isinstance(relative_bbox[0], (list, tuple)):
-            return [
-                [p[0] + bx1, p[1] + by1] for p in relative_bbox
-            ]
-        
-        # 处理4值坐标格式 [x1, y1, x2, y2]
-        if len(relative_bbox) >= 4:
-            return [
-                relative_bbox[0] + bx1,
-                relative_bbox[1] + by1,
-                relative_bbox[2] + bx1,
-                relative_bbox[3] + by1
-            ]
-        
-        return relative_bbox
+class TableCoordinateUtils:
+    """表格坐标转换工具类"""
     
     @staticmethod
     def convert_ocr_to_matcher_format(
@@ -410,8 +141,8 @@ class CoordinateUtils:
         """
         if not MERGER_AVAILABLE or BBoxExtractor is None:
             # 如果 merger 不可用,只添加偏移量
-            converted_cells = CoordinateUtils.add_table_offset_to_cells(cells, table_bbox)
-            converted_html = CoordinateUtils.add_table_offset_to_html(html, table_bbox)
+            converted_cells = TableCoordinateUtils.add_table_offset_to_cells(cells, table_bbox)
+            converted_html = TableCoordinateUtils.add_table_offset_to_html(html, table_bbox)
             return converted_cells, converted_html
         
         table_offset_x, table_offset_y = table_bbox[0], table_bbox[1]
@@ -610,7 +341,7 @@ class CoordinateUtils:
             return ocr_boxes
         
         if not MERGER_AVAILABLE or BBoxExtractor is None:
-            return CoordinateUtils.add_table_offset_to_ocr_boxes(ocr_boxes, table_bbox)
+            return TableCoordinateUtils.add_table_offset_to_ocr_boxes(ocr_boxes, table_bbox)
         
         offset_x = table_bbox[0]
         offset_y = table_bbox[1]
@@ -656,20 +387,6 @@ class CoordinateUtils:
         return converted_boxes
     
     @staticmethod
-    def is_poly_format(bbox: Any) -> bool:
-        """
-        检测 bbox 是否为四点多边形格式
-        
-        四点格式: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
-        矩形格式: [x_min, y_min, x_max, y_max]
-        """
-        if not bbox or not isinstance(bbox, list):
-            return False
-        if len(bbox) != 4:
-            return False
-        return isinstance(bbox[0], (list, tuple))
-    
-    @staticmethod
     def transform_coords_to_original(
         element: Dict[str, Any],
         rotate_angle: int,
@@ -726,7 +443,7 @@ class CoordinateUtils:
             
             # 转换 HTML 中的 data-bbox 属性
             if 'html' in content and content['html']:
-                content['html'] = CoordinateUtils.transform_html_data_bbox(
+                content['html'] = TableCoordinateUtils.transform_html_data_bbox(
                     content['html'], rotate_angle, orig_image_size
                 )
         
@@ -736,7 +453,7 @@ class CoordinateUtils:
             if ocr_details:
                 for detail in ocr_details:
                     if 'bbox' in detail and detail['bbox']:
-                        if CoordinateUtils.is_poly_format(detail['bbox']):
+                        if CoordinateUtils and CoordinateUtils.is_poly_format(detail['bbox']):
                             detail['bbox'] = BBoxExtractor.inverse_rotate_coordinates(
                                 detail['bbox'], rotate_angle, orig_image_size
                             )
@@ -782,4 +499,3 @@ class CoordinateUtils:
         
         pattern = r'data-bbox="(\[[^\]]+\])"'
         return re.sub(pattern, replace_bbox, html)
-

+ 10 - 0
ocr_utils/__init__.py

@@ -56,6 +56,8 @@ from .number_utils import (
     parse_number,
     normalize_text_number
 )
+# 坐标工具使用延迟导入,避免循环依赖
+# from .coordinate_utils import CoordinateUtils  # 已移除,改为延迟导入
 
 __all__ = [
     # PDF 工具
@@ -112,6 +114,8 @@ __all__ = [
     # 数字解析工具
     'parse_number',
     'normalize_text_number',
+    # 坐标工具
+    'CoordinateUtils',
 ]
 
 
@@ -132,6 +136,12 @@ def __getattr__(name: str):
         """
         from .bbox_utils import BBoxExtractor
         return BBoxExtractor
+    elif name == 'CoordinateUtils':
+        """
+        延迟导入 CoordinateUtils,只有在实际使用时才导入。
+        """
+        from .coordinate_utils import CoordinateUtils
+        return CoordinateUtils
     raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
 
 __version__ = "1.0.0"

+ 308 - 0
ocr_utils/coordinate_utils.py

@@ -0,0 +1,308 @@
+"""
+通用坐标转换工具模块
+
+提供通用的坐标计算和转换功能:
+- 底层坐标计算(IoU、重叠比例)
+- 多边形/bbox 格式转换
+- 相对坐标 → 绝对坐标转换
+- 图像裁剪
+- 格式检测
+
+此模块从 universal_doc_parser 中提取,供多个模块共享使用。
+"""
+from typing import List, Tuple, Union, Any
+import numpy as np
+
+# 导入 MinerU 组件(用于 IoU 计算优化)
+try:
+    from mineru.utils.boxbase import calculate_iou as mineru_calculate_iou
+    from mineru.utils.boxbase import calculate_overlap_area_2_minbox_area_ratio
+    MINERU_BOXBASE_AVAILABLE = True
+except ImportError:
+    MINERU_BOXBASE_AVAILABLE = False
+    mineru_calculate_iou = None
+    calculate_overlap_area_2_minbox_area_ratio = None
+
+
+class CoordinateUtils:
+    """通用坐标转换工具类"""
+    
+    # ==================== 底层坐标计算方法 ====================
+    
+    @staticmethod
+    def calculate_iou(bbox1: List[float], bbox2: List[float]) -> float:
+        """
+        计算两个 bbox 的 IoU(交并比)
+        
+        Args:
+            bbox1: 第一个 bbox [x1, y1, x2, y2]
+            bbox2: 第二个 bbox [x1, y1, x2, y2]
+            
+        Returns:
+            IoU 值
+        """
+        if MINERU_BOXBASE_AVAILABLE and mineru_calculate_iou is not None:
+            return mineru_calculate_iou(bbox1, bbox2)
+        
+        # 备用实现
+        x_left = max(bbox1[0], bbox2[0])
+        y_top = max(bbox1[1], bbox2[1])
+        x_right = min(bbox1[2], bbox2[2])
+        y_bottom = min(bbox1[3], bbox2[3])
+        
+        if x_right < x_left or y_bottom < y_top:
+            return 0.0
+        
+        intersection_area = (x_right - x_left) * (y_bottom - y_top)
+        bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
+        bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
+        
+        if bbox1_area == 0 or bbox2_area == 0:
+            return 0.0
+        
+        return intersection_area / float(bbox1_area + bbox2_area - intersection_area)
+    
+    @staticmethod
+    def calculate_overlap_ratio(bbox1: List[float], bbox2: List[float]) -> float:
+        """
+        计算重叠面积占小框面积的比例
+        
+        Args:
+            bbox1: 第一个 bbox [x1, y1, x2, y2]
+            bbox2: 第二个 bbox [x1, y1, x2, y2]
+            
+        Returns:
+            重叠比例
+        """
+        if MINERU_BOXBASE_AVAILABLE and calculate_overlap_area_2_minbox_area_ratio is not None:
+            return calculate_overlap_area_2_minbox_area_ratio(bbox1, bbox2)
+        
+        # 备用实现
+        x_left = max(bbox1[0], bbox2[0])
+        y_top = max(bbox1[1], bbox2[1])
+        x_right = min(bbox1[2], bbox2[2])
+        y_bottom = min(bbox1[3], bbox2[3])
+        
+        if x_right < x_left or y_bottom < y_top:
+            return 0.0
+        
+        intersection_area = (x_right - x_left) * (y_bottom - y_top)
+        area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
+        area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
+        min_area = min(area1, area2)
+        
+        if min_area == 0:
+            return 0.0
+        
+        return intersection_area / min_area
+    
+    @staticmethod
+    def calculate_overlap_in_bbox1_ratio(
+        bbox1: List[float], 
+        bbox2: List[float]
+    ) -> float:
+        """
+        计算 bbox1 被 bbox2 覆盖的面积比例
+        
+        Args:
+            bbox1: 第一个 bbox [x1, y1, x2, y2]
+            bbox2: 第二个 bbox [x1, y1, x2, y2]
+            
+        Returns:
+            bbox1 被覆盖的比例
+        """
+        x_left = max(bbox1[0], bbox2[0])
+        y_top = max(bbox1[1], bbox2[1])
+        x_right = min(bbox1[2], bbox2[2])
+        y_bottom = min(bbox1[3], bbox2[3])
+        
+        if x_right < x_left or y_bottom < y_top:
+            return 0.0
+        
+        intersection_area = (x_right - x_left) * (y_bottom - y_top)
+        bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
+        
+        if bbox1_area == 0:
+            return 0.0
+        
+        return intersection_area / bbox1_area
+    
+    @staticmethod
+    def poly_to_bbox(poly: Union[List, None]) -> List[float]:
+        """
+        将多边形坐标转换为 bbox 格式
+        
+        Args:
+            poly: 多边形坐标,支持以下格式:
+                - [[x1,y1], [x2,y1], [x2,y2], [x1,y2]] (4个点)
+                - [x1, y1, x2, y1, x2, y2, x1, y2] (8个值)
+                - [x1, y1, x2, y2] (4个值,已是bbox)
+                
+        Returns:
+            bbox [x1, y1, x2, y2]
+        """
+        if not poly:
+            return [0, 0, 0, 0]
+        
+        # 处理嵌套列表格式 [[x1,y1], [x2,y1], ...]
+        if isinstance(poly[0], (list, tuple)):
+            xs = [p[0] for p in poly]
+            ys = [p[1] for p in poly]
+            return [min(xs), min(ys), max(xs), max(ys)]
+        
+        # 处理平面列表格式
+        if len(poly) == 4:
+            # 已经是 bbox 格式
+            return list(poly)
+        elif len(poly) >= 8:
+            # 8点格式:[x1, y1, x2, y1, x2, y2, x1, y2]
+            xs = [poly[i] for i in range(0, len(poly), 2)]
+            ys = [poly[i] for i in range(1, len(poly), 2)]
+            return [min(xs), min(ys), max(xs), max(ys)]
+        
+        return [0, 0, 0, 0]
+    
+    @staticmethod
+    def bbox_to_poly(bbox: List[float]) -> List[List[float]]:
+        """
+        将 bbox 转换为多边形坐标
+        
+        Args:
+            bbox: [x1, y1, x2, y2]
+            
+        Returns:
+            [[x1,y1], [x2,y1], [x2,y2], [x1,y2]]
+        """
+        if not bbox or len(bbox) < 4:
+            return [[0, 0], [0, 0], [0, 0], [0, 0]]
+        
+        x1, y1, x2, y2 = bbox[:4]
+        return [
+            [float(x1), float(y1)],
+            [float(x2), float(y1)],
+            [float(x2), float(y2)],
+            [float(x1), float(y2)]
+        ]
+    
+    # ==================== 图像裁剪 ====================
+    
+    @staticmethod
+    def crop_region(image: np.ndarray, bbox: List[float], padding: int = 0) -> np.ndarray:
+        """
+        裁剪图像区域
+        
+        Args:
+            image: 原始图像
+            bbox: 裁剪区域 [x1, y1, x2, y2]
+            padding: 边缘padding(像素),可以为正数(扩展裁剪区域)或负数(收缩裁剪区域)
+            
+        Returns:
+            裁剪后的图像
+        """
+        if len(bbox) < 4:
+            return image
+        
+        h, w = image.shape[:2]
+        
+        # 解析padding(支持单个值或四个值)
+        if isinstance(padding, (int, float)):
+            pad_left = pad_right = pad_top = pad_bottom = int(padding)
+        else:
+            # 假设是长度为4的元组/列表 [left, top, right, bottom]
+            if len(padding) >= 4:
+                pad_left, pad_top, pad_right, pad_bottom = [int(p) for p in padding[:4]]
+            else:
+                pad_left = pad_top = pad_right = pad_bottom = 0
+
+        x1 = max(0 - pad_left, int(bbox[0]) - pad_left)
+        y1 = max(0 - pad_top, int(bbox[1]) - pad_top)
+        x2 = min(w + pad_right, int(bbox[2]) + pad_right)
+        y2 = min(h + pad_bottom, int(bbox[3]) + pad_bottom)
+
+        # 确保坐标有效
+        x1 = max(0, x1)
+        y1 = max(0, y1)
+        x2 = min(w, x2)
+        y2 = min(h, y2)
+
+        # 检查是否有效区域
+        if x2 <= x1 or y2 <= y1:
+            return image
+        
+        return image[y1:y2, x1:x2]
+    
+    @staticmethod
+    def bbox_overlap(bbox1: List[float], bbox2: List[float]) -> bool:
+        """
+        检查两个 bbox 是否重叠
+        
+        Args:
+            bbox1: 第一个 bbox [x1, y1, x2, y2]
+            bbox2: 第二个 bbox [x1, y1, x2, y2]
+            
+        Returns:
+            是否重叠
+        """
+        if len(bbox1) < 4 or len(bbox2) < 4:
+            return False
+        
+        x1_1, y1_1, x2_1, y2_1 = bbox1[:4]
+        x1_2, y1_2, x2_2, y2_2 = bbox2[:4]
+        
+        if x2_1 < x1_2 or x2_2 < x1_1:
+            return False
+        if y2_1 < y1_2 or y2_2 < y1_1:
+            return False
+        
+        return True
+    
+    @staticmethod
+    def convert_to_absolute_coords(
+        relative_bbox: List, 
+        region_bbox: List[float]
+    ) -> List:
+        """
+        将相对坐标转换为绝对坐标
+        
+        Args:
+            relative_bbox: 相对坐标
+            region_bbox: 区域的绝对坐标 [x1, y1, x2, y2]
+            
+        Returns:
+            绝对坐标
+        """
+        if not relative_bbox or len(region_bbox) < 4:
+            return relative_bbox
+        
+        bx1, by1 = region_bbox[0], region_bbox[1]
+        
+        # 处理4点坐标格式 [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
+        if isinstance(relative_bbox[0], (list, tuple)):
+            return [
+                [p[0] + bx1, p[1] + by1] for p in relative_bbox
+            ]
+        
+        # 处理4值坐标格式 [x1, y1, x2, y2]
+        if len(relative_bbox) >= 4:
+            return [
+                relative_bbox[0] + bx1,
+                relative_bbox[1] + by1,
+                relative_bbox[2] + bx1,
+                relative_bbox[3] + by1
+            ]
+        
+        return relative_bbox
+    
+    @staticmethod
+    def is_poly_format(bbox: Any) -> bool:
+        """
+        检测 bbox 是否为四点多边形格式
+        
+        四点格式: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
+        矩形格式: [x_min, y_min, x_max, y_max]
+        """
+        if not bbox or not isinstance(bbox, list):
+            return False
+        if len(bbox) != 4:
+            return False
+        return isinstance(bbox[0], (list, tuple))