5 달 전 · 71f6e7c39c
--- a/ocr_tools/universal_doc_parser/core/pdf_utils.py
+++ b/ocr_tools/universal_doc_parser/core/pdf_utils.py
@@ -1,22 +0,0 @@
 
				-"""
			
 
				-PDF处理工具模块
			
 
				-
			
 
				-此模块已迁移到使用 ocr_utils.PDFUtils，保留此文件仅用于向后兼容。
			
 
				-新代码应直接使用 ocr_utils.PDFUtils。
			
 
				-"""
			
 
				-import sys
			
 
				-from pathlib import Path
			
 
				-
			
 
				-# 添加 ocr_platform 根目录到 Python 路径（用于导入 ocr_utils）
			
 
				-ocr_platform_root = Path(__file__).parents[3]  # core -> universal_doc_parser -> ocr_tools -> ocr_platform -> repository.git
			
 
				-if str(ocr_platform_root) not in sys.path:
			
 
				-    sys.path.insert(0, str(ocr_platform_root))
			
 
				-
			
 
				-# 从 ocr_utils 导入 PDFUtils
			
 
				-try:
			
 
				-    from ocr_utils import PDFUtils
			
 
				-except ImportError:
			
 
				-    raise ImportError("ocr_utils.PDFUtils is required. Please ensure ocr_utils is available.")
			
 
				-
			
 
				-# 为了向后兼容，将 PDFUtils 导出（实际上就是 ocr_utils.PDFUtils）
			
 
				-__all__ = ['PDFUtils']
			
--- a/ocr_tools/universal_doc_parser/core/table_coordinate_utils.py
+++ b/ocr_tools/universal_doc_parser/core/table_coordinate_utils.py
@@ -1,307 +1,38 @@
 
				 """
			
 
				-坐标转换工具模块
			
 
				+表格坐标转换工具模块
			
 
				 
			
 
				-提供各种坐标转换功能：
			
 
				-- 底层坐标计算（IoU、重叠比例）
			
 
				-- 多边形/bbox 格式转换
			
 
				-- 相对坐标 → 绝对坐标转换
			
 
				-- OCR 格式转换
			
 
				-- 旋转坐标逆变换
			
 
				+提供表格处理相关的坐标转换功能：
			
 
				+- OCR 结果转换为 TableCellMatcher 格式
			
 
				+- 表格旋转坐标逆变换
			
 
				+- 表格偏移量处理
			
 
				 - HTML data-bbox 坐标转换
			
 
				+- 元素坐标转换
			
 
				+
			
 
				+此模块包含特定于 universal_doc_parser 表格处理的坐标转换方法。
			
 
				 """
			
 
				 import re
			
 
				 import json
			
 
				-from typing import Dict, List, Any, Optional, Tuple, Union
			
 
				-import numpy as np
			
 
				+from typing import Dict, List, Any, Optional, Tuple
			
 
				 from loguru import logger
			
 
				 
			
 
				-# 从 ocr_utils 导入 BBoxExtractor
			
 
				+# 从 ocr_utils 导入通用坐标工具和 BBoxExtractor
			
 
				 try:
			
 
				+    from ocr_utils.coordinate_utils import CoordinateUtils
			
 
				     from ocr_utils import BBoxExtractor
			
 
				     MERGER_AVAILABLE = True
			
 
				 except ImportError:
			
 
				-    MERGER_AVAILABLE = False
			
 
				-    BBoxExtractor = None
			
 
				-
			
 
				-# 导入 MinerU 组件（用于 IoU 计算）
			
 
				-try:
			
 
				-    from mineru.utils.boxbase import calculate_iou as mineru_calculate_iou
			
 
				-    from mineru.utils.boxbase import calculate_overlap_area_2_minbox_area_ratio
			
 
				-    MINERU_BOXBASE_AVAILABLE = True
			
 
				-except ImportError:
			
 
				-    MINERU_BOXBASE_AVAILABLE = False
			
 
				-    mineru_calculate_iou = None
			
 
				-    calculate_overlap_area_2_minbox_area_ratio = None
			
 
				+    try:
			
 
				+        from ocr_utils import CoordinateUtils
			
 
				+        from ocr_utils import BBoxExtractor
			
 
				+        MERGER_AVAILABLE = True
			
 
				+    except ImportError:
			
 
				+        MERGER_AVAILABLE = False
			
 
				+        BBoxExtractor = None
			
 
				+        CoordinateUtils = None
			
 
				 
			
 
				 
			
 
				-class CoordinateUtils:
			
 
				-    """坐标转换工具类"""
			
 
				-    
			
 
				-    # ==================== 底层坐标计算方法 ====================
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def calculate_iou(bbox1: List[float], bbox2: List[float]) -> float:
			
 
				-        """
			
 
				-        计算两个 bbox 的 IoU（交并比）
			
 
				-        
			
 
				-        Args:
			
 
				-            bbox1: 第一个 bbox [x1, y1, x2, y2]
			
 
				-            bbox2: 第二个 bbox [x1, y1, x2, y2]
			
 
				-            
			
 
				-        Returns:
			
 
				-            IoU 值
			
 
				-        """
			
 
				-        if MINERU_BOXBASE_AVAILABLE and mineru_calculate_iou is not None:
			
 
				-            return mineru_calculate_iou(bbox1, bbox2)
			
 
				-        
			
 
				-        # 备用实现
			
 
				-        x_left = max(bbox1[0], bbox2[0])
			
 
				-        y_top = max(bbox1[1], bbox2[1])
			
 
				-        x_right = min(bbox1[2], bbox2[2])
			
 
				-        y_bottom = min(bbox1[3], bbox2[3])
			
 
				-        
			
 
				-        if x_right < x_left or y_bottom < y_top:
			
 
				-            return 0.0
			
 
				-        
			
 
				-        intersection_area = (x_right - x_left) * (y_bottom - y_top)
			
 
				-        bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
			
 
				-        bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
			
 
				-        
			
 
				-        if bbox1_area == 0 or bbox2_area == 0:
			
 
				-            return 0.0
			
 
				-        
			
 
				-        return intersection_area / float(bbox1_area + bbox2_area - intersection_area)
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def calculate_overlap_ratio(bbox1: List[float], bbox2: List[float]) -> float:
			
 
				-        """
			
 
				-        计算重叠面积占小框面积的比例
			
 
				-        
			
 
				-        Args:
			
 
				-            bbox1: 第一个 bbox [x1, y1, x2, y2]
			
 
				-            bbox2: 第二个 bbox [x1, y1, x2, y2]
			
 
				-            
			
 
				-        Returns:
			
 
				-            重叠比例
			
 
				-        """
			
 
				-        if MINERU_BOXBASE_AVAILABLE and calculate_overlap_area_2_minbox_area_ratio is not None:
			
 
				-            return calculate_overlap_area_2_minbox_area_ratio(bbox1, bbox2)
			
 
				-        
			
 
				-        # 备用实现
			
 
				-        x_left = max(bbox1[0], bbox2[0])
			
 
				-        y_top = max(bbox1[1], bbox2[1])
			
 
				-        x_right = min(bbox1[2], bbox2[2])
			
 
				-        y_bottom = min(bbox1[3], bbox2[3])
			
 
				-        
			
 
				-        if x_right < x_left or y_bottom < y_top:
			
 
				-            return 0.0
			
 
				-        
			
 
				-        intersection_area = (x_right - x_left) * (y_bottom - y_top)
			
 
				-        area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
			
 
				-        area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
			
 
				-        min_area = min(area1, area2)
			
 
				-        
			
 
				-        if min_area == 0:
			
 
				-            return 0.0
			
 
				-        
			
 
				-        return intersection_area / min_area
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def calculate_overlap_in_bbox1_ratio(
			
 
				-        bbox1: List[float], 
			
 
				-        bbox2: List[float]
			
 
				-    ) -> float:
			
 
				-        """
			
 
				-        计算 bbox1 被 bbox2 覆盖的面积比例
			
 
				-        
			
 
				-        Args:
			
 
				-            bbox1: 第一个 bbox [x1, y1, x2, y2]
			
 
				-            bbox2: 第二个 bbox [x1, y1, x2, y2]
			
 
				-            
			
 
				-        Returns:
			
 
				-            bbox1 被覆盖的比例
			
 
				-        """
			
 
				-        x_left = max(bbox1[0], bbox2[0])
			
 
				-        y_top = max(bbox1[1], bbox2[1])
			
 
				-        x_right = min(bbox1[2], bbox2[2])
			
 
				-        y_bottom = min(bbox1[3], bbox2[3])
			
 
				-        
			
 
				-        if x_right < x_left or y_bottom < y_top:
			
 
				-            return 0.0
			
 
				-        
			
 
				-        intersection_area = (x_right - x_left) * (y_bottom - y_top)
			
 
				-        bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
			
 
				-        
			
 
				-        if bbox1_area == 0:
			
 
				-            return 0.0
			
 
				-        
			
 
				-        return intersection_area / bbox1_area
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def poly_to_bbox(poly: Union[List, None]) -> List[float]:
			
 
				-        """
			
 
				-        将多边形坐标转换为 bbox 格式
			
 
				-        
			
 
				-        Args:
			
 
				-            poly: 多边形坐标，支持以下格式：
			
 
				-                - [[x1,y1], [x2,y1], [x2,y2], [x1,y2]] (4个点)
			
 
				-                - [x1, y1, x2, y1, x2, y2, x1, y2] (8个值)
			
 
				-                - [x1, y1, x2, y2] (4个值，已是bbox)
			
 
				-                
			
 
				-        Returns:
			
 
				-            bbox [x1, y1, x2, y2]
			
 
				-        """
			
 
				-        if not poly:
			
 
				-            return [0, 0, 0, 0]
			
 
				-        
			
 
				-        # 处理嵌套列表格式 [[x1,y1], [x2,y1], ...]
			
 
				-        if isinstance(poly[0], (list, tuple)):
			
 
				-            xs = [p[0] for p in poly]
			
 
				-            ys = [p[1] for p in poly]
			
 
				-            return [min(xs), min(ys), max(xs), max(ys)]
			
 
				-        
			
 
				-        # 处理平面列表格式
			
 
				-        if len(poly) == 4:
			
 
				-            # 已经是 bbox 格式
			
 
				-            return list(poly)
			
 
				-        elif len(poly) >= 8:
			
 
				-            # 8点格式：[x1, y1, x2, y1, x2, y2, x1, y2]
			
 
				-            xs = [poly[i] for i in range(0, len(poly), 2)]
			
 
				-            ys = [poly[i] for i in range(1, len(poly), 2)]
			
 
				-            return [min(xs), min(ys), max(xs), max(ys)]
			
 
				-        
			
 
				-        return [0, 0, 0, 0]
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def bbox_to_poly(bbox: List[float]) -> List[List[float]]:
			
 
				-        """
			
 
				-        将 bbox 转换为多边形坐标
			
 
				-        
			
 
				-        Args:
			
 
				-            bbox: [x1, y1, x2, y2]
			
 
				-            
			
 
				-        Returns:
			
 
				-            [[x1,y1], [x2,y1], [x2,y2], [x1,y2]]
			
 
				-        """
			
 
				-        if not bbox or len(bbox) < 4:
			
 
				-            return [[0, 0], [0, 0], [0, 0], [0, 0]]
			
 
				-        
			
 
				-        x1, y1, x2, y2 = bbox[:4]
			
 
				-        return [
			
 
				-            [float(x1), float(y1)],
			
 
				-            [float(x2), float(y1)],
			
 
				-            [float(x2), float(y2)],
			
 
				-            [float(x1), float(y2)]
			
 
				-        ]
			
 
				-    
			
 
				-    # ==================== 图像裁剪 ====================
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def crop_region(image: np.ndarray, bbox: List[float], padding: int = 0) -> np.ndarray:
			
 
				-        """
			
 
				-        裁剪图像区域
			
 
				-        
			
 
				-        Args:
			
 
				-            image: 原始图像
			
 
				-            bbox: 裁剪区域 [x1, y1, x2, y2]
			
 
				-            padding: 边缘padding（像素），可以为正数（扩展裁剪区域）或负数（收缩裁剪区域）
			
 
				-            
			
 
				-        Returns:
			
 
				-            裁剪后的图像
			
 
				-        """
			
 
				-        if len(bbox) < 4:
			
 
				-            return image
			
 
				-        
			
 
				-        h, w = image.shape[:2]
			
 
				-        
			
 
				-        # 解析padding（支持单个值或四个值）
			
 
				-        if isinstance(padding, (int, float)):
			
 
				-            pad_left = pad_right = pad_top = pad_bottom = int(padding)
			
 
				-        else:
			
 
				-            # 假设是长度为4的元组/列表 [left, top, right, bottom]
			
 
				-            if len(padding) >= 4:
			
 
				-                pad_left, pad_top, pad_right, pad_bottom = [int(p) for p in padding[:4]]
			
 
				-            else:
			
 
				-                pad_left = pad_top = pad_right = pad_bottom = 0
			
 
				-
			
 
				-        x1 = max(0 - pad_left, int(bbox[0]) - pad_left)
			
 
				-        y1 = max(0 - pad_top, int(bbox[1]) - pad_top)
			
 
				-        x2 = min(w + pad_right, int(bbox[2]) + pad_right)
			
 
				-        y2 = min(h + pad_bottom, int(bbox[3]) + pad_bottom)
			
 
				-
			
 
				-        # 确保坐标有效
			
 
				-        x1 = max(0, x1)
			
 
				-        y1 = max(0, y1)
			
 
				-        x2 = min(w, x2)
			
 
				-        y2 = min(h, y2)
			
 
				-
			
 
				-        # 检查是否有效区域
			
 
				-        if x2 <= x1 or y2 <= y1:
			
 
				-            return image
			
 
				-        
			
 
				-        return image[y1:y2, x1:x2]
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def bbox_overlap(bbox1: List[float], bbox2: List[float]) -> bool:
			
 
				-        """
			
 
				-        检查两个 bbox 是否重叠
			
 
				-        
			
 
				-        Args:
			
 
				-            bbox1: 第一个 bbox [x1, y1, x2, y2]
			
 
				-            bbox2: 第二个 bbox [x1, y1, x2, y2]
			
 
				-            
			
 
				-        Returns:
			
 
				-            是否重叠
			
 
				-        """
			
 
				-        if len(bbox1) < 4 or len(bbox2) < 4:
			
 
				-            return False
			
 
				-        
			
 
				-        x1_1, y1_1, x2_1, y2_1 = bbox1[:4]
			
 
				-        x1_2, y1_2, x2_2, y2_2 = bbox2[:4]
			
 
				-        
			
 
				-        if x2_1 < x1_2 or x2_2 < x1_1:
			
 
				-            return False
			
 
				-        if y2_1 < y1_2 or y2_2 < y1_1:
			
 
				-            return False
			
 
				-        
			
 
				-        return True
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def convert_to_absolute_coords(
			
 
				-        relative_bbox: List, 
			
 
				-        region_bbox: List[float]
			
 
				-    ) -> List:
			
 
				-        """
			
 
				-        将相对坐标转换为绝对坐标
			
 
				-        
			
 
				-        Args:
			
 
				-            relative_bbox: 相对坐标
			
 
				-            region_bbox: 区域的绝对坐标 [x1, y1, x2, y2]
			
 
				-            
			
 
				-        Returns:
			
 
				-            绝对坐标
			
 
				-        """
			
 
				-        if not relative_bbox or len(region_bbox) < 4:
			
 
				-            return relative_bbox
			
 
				-        
			
 
				-        bx1, by1 = region_bbox[0], region_bbox[1]
			
 
				-        
			
 
				-        # 处理4点坐标格式 [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
			
 
				-        if isinstance(relative_bbox[0], (list, tuple)):
			
 
				-            return [
			
 
				-                [p[0] + bx1, p[1] + by1] for p in relative_bbox
			
 
				-            ]
			
 
				-        
			
 
				-        # 处理4值坐标格式 [x1, y1, x2, y2]
			
 
				-        if len(relative_bbox) >= 4:
			
 
				-            return [
			
 
				-                relative_bbox[0] + bx1,
			
 
				-                relative_bbox[1] + by1,
			
 
				-                relative_bbox[2] + bx1,
			
 
				-                relative_bbox[3] + by1
			
 
				-            ]
			
 
				-        
			
 
				-        return relative_bbox
			
 
				+class TableCoordinateUtils:
			
 
				+    """表格坐标转换工具类"""
			
 
				     
			
 
				     @staticmethod
			
 
				     def convert_ocr_to_matcher_format(
			
@@ -410,8 +141,8 @@ class CoordinateUtils:
 
				         """
			
 
				         if not MERGER_AVAILABLE or BBoxExtractor is None:
			
 
				             # 如果 merger 不可用，只添加偏移量
			
 
				-            converted_cells = CoordinateUtils.add_table_offset_to_cells(cells, table_bbox)
			
 
				-            converted_html = CoordinateUtils.add_table_offset_to_html(html, table_bbox)
			
 
				+            converted_cells = TableCoordinateUtils.add_table_offset_to_cells(cells, table_bbox)
			
 
				+            converted_html = TableCoordinateUtils.add_table_offset_to_html(html, table_bbox)
			
 
				             return converted_cells, converted_html
			
 
				         
			
 
				         table_offset_x, table_offset_y = table_bbox[0], table_bbox[1]
			
@@ -610,7 +341,7 @@ class CoordinateUtils:
 
				             return ocr_boxes
			
 
				         
			
 
				         if not MERGER_AVAILABLE or BBoxExtractor is None:
			
 
				-            return CoordinateUtils.add_table_offset_to_ocr_boxes(ocr_boxes, table_bbox)
			
 
				+            return TableCoordinateUtils.add_table_offset_to_ocr_boxes(ocr_boxes, table_bbox)
			
 
				         
			
 
				         offset_x = table_bbox[0]
			
 
				         offset_y = table_bbox[1]
			
@@ -656,20 +387,6 @@ class CoordinateUtils:
 
				         return converted_boxes
			
 
				     
			
 
				     @staticmethod
			
 
				-    def is_poly_format(bbox: Any) -> bool:
			
 
				-        """
			
 
				-        检测 bbox 是否为四点多边形格式
			
 
				-        
			
 
				-        四点格式: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
			
 
				-        矩形格式: [x_min, y_min, x_max, y_max]
			
 
				-        """
			
 
				-        if not bbox or not isinstance(bbox, list):
			
 
				-            return False
			
 
				-        if len(bbox) != 4:
			
 
				-            return False
			
 
				-        return isinstance(bbox[0], (list, tuple))
			
 
				-    
			
 
				-    @staticmethod
			
 
				     def transform_coords_to_original(
			
 
				         element: Dict[str, Any],
			
 
				         rotate_angle: int,
			
@@ -726,7 +443,7 @@ class CoordinateUtils:
 
				             
			
 
				             # 转换 HTML 中的 data-bbox 属性
			
 
				             if 'html' in content and content['html']:
			
 
				-                content['html'] = CoordinateUtils.transform_html_data_bbox(
			
 
				+                content['html'] = TableCoordinateUtils.transform_html_data_bbox(
			
 
				                     content['html'], rotate_angle, orig_image_size
			
 
				                 )
			
 
				         
			
@@ -736,7 +453,7 @@ class CoordinateUtils:
 
				             if ocr_details:
			
 
				                 for detail in ocr_details:
			
 
				                     if 'bbox' in detail and detail['bbox']:
			
 
				-                        if CoordinateUtils.is_poly_format(detail['bbox']):
			
 
				+                        if CoordinateUtils and CoordinateUtils.is_poly_format(detail['bbox']):
			
 
				                             detail['bbox'] = BBoxExtractor.inverse_rotate_coordinates(
			
 
				                                 detail['bbox'], rotate_angle, orig_image_size
			
 
				                             )
			
@@ -782,4 +499,3 @@ class CoordinateUtils:
 
				         
			
 
				         pattern = r'data-bbox="(\[[^\]]+\])"'
			
 
				         return re.sub(pattern, replace_bbox, html)
			
 
				-
			
--- a/ocr_utils/__init__.py
+++ b/ocr_utils/__init__.py
@@ -56,6 +56,8 @@ from .number_utils import (
 
				     parse_number,
			
 
				     normalize_text_number
			
 
				 )
			
 
				+# 坐标工具使用延迟导入，避免循环依赖
			
 
				+# from .coordinate_utils import CoordinateUtils  # 已移除，改为延迟导入
			
 
				 
			
 
				 __all__ = [
			
 
				     # PDF 工具
			
@@ -112,6 +114,8 @@ __all__ = [
 
				     # 数字解析工具
			
 
				     'parse_number',
			
 
				     'normalize_text_number',
			
 
				+    # 坐标工具
			
 
				+    'CoordinateUtils',
			
 
				 ]
			
 
				 
			
 
				 
			
@@ -132,6 +136,12 @@ def __getattr__(name: str):
 
				         """
			
 
				         from .bbox_utils import BBoxExtractor
			
 
				         return BBoxExtractor
			
 
				+    elif name == 'CoordinateUtils':
			
 
				+        """
			
 
				+        延迟导入 CoordinateUtils，只有在实际使用时才导入。
			
 
				+        """
			
 
				+        from .coordinate_utils import CoordinateUtils
			
 
				+        return CoordinateUtils
			
 
				     raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
			
 
				 
			
 
				 __version__ = "1.0.0"
			
--- a/ocr_utils/coordinate_utils.py
+++ b/ocr_utils/coordinate_utils.py
@@ -0,0 +1,308 @@
 
				+"""
			
 
				+通用坐标转换工具模块
			
 
				+
			
 
				+提供通用的坐标计算和转换功能：
			
 
				+- 底层坐标计算（IoU、重叠比例）
			
 
				+- 多边形/bbox 格式转换
			
 
				+- 相对坐标 → 绝对坐标转换
			
 
				+- 图像裁剪
			
 
				+- 格式检测
			
 
				+
			
 
				+此模块从 universal_doc_parser 中提取，供多个模块共享使用。
			
 
				+"""
			
 
				+from typing import List, Tuple, Union, Any
			
 
				+import numpy as np
			
 
				+
			
 
				+# 导入 MinerU 组件（用于 IoU 计算优化）
			
 
				+try:
			
 
				+    from mineru.utils.boxbase import calculate_iou as mineru_calculate_iou
			
 
				+    from mineru.utils.boxbase import calculate_overlap_area_2_minbox_area_ratio
			
 
				+    MINERU_BOXBASE_AVAILABLE = True
			
 
				+except ImportError:
			
 
				+    MINERU_BOXBASE_AVAILABLE = False
			
 
				+    mineru_calculate_iou = None
			
 
				+    calculate_overlap_area_2_minbox_area_ratio = None
			
 
				+
			
 
				+
			
 
				+class CoordinateUtils:
			
 
				+    """通用坐标转换工具类"""
			
 
				+    
			
 
				+    # ==================== 底层坐标计算方法 ====================
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    def calculate_iou(bbox1: List[float], bbox2: List[float]) -> float:
			
 
				+        """
			
 
				+        计算两个 bbox 的 IoU（交并比）
			
 
				+        
			
 
				+        Args:
			
 
				+            bbox1: 第一个 bbox [x1, y1, x2, y2]
			
 
				+            bbox2: 第二个 bbox [x1, y1, x2, y2]
			
 
				+            
			
 
				+        Returns:
			
 
				+            IoU 值
			
 
				+        """
			
 
				+        if MINERU_BOXBASE_AVAILABLE and mineru_calculate_iou is not None:
			
 
				+            return mineru_calculate_iou(bbox1, bbox2)
			
 
				+        
			
 
				+        # 备用实现
			
 
				+        x_left = max(bbox1[0], bbox2[0])
			
 
				+        y_top = max(bbox1[1], bbox2[1])
			
 
				+        x_right = min(bbox1[2], bbox2[2])
			
 
				+        y_bottom = min(bbox1[3], bbox2[3])
			
 
				+        
			
 
				+        if x_right < x_left or y_bottom < y_top:
			
 
				+            return 0.0
			
 
				+        
			
 
				+        intersection_area = (x_right - x_left) * (y_bottom - y_top)
			
 
				+        bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
			
 
				+        bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
			
 
				+        
			
 
				+        if bbox1_area == 0 or bbox2_area == 0:
			
 
				+            return 0.0
			
 
				+        
			
 
				+        return intersection_area / float(bbox1_area + bbox2_area - intersection_area)
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    def calculate_overlap_ratio(bbox1: List[float], bbox2: List[float]) -> float:
			
 
				+        """
			
 
				+        计算重叠面积占小框面积的比例
			
 
				+        
			
 
				+        Args:
			
 
				+            bbox1: 第一个 bbox [x1, y1, x2, y2]
			
 
				+            bbox2: 第二个 bbox [x1, y1, x2, y2]
			
 
				+            
			
 
				+        Returns:
			
 
				+            重叠比例
			
 
				+        """
			
 
				+        if MINERU_BOXBASE_AVAILABLE and calculate_overlap_area_2_minbox_area_ratio is not None:
			
 
				+            return calculate_overlap_area_2_minbox_area_ratio(bbox1, bbox2)
			
 
				+        
			
 
				+        # 备用实现
			
 
				+        x_left = max(bbox1[0], bbox2[0])
			
 
				+        y_top = max(bbox1[1], bbox2[1])
			
 
				+        x_right = min(bbox1[2], bbox2[2])
			
 
				+        y_bottom = min(bbox1[3], bbox2[3])
			
 
				+        
			
 
				+        if x_right < x_left or y_bottom < y_top:
			
 
				+            return 0.0
			
 
				+        
			
 
				+        intersection_area = (x_right - x_left) * (y_bottom - y_top)
			
 
				+        area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
			
 
				+        area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
			
 
				+        min_area = min(area1, area2)
			
 
				+        
			
 
				+        if min_area == 0:
			
 
				+            return 0.0
			
 
				+        
			
 
				+        return intersection_area / min_area
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    def calculate_overlap_in_bbox1_ratio(
			
 
				+        bbox1: List[float], 
			
 
				+        bbox2: List[float]
			
 
				+    ) -> float:
			
 
				+        """
			
 
				+        计算 bbox1 被 bbox2 覆盖的面积比例
			
 
				+        
			
 
				+        Args:
			
 
				+            bbox1: 第一个 bbox [x1, y1, x2, y2]
			
 
				+            bbox2: 第二个 bbox [x1, y1, x2, y2]
			
 
				+            
			
 
				+        Returns:
			
 
				+            bbox1 被覆盖的比例
			
 
				+        """
			
 
				+        x_left = max(bbox1[0], bbox2[0])
			
 
				+        y_top = max(bbox1[1], bbox2[1])
			
 
				+        x_right = min(bbox1[2], bbox2[2])
			
 
				+        y_bottom = min(bbox1[3], bbox2[3])
			
 
				+        
			
 
				+        if x_right < x_left or y_bottom < y_top:
			
 
				+            return 0.0
			
 
				+        
			
 
				+        intersection_area = (x_right - x_left) * (y_bottom - y_top)
			
 
				+        bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
			
 
				+        
			
 
				+        if bbox1_area == 0:
			
 
				+            return 0.0
			
 
				+        
			
 
				+        return intersection_area / bbox1_area
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    def poly_to_bbox(poly: Union[List, None]) -> List[float]:
			
 
				+        """
			
 
				+        将多边形坐标转换为 bbox 格式
			
 
				+        
			
 
				+        Args:
			
 
				+            poly: 多边形坐标，支持以下格式：
			
 
				+                - [[x1,y1], [x2,y1], [x2,y2], [x1,y2]] (4个点)
			
 
				+                - [x1, y1, x2, y1, x2, y2, x1, y2] (8个值)
			
 
				+                - [x1, y1, x2, y2] (4个值，已是bbox)
			
 
				+                
			
 
				+        Returns:
			
 
				+            bbox [x1, y1, x2, y2]
			
 
				+        """
			
 
				+        if not poly:
			
 
				+            return [0, 0, 0, 0]
			
 
				+        
			
 
				+        # 处理嵌套列表格式 [[x1,y1], [x2,y1], ...]
			
 
				+        if isinstance(poly[0], (list, tuple)):
			
 
				+            xs = [p[0] for p in poly]
			
 
				+            ys = [p[1] for p in poly]
			
 
				+            return [min(xs), min(ys), max(xs), max(ys)]
			
 
				+        
			
 
				+        # 处理平面列表格式
			
 
				+        if len(poly) == 4:
			
 
				+            # 已经是 bbox 格式
			
 
				+            return list(poly)
			
 
				+        elif len(poly) >= 8:
			
 
				+            # 8点格式：[x1, y1, x2, y1, x2, y2, x1, y2]
			
 
				+            xs = [poly[i] for i in range(0, len(poly), 2)]
			
 
				+            ys = [poly[i] for i in range(1, len(poly), 2)]
			
 
				+            return [min(xs), min(ys), max(xs), max(ys)]
			
 
				+        
			
 
				+        return [0, 0, 0, 0]
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    def bbox_to_poly(bbox: List[float]) -> List[List[float]]:
			
 
				+        """
			
 
				+        将 bbox 转换为多边形坐标
			
 
				+        
			
 
				+        Args:
			
 
				+            bbox: [x1, y1, x2, y2]
			
 
				+            
			
 
				+        Returns:
			
 
				+            [[x1,y1], [x2,y1], [x2,y2], [x1,y2]]
			
 
				+        """
			
 
				+        if not bbox or len(bbox) < 4:
			
 
				+            return [[0, 0], [0, 0], [0, 0], [0, 0]]
			
 
				+        
			
 
				+        x1, y1, x2, y2 = bbox[:4]
			
 
				+        return [
			
 
				+            [float(x1), float(y1)],
			
 
				+            [float(x2), float(y1)],
			
 
				+            [float(x2), float(y2)],
			
 
				+            [float(x1), float(y2)]
			
 
				+        ]
			
 
				+    
			
 
				+    # ==================== 图像裁剪 ====================
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    def crop_region(image: np.ndarray, bbox: List[float], padding: int = 0) -> np.ndarray:
			
 
				+        """
			
 
				+        裁剪图像区域
			
 
				+        
			
 
				+        Args:
			
 
				+            image: 原始图像
			
 
				+            bbox: 裁剪区域 [x1, y1, x2, y2]
			
 
				+            padding: 边缘padding（像素），可以为正数（扩展裁剪区域）或负数（收缩裁剪区域）
			
 
				+            
			
 
				+        Returns:
			
 
				+            裁剪后的图像
			
 
				+        """
			
 
				+        if len(bbox) < 4:
			
 
				+            return image
			
 
				+        
			
 
				+        h, w = image.shape[:2]
			
 
				+        
			
 
				+        # 解析padding（支持单个值或四个值）
			
 
				+        if isinstance(padding, (int, float)):
			
 
				+            pad_left = pad_right = pad_top = pad_bottom = int(padding)
			
 
				+        else:
			
 
				+            # 假设是长度为4的元组/列表 [left, top, right, bottom]
			
 
				+            if len(padding) >= 4:
			
 
				+                pad_left, pad_top, pad_right, pad_bottom = [int(p) for p in padding[:4]]
			
 
				+            else:
			
 
				+                pad_left = pad_top = pad_right = pad_bottom = 0
			
 
				+
			
 
				+        x1 = max(0 - pad_left, int(bbox[0]) - pad_left)
			
 
				+        y1 = max(0 - pad_top, int(bbox[1]) - pad_top)
			
 
				+        x2 = min(w + pad_right, int(bbox[2]) + pad_right)
			
 
				+        y2 = min(h + pad_bottom, int(bbox[3]) + pad_bottom)
			
 
				+
			
 
				+        # 确保坐标有效
			
 
				+        x1 = max(0, x1)
			
 
				+        y1 = max(0, y1)
			
 
				+        x2 = min(w, x2)
			
 
				+        y2 = min(h, y2)
			
 
				+
			
 
				+        # 检查是否有效区域
			
 
				+        if x2 <= x1 or y2 <= y1:
			
 
				+            return image
			
 
				+        
			
 
				+        return image[y1:y2, x1:x2]
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    def bbox_overlap(bbox1: List[float], bbox2: List[float]) -> bool:
			
 
				+        """
			
 
				+        检查两个 bbox 是否重叠
			
 
				+        
			
 
				+        Args:
			
 
				+            bbox1: 第一个 bbox [x1, y1, x2, y2]
			
 
				+            bbox2: 第二个 bbox [x1, y1, x2, y2]
			
 
				+            
			
 
				+        Returns:
			
 
				+            是否重叠
			
 
				+        """
			
 
				+        if len(bbox1) < 4 or len(bbox2) < 4:
			
 
				+            return False
			
 
				+        
			
 
				+        x1_1, y1_1, x2_1, y2_1 = bbox1[:4]
			
 
				+        x1_2, y1_2, x2_2, y2_2 = bbox2[:4]
			
 
				+        
			
 
				+        if x2_1 < x1_2 or x2_2 < x1_1:
			
 
				+            return False
			
 
				+        if y2_1 < y1_2 or y2_2 < y1_1:
			
 
				+            return False
			
 
				+        
			
 
				+        return True
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    def convert_to_absolute_coords(
			
 
				+        relative_bbox: List, 
			
 
				+        region_bbox: List[float]
			
 
				+    ) -> List:
			
 
				+        """
			
 
				+        将相对坐标转换为绝对坐标
			
 
				+        
			
 
				+        Args:
			
 
				+            relative_bbox: 相对坐标
			
 
				+            region_bbox: 区域的绝对坐标 [x1, y1, x2, y2]
			
 
				+            
			
 
				+        Returns:
			
 
				+            绝对坐标
			
 
				+        """
			
 
				+        if not relative_bbox or len(region_bbox) < 4:
			
 
				+            return relative_bbox
			
 
				+        
			
 
				+        bx1, by1 = region_bbox[0], region_bbox[1]
			
 
				+        
			
 
				+        # 处理4点坐标格式 [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
			
 
				+        if isinstance(relative_bbox[0], (list, tuple)):
			
 
				+            return [
			
 
				+                [p[0] + bx1, p[1] + by1] for p in relative_bbox
			
 
				+            ]
			
 
				+        
			
 
				+        # 处理4值坐标格式 [x1, y1, x2, y2]
			
 
				+        if len(relative_bbox) >= 4:
			
 
				+            return [
			
 
				+                relative_bbox[0] + bx1,
			
 
				+                relative_bbox[1] + by1,
			
 
				+                relative_bbox[2] + bx1,
			
 
				+                relative_bbox[3] + by1
			
 
				+            ]
			
 
				+        
			
 
				+        return relative_bbox
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    def is_poly_format(bbox: Any) -> bool:
			
 
				+        """
			
 
				+        检测 bbox 是否为四点多边形格式
			
 
				+        
			
 
				+        四点格式: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
			
 
				+        矩形格式: [x_min, y_min, x_max, y_max]
			
 
				+        """
			
 
				+        if not bbox or not isinstance(bbox, list):
			
 
				+            return False
			
 
				+        if len(bbox) != 4:
			
 
				+            return False
			
 
				+        return isinstance(bbox[0], (list, tuple))