Przeglądaj źródła

feat: Introduce wired table parsing adapter with grid recovery, OCR formatting, and enhanced region cropping.

zhch158_admin 3 dni temu
rodzic
commit
3b3c3c9c5a

+ 31 - 11
ocr_tools/universal_doc_parser/core/coordinate_utils.py

@@ -197,28 +197,48 @@ class CoordinateUtils:
     # ==================== 图像裁剪 ====================
     
     @staticmethod
     def crop_region(image: np.ndarray, bbox: List[float], padding: int = 0) -> np.ndarray:
         """
         Crop a rectangular region out of an image.

         Args:
             image: Source image; its first two dimensions are read as
                 (height, width).
             bbox: Crop rectangle as [x1, y1, x2, y2]. Only the first four
                 entries are used and each one is truncated with int().
             padding: Margin in pixels. A positive value grows the crop
                 rectangle, a negative value shrinks it. May be a single
                 number applied to all four sides (Python or NumPy scalar),
                 or a sequence [left, top, right, bottom]. A sequence with
                 fewer than four entries is treated as no padding.

         Returns:
             The slice image[y1:y2, x1:x2] after the rectangle has been
             clamped to the image bounds. The input image itself is returned
             unchanged when bbox has fewer than four entries or when the
             resulting rectangle is empty.
         """
         if len(bbox) < 4:
             return image

         height, width = image.shape[:2]

         # np.ndim() is 0 for Python numbers and NumPy scalars alike. A plain
         # isinstance(padding, (int, float)) test sends np.int64 and friends
         # down the sequence branch, where len() raises TypeError.
         if np.ndim(padding) == 0:
             pad_left = pad_top = pad_right = pad_bottom = int(padding)
         elif len(padding) >= 4:
             pad_left, pad_top, pad_right, pad_bottom = (int(p) for p in padding[:4])
         else:
             pad_left = pad_top = pad_right = pad_bottom = 0

         # Clamp the raw box to the image first, then pad. This way a negative
         # padding shrinks from the image edge when the box overshoots it,
         # instead of being absorbed by the part of the box outside the image.
         x1 = max(0, int(bbox[0])) - pad_left
         y1 = max(0, int(bbox[1])) - pad_top
         x2 = min(width, int(bbox[2])) + pad_right
         y2 = min(height, int(bbox[3])) + pad_bottom

         # Positive padding may have pushed the box outside the image again.
         x1 = max(0, x1)
         y1 = max(0, y1)
         x2 = min(width, x2)
         y2 = min(height, y2)

         # An empty or inverted rectangle falls back to the full image rather
         # than returning a zero-sized array.
         if x2 <= x1 or y2 <= y1:
             return image

         return image[y1:y2, x1:x2]
     
     @staticmethod