Sfoglia il codice sorgente

feat(grid_recovery): 增加 OCR 文本容差参数,优化网格结构恢复算法

zhch158_admin 1 settimana fa
parent
commit
93977737f5

+ 8 - 3
ocr_tools/universal_doc_parser/models/adapters/wired_table/grid_recovery.py

@@ -498,7 +498,10 @@ class GridRecovery:
     
     
     @staticmethod
-    def recover_grid_structure(bboxes: List[List[float]]) -> List[Dict]:
+    def recover_grid_structure(
+        bboxes: List[List[float]],
+        ocr_text_pixel_tolerance: float = 0.0
+    ) -> List[Dict]:
         """
         从散乱的单元格 bbox 恢复表格的行列结构 (row, col, rowspan, colspan)
         重构版:基于投影网格线 (Projected Grid Lines) 的算法
@@ -506,6 +509,7 @@ class GridRecovery:
         
         Args:
             bboxes: 单元格bbox列表
+            ocr_text_pixel_tolerance: OCR文本容差(原图坐标系)
             
         Returns:
             结构化单元格列表,包含 row, col, rowspan, colspan
@@ -519,14 +523,15 @@ class GridRecovery:
             y_coords.append(b[1])
             y_coords.append(b[3])
         
-        row_dividers= GridRecovery.find_grid_lines(y_coords, tolerance=5, min_support=1)
+        tolerance = max(5.0, min(float(ocr_text_pixel_tolerance), 20.0))
+        row_dividers = GridRecovery.find_grid_lines(y_coords, tolerance=tolerance, min_support=1)
         
         # 2. 识别列分割线 (X轴)
         x_coords = []
         for b in bboxes:
             x_coords.append(b[0])
             x_coords.append(b[2])
-        col_dividers= GridRecovery.find_grid_lines(x_coords, tolerance=5, min_support=1)
+        col_dividers = GridRecovery.find_grid_lines(x_coords, tolerance=tolerance, min_support=1)
         
         # 3. 构建网格结构
         structured_cells = []