Jelajahi Sumber

feat(mineru_wired_table): 添加 OCR 文本容差计算,优化网格结构恢复

zhch158_admin 1 Minggu lalu
induk
melakukan
73f20ff9e2

+ 18 - 1
ocr_tools/universal_doc_parser/models/adapters/mineru_wired_table.py

@@ -378,6 +378,19 @@ class MinerUWiredTableRecognizer:
 
             # Step 2: 使用连通域法提取单元格 (替换了原来的投影法)
             debug_prefix = f"{dbg.prefix}_grid" if dbg.prefix else "grid"
+
+            # 计算 OCR 文本容差:取最小行高的 50%,无有效 OCR 时回退为默认值 10.0
+            ocr_heights = []
+            for ocr in ocr_boxes or []:
+                bbox = ocr.get("bbox", [])
+                if len(bbox) >= 4:
+                    height = bbox[3] - bbox[1]
+                    if height > 0:
+                        ocr_heights.append(height)
+            if ocr_heights:
+                ocr_text_pixel_tolerance = min(ocr_heights) * 0.5
+            else:
+                ocr_text_pixel_tolerance = 10.0
             
             # 传入原图的实际尺寸和裁剪padding
             bboxes = self.grid_recovery.compute_cells_from_lines(
@@ -402,6 +415,7 @@ class MinerUWiredTableRecognizer:
                         table_image=table_image,
                         unet_cells=bboxes,
                         ocr_boxes=ocr_boxes or [],
+                        ocr_text_pixel_tolerance=ocr_text_pixel_tolerance,
                         pdf_type=pdf_type,
                         debug_dir=debug_dir,
                         debug_prefix=debug_prefix
@@ -425,7 +439,10 @@ class MinerUWiredTableRecognizer:
 
             # Step 3: 重建网格结构 (计算 row, col, rowspan, colspan)
             # OCR补偿已在Step 2中完成,这里仅做网格重建
-            merged_cells = self.grid_recovery.recover_grid_structure(bboxes)
+            merged_cells = self.grid_recovery.recover_grid_structure(
+                bboxes,
+                ocr_text_pixel_tolerance=ocr_text_pixel_tolerance
+            )
             
             # Step 3.5: 可视化逻辑结构 (新增)
             if self.debug_utils.debug_is_on("save_grid_structure", dbg):