Переглянути джерело

fix: Improve coordinate-transformation accuracy in WiredTableVisualizer to reduce cumulative errors, and add detailed debug logging of cell coordinates during visualization.

zhch158_admin 2 днів тому
батько
коміт
1bb438fba3

+ 34 - 5
ocr_tools/universal_doc_parser/models/adapters/wired_table/visualization.py

@@ -59,11 +59,20 @@ class WiredTableVisualizer:
         Args:
             hpred_up: 横线预测mask(上采样后)
             vpred_up: 竖线预测mask(上采样后)
-            bboxes: 单元格bbox列表
+            bboxes: 单元格bbox列表(原图坐标)
             upscale: 上采样比例
             output_path: 输出路径
         """
         h, w = hpred_up.shape[:2]
+        
+        # 调试:验证上采样图像尺寸
+        expected_h = int(bboxes[-1][3] * upscale + 0.5) if bboxes else 0
+        expected_w = int(bboxes[-1][2] * upscale + 0.5) if bboxes else 0
+        logger.debug(
+            f"上采样图像尺寸: 实际=[{h}, {w}], "
+            f"预期(基于最大bbox)≈[{expected_h}, {expected_w}], "
+            f"upscale={upscale:.3f}"
+        )
 
         # 与连通域提取相同的预处理,以获得直观的网格线背景
         _, h_bin = cv2.threshold(hpred_up, 127, 255, cv2.THRESH_BINARY)
@@ -78,12 +87,32 @@ class WiredTableVisualizer:
         vis[grid_mask > 0] = [0, 0, 255]  # 红色线条
 
         # 在上采样坐标系上绘制单元格框
-        for box in bboxes:
-            x1, y1, x2, y2 = [int(c * upscale) for c in box]
-            cv2.rectangle(vis, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        # 修复:使用更精确的坐标转换,避免累积误差
+        for idx, box in enumerate(bboxes):
+            # 使用四舍五入而不是直接截断,提高精度
+            x1 = int(box[0] * upscale + 0.5)
+            y1 = int(box[1] * upscale + 0.5)
+            x2 = int(box[2] * upscale + 0.5)
+            y2 = int(box[3] * upscale + 0.5)
+            
+            # 确保坐标在图像范围内
+            x1 = max(0, min(x1, w - 1))
+            y1 = max(0, min(y1, h - 1))
+            x2 = max(0, min(x2, w - 1))
+            y2 = max(0, min(y2, h - 1))
+            
+            if x2 > x1 and y2 > y1:
+                cv2.rectangle(vis, (x1, y1), (x2, y2), (0, 255, 0), 2)
+            
+            # 调试日志:输出前几个和后几个单元格的坐标转换信息
+            if idx < 3 or idx >= len(bboxes) - 3:
+                logger.debug(
+                    f"单元格 {idx}: 原图坐标 [{box[0]:.1f}, {box[1]:.1f}, {box[2]:.1f}, {box[3]:.1f}] "
+                    f"-> 上采样坐标 [{x1}, {y1}, {x2}, {y2}] (upscale={upscale:.3f})"
+                )
 
         cv2.imwrite(output_path, vis)
-        logger.info(f"连通域可视化: {output_path}")
+        logger.info(f"连通域可视化: {output_path} (共 {len(bboxes)} 个单元格)")
     
     @staticmethod
     def visualize_grid_structure(