5 months ago · 7b7b368f07
--- a/ocr_tools/universal_doc_parser/models/adapters/wired_table/cell_fusion.py
+++ b/ocr_tools/universal_doc_parser/models/adapters/wired_table/cell_fusion.py
@@ -185,11 +185,11 @@ class CellFusionEngine:
 
				         fused_cells = self._nms_filter(fused_cells, self.iou_nms_threshold)
			
 
				         
			
 
				         # Phase 4: OCR 补偿（可选）
			
 
				-        if self.enable_ocr_compensation and ocr_boxes:
			
 
				-            fused_cells, cell_labels, ocr_comp_count = self._compensate_with_ocr(
			
 
				-                fused_cells, cell_labels, ocr_boxes, (w, h)
			
 
				-            )
			
 
				-            fusion_stats['ocr_compensated_count'] = ocr_comp_count
			
 
				+        # if self.enable_ocr_compensation and ocr_boxes:
			
 
				+        #     fused_cells, cell_labels, ocr_comp_count = self._compensate_with_ocr(
			
 
				+        #         fused_cells, cell_labels, ocr_boxes, (w, h)
			
 
				+        #     )
			
 
				+        #     fusion_stats['ocr_compensated_count'] = ocr_comp_count
			
 
				         
			
 
				         fusion_stats['fused_count'] = len(fused_cells)
			
 
				         
			
@@ -306,7 +306,14 @@ class CellFusionEngine:
 
				                     # 如果覆盖率>40%，说明这是一个真实的合并单元格
			
 
				                     # 降低阈值从0.5到0.4，因为合并单元格可能包含很多空白区域
			
 
				                     if coverage > 0.4:
			
 
				-                        fused_cells.append(rtdetr_cell)
			
 
				+                        # Treat as a merged cell: use the union (outermost extent) of the bounding box and the RT-DETR box
			
 
				+                        fused_cell = [
			
 
				+                            min(bounding_x1, rtdetr_cell[0]),
			
 
				+                            min(bounding_y1, rtdetr_cell[1]),
			
 
				+                            max(bounding_x2, rtdetr_cell[2]),
			
 
				+                            max(bounding_y2, rtdetr_cell[3])
			
 
				+                        ]
			
 
				+                        fused_cells.append(fused_cell)
			
 
				                         cell_labels.append('merged_span')  # 标记为合并单元格
			
 
				                         rtdetr_matched[rt_idx] = True
			
 
				                         # 标记所有被包含的UNet单元格