|
@@ -71,8 +71,7 @@ def test_fusion_engine(detector):
|
|
|
'iou_merge_threshold': 0.7,
|
|
'iou_merge_threshold': 0.7,
|
|
|
'iou_nms_threshold': 0.5,
|
|
'iou_nms_threshold': 0.5,
|
|
|
'rtdetr_conf_threshold': 0.5,
|
|
'rtdetr_conf_threshold': 0.5,
|
|
|
- 'enable_ocr_compensation': True,
|
|
|
|
|
- 'skip_rtdetr_for_txt_pdf': True
|
|
|
|
|
|
|
+ 'enable_ocr_compensation': True
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
# 初始化
|
|
# 初始化
|
|
@@ -93,33 +92,30 @@ def test_fusion_engine(detector):
|
|
|
{'bbox': [20, 70, 80, 90], 'text': 'Cell 2'}
|
|
{'bbox': [20, 70, 80, 90], 'text': 'Cell 2'}
|
|
|
]
|
|
]
|
|
|
|
|
|
|
|
- # Test 2.1: 文字PDF模式(应跳过RT-DETR)
|
|
|
|
|
- print("\n📄 Test 2.1: Text PDF mode (should skip RT-DETR)")
|
|
|
|
|
|
|
+ # Test 2.1: 文字PDF模式(现在也使用RT-DETR融合)
|
|
|
|
|
+ print("\n📄 Test 2.1: Text PDF mode (now uses RT-DETR fusion)")
|
|
|
fused_cells, stats = engine.fuse(
|
|
fused_cells, stats = engine.fuse(
|
|
|
table_image=table_image,
|
|
table_image=table_image,
|
|
|
unet_cells=unet_cells,
|
|
unet_cells=unet_cells,
|
|
|
ocr_boxes=ocr_boxes,
|
|
ocr_boxes=ocr_boxes,
|
|
|
- pdf_type='txt',
|
|
|
|
|
- upscale=1.0
|
|
|
|
|
|
|
+ pdf_type='txt'
|
|
|
)
|
|
)
|
|
|
print(f" Use RT-DETR: {stats['use_rtdetr']}")
|
|
print(f" Use RT-DETR: {stats['use_rtdetr']}")
|
|
|
print(f" Fused cells: {len(fused_cells)}")
|
|
print(f" Fused cells: {len(fused_cells)}")
|
|
|
- assert not stats['use_rtdetr'], "❌ Should skip RT-DETR for text PDF"
|
|
|
|
|
- assert len(fused_cells) == len(unet_cells), "❌ Should keep UNet cells only"
|
|
|
|
|
- print(" ✅ Correctly skipped RT-DETR for text PDF")
|
|
|
|
|
|
|
+ assert stats['use_rtdetr'], "✔️ Now uses RT-DETR for all PDF types"
|
|
|
|
|
+ print(" ✅ Correctly enabled RT-DETR for text PDF")
|
|
|
|
|
|
|
|
- # Test 2.2: 扫描PDF模式(应启用RT-DETR,但因为是假图片可能失败)
|
|
|
|
|
- print("\n🔍 Test 2.2: Scan PDF mode (should enable RT-DETR)")
|
|
|
|
|
|
|
+ # Test 2.2: 扫描PDF模式
|
|
|
|
|
+ print("\n🔍 Test 2.2: Scan PDF mode")
|
|
|
fused_cells, stats = engine.fuse(
|
|
fused_cells, stats = engine.fuse(
|
|
|
table_image=table_image,
|
|
table_image=table_image,
|
|
|
unet_cells=unet_cells,
|
|
unet_cells=unet_cells,
|
|
|
ocr_boxes=ocr_boxes,
|
|
ocr_boxes=ocr_boxes,
|
|
|
- pdf_type='ocr',
|
|
|
|
|
- upscale=1.0
|
|
|
|
|
|
|
+ pdf_type='ocr'
|
|
|
)
|
|
)
|
|
|
print(f" Use RT-DETR: {stats['use_rtdetr']}")
|
|
print(f" Use RT-DETR: {stats['use_rtdetr']}")
|
|
|
print(f" Stats: {stats}")
|
|
print(f" Stats: {stats}")
|
|
|
- print(" ✅ Fusion completed (RT-DETR may return 0 cells on blank image)")
|
|
|
|
|
|
|
+ print(" ✅ Fusion completed")
|
|
|
|
|
|
|
|
return engine
|
|
return engine
|
|
|
|
|
|
|
@@ -130,24 +126,25 @@ def test_adaptive_strategy():
|
|
|
print("Test 3: 自适应策略测试")
|
|
print("Test 3: 自适应策略测试")
|
|
|
print("=" * 60)
|
|
print("=" * 60)
|
|
|
|
|
|
|
|
- engine = CellFusionEngine(rtdetr_detector=None, config={'skip_rtdetr_for_txt_pdf': True})
|
|
|
|
|
|
|
+ engine = CellFusionEngine(rtdetr_detector=None, config={})
|
|
|
|
|
|
|
|
- # Test 3.1: 文字PDF + 正常单元格数 → 跳过
|
|
|
|
|
|
|
+ # Test 3.1: 文字PDF + 检测器未初始化 → 跳过
|
|
|
should_use = engine.should_use_rtdetr('txt', unet_cell_count=10, table_size=(500, 500))
|
|
should_use = engine.should_use_rtdetr('txt', unet_cell_count=10, table_size=(500, 500))
|
|
|
- print(f"📄 Text PDF, 10 cells: use_rtdetr={should_use}")
|
|
|
|
|
- assert not should_use, "❌ Should skip RT-DETR"
|
|
|
|
|
|
|
+ print(f"📄 Text PDF, 10 cells, no detector: use_rtdetr={should_use}")
|
|
|
|
|
+ assert not should_use, "❌ Should skip (detector not available)"
|
|
|
print(" ✅ Correct")
|
|
print(" ✅ Correct")
|
|
|
|
|
|
|
|
- # Test 3.2: 扫描PDF + 正常单元格数 → 跳过(因为检测器未初始化)
|
|
|
|
|
|
|
+ # Test 3.2: 扫描PDF + 检测器未初始化 → 跳过
|
|
|
should_use = engine.should_use_rtdetr('ocr', unet_cell_count=10, table_size=(500, 500))
|
|
should_use = engine.should_use_rtdetr('ocr', unet_cell_count=10, table_size=(500, 500))
|
|
|
print(f"🔍 Scan PDF, 10 cells, no detector: use_rtdetr={should_use}")
|
|
print(f"🔍 Scan PDF, 10 cells, no detector: use_rtdetr={should_use}")
|
|
|
assert not should_use, "❌ Should skip (detector not available)"
|
|
assert not should_use, "❌ Should skip (detector not available)"
|
|
|
print(" ✅ Correct")
|
|
print(" ✅ Correct")
|
|
|
|
|
|
|
|
- # Test 3.3: UNet为空 → 强制启用(但检测器未初始化,仍跳过)
|
|
|
|
|
|
|
+ # Test 3.3: UNet为空 + 检测器未初始化 → 仍跳过
|
|
|
should_use = engine.should_use_rtdetr('ocr', unet_cell_count=0, table_size=(500, 500))
|
|
should_use = engine.should_use_rtdetr('ocr', unet_cell_count=0, table_size=(500, 500))
|
|
|
print(f"🚨 Scan PDF, 0 cells, no detector: use_rtdetr={should_use}")
|
|
print(f"🚨 Scan PDF, 0 cells, no detector: use_rtdetr={should_use}")
|
|
|
- print(" ⚠️ Would force enable if detector available")
|
|
|
|
|
|
|
+ assert not should_use, "❌ Should skip (detector not available)"
|
|
|
|
|
+ print(" ✅ Correct (would force enable if detector available)")
|
|
|
|
|
|
|
|
print("\n✅ All adaptive strategy tests passed")
|
|
print("\n✅ All adaptive strategy tests passed")
|
|
|
|
|
|