| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128 |
- """
- 布局检测器测试脚本
- """
- import sys
- from pathlib import Path
- import cv2
- import random
# Put the project root (the directory one level above this file's directory)
# at the front of sys.path. This must run before the `models.adapters` import
# that follows; presumably it lets that project-local package resolve when
# this file is executed directly.
project_root = Path(__file__).parents[1]
sys.path.insert(0, str(project_root))
- from models.adapters import PaddleLayoutDetector
def _draw_detections(img, results):
    """Return a copy of *img* with one labelled box drawn per detection.

    Each entry of *results* is read through its ``'category'``,
    ``'confidence'`` and ``'bbox'`` keys; ``'bbox'`` is unpacked as
    ``(x1, y1, x2, y2)``. Every distinct category gets one random color,
    assigned in first-seen order.
    """
    category_colors = {}
    for res in results:
        cat = res['category']
        if cat not in category_colors:
            category_colors[cat] = tuple(
                random.randint(50, 255) for _ in range(3)
            )

    vis_img = img.copy()
    font = cv2.FONT_HERSHEY_SIMPLEX
    for res in results:
        # OpenCV drawing calls need integer pixel coordinates; cast in case
        # the detector hands back floats or NumPy scalars.
        x1, y1, x2, y2 = (int(v) for v in res['bbox'])
        cat = res['category']
        color = category_colors[cat]

        # Bounding box outline.
        cv2.rectangle(vis_img, (x1, y1), (x2, y2), color, 2)

        # Filled label background sitting just above the box's top edge.
        label = f"{cat} {res['confidence']:.2f}"
        (text_w, text_h), _ = cv2.getTextSize(label, font, 0.5, 1)
        cv2.rectangle(
            vis_img,
            (x1, y1 - text_h - 4),
            (x1 + text_w, y1),
            color,
            -1,
        )

        # Label text in white on top of the filled background.
        cv2.putText(
            vis_img,
            label,
            (x1, y1 - 2),
            font,
            0.5,
            (255, 255, 255),
            1,
        )
    return vis_img


def test_layout_detector():
    """Run the layout detector on one sample page and save a visualization.

    Builds a ``PaddleLayoutDetector`` from a hard-coded config (ONNX model
    path, CPU device, confidence 0.25), runs ``detect`` on a hard-coded
    sample image, prints every detected region plus a per-category count,
    and, when at least one region was found, writes an annotated copy of the
    image into an ``output`` directory next to this file.

    Returns ``None``. If the image cannot be read, a failure message is
    printed and the function returns early. Once ``initialize()`` has
    returned, ``cleanup()`` is called on every exit path (early return or
    exception included).
    """
    from collections import Counter

    # Test configuration.
    config = {
        'model_dir': '/Users/zhch158/workspace/repository.git/PaddleX/zhch/unified_pytorch_models/Layout/RT-DETR-H_layout_17cls.onnx',
        'device': 'cpu',
        'conf': 0.25
    }

    # Initialize the detector.
    print("🔧 Initializing detector...")
    detector = PaddleLayoutDetector(config)
    detector.initialize()

    try:
        # Load the test image.
        img_path = "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/PaddleOCR_VL_Results/B用户_扫描流水/B用户_扫描流水_page_001.png"
        print(f"\n📖 Loading image: {img_path}")
        img = cv2.imread(img_path)

        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            print(f"❌ Failed to load image: {img_path}")
            return

        print(f" Image shape: {img.shape}")

        # Run detection.
        print("\n🔍 Detecting layout...")
        results = detector.detect(img)

        print(f"\n✅ 检测到 {len(results)} 个区域:")
        for i, res in enumerate(results, 1):
            print(f" [{i}] {res['category']}: "
                  f"score={res['confidence']:.3f}, "
                  f"bbox={res['bbox']}, "
                  f"size={res['raw']['width']}x{res['raw']['height']}, "
                  f"original={res['raw']['original_category_name']}")

        # Per-category tally.
        category_counts = Counter(res['category'] for res in results)

        print("\n📊 类别统计 (MinerU格式):")
        for cat, count in sorted(category_counts.items()):
            print(f" - {cat}: {count}")

        # Visualize only when there is something to draw.
        if len(results) > 0:
            print("\n🎨 Generating visualization...")
            vis_img = _draw_detections(img, results)

            # Save the annotated image next to this file.
            output_dir = Path(__file__).parent / "output"
            output_dir.mkdir(exist_ok=True)
            output_path = output_dir / f"{Path(img_path).stem}_layout_vis.jpg"
            # cv2.imwrite returns False when the file could not be written,
            # so only report success when it says so.
            if cv2.imwrite(str(output_path), vis_img):
                print(f"💾 Visualization saved to: {output_path}")
            else:
                print(f"❌ Failed to save visualization: {output_path}")
    finally:
        # Release the detector on every exit path, not just the happy one.
        detector.cleanup()

    print("\n✅ 测试完成!")
# Run the detector check when this file is executed directly as a script;
# nothing runs on plain import.
if __name__ == "__main__":
    test_layout_detector()
|