Просмотр исходного кода

feat: 添加 PDF 旋转验证测试程序,支持不同角度的文本坐标验证与渲染一致性

zhch158_admin 3 дня назад
Родитель
Коммит
e8a1f6ece8
1 изменённый файл: 326 добавлений и 0 удалений
  1. 326 0
      ocr_tools/universal_doc_parser/tests/test_pdf_rotation.py

+ 326 - 0
ocr_tools/universal_doc_parser/tests/test_pdf_rotation.py

@@ -0,0 +1,326 @@
+"""
+PDF rotation validation test program.
+
+Exercises PDFs with different rotation angles (0/90/180/270) and checks:
+1. whether text coordinates are transformed correctly
+2. whether text coordinates line up with the rendered image
+3. consistency between the two rendering engines (fitz / pypdfium2)
+"""
+import sys
+from pathlib import Path
+import cv2
+import numpy as np
+from loguru import logger
+import json
+
+# Put the ocr_platform root (three levels above this file's directory) at the
+# front of sys.path so the `ocr_utils` import below can resolve when this
+# file is executed directly.
+current_file = Path(__file__).resolve()
+ocr_platform_root = current_file.parents[3]  # tests -> universal_doc_parser -> ocr_tools -> ocr_platform
+sys.path.insert(0, str(ocr_platform_root))
+
+from ocr_utils.pdf_utils import PDFUtils
+
+
+def create_test_pdf_with_rotation(output_path: str, rotation: int):
+    """
+    Create a single-page A4 test PDF whose page carries the given rotation.
+
+    The page receives a red "Rotation: N°" banner, nine labelled text
+    snippets laid out on a 3x3 grid and a blue border inset by 10 points.
+
+    Args:
+        output_path: Path the PDF is written to.
+        rotation: Page rotation in degrees (0/90/180/270).
+
+    Returns:
+        ``output_path`` on success, or ``None`` when PyMuPDF is not installed.
+    """
+    try:
+        import fitz
+    except ImportError:
+        logger.error("PyMuPDF not installed, cannot create test PDF")
+        return None
+
+    # A4 page size in PDF points (unrotated).
+    page_width, page_height = 595, 842
+
+    # (label, x, y, fontsize) - different positions and sizes across the page.
+    test_texts = [
+        ("Top Left", 50, 100, 24),
+        ("Top Center", 250, 100, 24),
+        ("Top Right", 450, 100, 24),
+        ("Middle Left", 50, 400, 20),
+        ("CENTER TEXT", 220, 420, 28),
+        ("Middle Right", 450, 400, 20),
+        ("Bottom Left", 50, 750, 18),
+        ("Bottom Center", 230, 750, 18),
+        ("Bottom Right", 450, 750, 18),
+    ]
+
+    doc = fitz.open()
+    try:
+        page = doc.new_page(width=page_width, height=page_height)
+        page.set_rotation(rotation)
+
+        # Banner identifying which rotation this file was generated with.
+        page.insert_text((250, 50), f"Rotation: {rotation}°", fontsize=32, color=(1, 0, 0))
+
+        for text, x, y, fontsize in test_texts:
+            page.insert_text((x, y), text, fontsize=fontsize)
+
+        # Border to help locate the page edges. PyMuPDF inserts content in
+        # unrotated page coordinates, whereas page.rect reports the rotated
+        # size (width/height swapped at 90/270), so the border is built from
+        # the unrotated dimensions rather than from page.rect.
+        border = fitz.Rect(10, 10, page_width - 10, page_height - 10)
+        page.draw_rect(border, color=(0, 0, 1), width=2)
+
+        doc.save(output_path)
+    finally:
+        # Release the document even if drawing or saving raised.
+        doc.close()
+
+    logger.info(f"✅ Created test PDF with rotation={rotation}°: {output_path}")
+    return output_path
+
+
+def _bbox_in_bounds(bbox, width, height):
+    """Return True when ``bbox`` (x1, y1, x2, y2) lies inside a width x height image.
+
+    The top-left corner must be strictly inside the image, while the
+    bottom-right corner may touch the right/bottom edge.
+    """
+    x1, y1, x2, y2 = bbox
+    # bool() keeps the flag JSON-serialisable even when the coordinates are
+    # numpy scalars, whose comparisons produce numpy.bool_.
+    return bool(0 <= x1 < width and 0 <= y1 < height and 0 <= x2 <= width and 0 <= y2 <= height)
+
+
+def test_pdf_rotation(pdf_path: str, renderer: str = "fitz", dpi: int = 200):
+    """
+    Render the first page of a PDF and check that every extracted text bbox
+    falls inside the rendered image.
+
+    Side effects: writes ``<stem>_<renderer>_vis.jpg`` (image with the boxes
+    drawn on it) and ``<stem>_<renderer>_result.json`` into
+    ``output/rotation_test`` next to this file.
+
+    NOTE(review): because of the ``test_`` prefix, pytest will presumably try
+    to collect this helper and look for a ``pdf_path`` fixture - confirm how
+    this file is meant to be run.
+
+    Args:
+        pdf_path: Path of the PDF file.
+        renderer: Rendering engine ("fitz" or "pypdfium2").
+        dpi: Rendering DPI.
+
+    Returns:
+        (all_in_bounds, text_blocks, rotation, image_size), where ``rotation``
+        is whatever ``PDFUtils.extract_all_text_blocks`` reports and
+        ``image_size`` is ``(width, height)`` in pixels. When the PDF cannot
+        be rendered the result is ``(False, [], 0, (0, 0))``.
+    """
+    logger.info(f"\n{'='*60}")
+    logger.info(f"Testing: {Path(pdf_path).name}")
+    logger.info(f"Renderer: {renderer}, DPI: {dpi}")
+    logger.info(f"{'='*60}\n")
+
+    # 1. Load the PDF and render its pages.
+    with open(pdf_path, 'rb') as f:
+        pdf_bytes = f.read()
+
+    images_list, pdf_doc = PDFUtils.load_images_from_pdf_unified(
+        pdf_bytes, dpi=dpi, renderer=renderer
+    )
+
+    try:
+        if not images_list:
+            logger.error("Failed to load PDF")
+            return False, [], 0, (0, 0)
+
+        # 2. Take the first page.
+        page_idx = 0
+        image_dict = images_list[page_idx]
+        pil_image = image_dict['img_pil']
+        scale = image_dict.get('scale', dpi / 72)
+
+        image = np.array(pil_image)
+        h, w = image.shape[:2]
+
+        logger.info(f"📐 Rendered image size: {w}x{h} pixels, scale: {scale:.3f}")
+
+        # 3. Extract the text blocks.
+        text_blocks, rotation = PDFUtils.extract_all_text_blocks(
+            pdf_doc, page_idx, scale
+        )
+
+        logger.info(f"📋 PDF rotation: {rotation}°")
+        logger.info(f"📝 Extracted {len(text_blocks)} text blocks\n")
+
+        # 4. Evaluate every bbox once; the flags are reused for logging,
+        #    visualisation and the JSON report.
+        in_bounds_flags = [_bbox_in_bounds(block['bbox'], w, h) for block in text_blocks]
+        out_of_bounds_count = in_bounds_flags.count(False)
+
+        for idx, (block, in_bounds) in enumerate(zip(text_blocks, in_bounds_flags)):
+            text = block['text']
+            x1, y1, x2, y2 = block['bbox']
+            status = "✅" if in_bounds else "❌"
+            logger.info(f"  {status} Block {idx}: '{text[:30]}' bbox=[{int(x1)},{int(y1)},{int(x2)},{int(y2)}]")
+
+        # 5. Visualisation: draw each bbox on a copy of the rendered image.
+        vis_image = image.copy()
+
+        for idx, (block, in_bounds) in enumerate(zip(text_blocks, in_bounds_flags)):
+            x1, y1, x2, y2 = [int(v) for v in block['bbox']]
+
+            # Clamp to the image so out-of-bounds boxes are still drawable.
+            x1 = max(0, min(x1, w-1))
+            y1 = max(0, min(y1, h-1))
+            x2 = max(0, min(x2, w))
+            y2 = max(0, min(y2, h))
+
+            # The image is treated as RGB here (it is converted RGB->BGR only
+            # when written), so red must be (255, 0, 0).
+            # green = ok, red = out of bounds
+            color = (0, 255, 0) if in_bounds else (255, 0, 0)
+            cv2.rectangle(vis_image, (x1, y1), (x2, y2), color, 2)
+
+            # Label the box with its block index; keep the label on-image.
+            cv2.putText(vis_image, f"{idx}", (x1, max(15, y1-5)),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+
+        # 6. Save the visualisation.
+        output_dir = Path(__file__).parent / "output" / "rotation_test"
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        pdf_name = Path(pdf_path).stem
+        output_path = output_dir / f"{pdf_name}_{renderer}_vis.jpg"
+        cv2.imwrite(str(output_path), cv2.cvtColor(vis_image, cv2.COLOR_RGB2BGR))
+        logger.info(f"\n💾 Saved visualization: {output_path}")
+
+        # 7. Save the JSON report.
+        json_path = output_dir / f"{pdf_name}_{renderer}_result.json"
+        result_data = {
+            'pdf_path': str(pdf_path),
+            'renderer': renderer,
+            'dpi': dpi,
+            'rotation': rotation,
+            'image_size': [w, h],
+            'scale': scale,
+            'text_blocks_count': len(text_blocks),
+            'out_of_bounds_count': out_of_bounds_count,
+            'text_blocks': [
+                {
+                    'text': block['text'],
+                    'bbox': [float(v) for v in block['bbox']],
+                    'in_bounds': in_bounds,
+                }
+                for block, in_bounds in zip(text_blocks, in_bounds_flags)
+            ]
+        }
+
+        with open(json_path, 'w', encoding='utf-8') as f:
+            json.dump(result_data, f, ensure_ascii=False, indent=2)
+        logger.info(f"💾 Saved JSON: {json_path}\n")
+
+        # 8. Verdict.
+        all_in_bounds = out_of_bounds_count == 0
+
+        if all_in_bounds:
+            logger.info("✅ All text bboxes are within image bounds - PASS\n")
+        else:
+            logger.warning(f"❌ {out_of_bounds_count} text bboxes are outside image bounds - FAIL\n")
+
+        return all_in_bounds, text_blocks, rotation, (w, h)
+    finally:
+        # Close the document on every exit path, including the early return
+        # above and any exception raised while processing.
+        if pdf_doc is not None:
+            pdf_doc.close()
+
+
+def pdf_rotation_to_image_rotation(pdf_rotation: int) -> int:
+    """Convert a PDF rotation (clockwise) into an image rotation (counter-clockwise).
+
+    The PDF specification defines page rotation clockwise, whereas image
+    processing usually defines rotation counter-clockwise.
+
+    The angle is normalised modulo 360 first, so equivalent values such as
+    -90, 360 or 450 map the same way as 270, 0 and 90 respectively.
+
+    Args:
+        pdf_rotation: PDF rotation in degrees, clockwise. Expected to be a
+            multiple of 90.
+
+    Returns:
+        Image rotation in degrees (0/90/180/270), counter-clockwise. Angles
+        that are not a multiple of 90, and non-numeric input, yield 0.
+    """
+    mapping = {0: 0, 90: 270, 180: 180, 270: 90}
+    try:
+        normalized = pdf_rotation % 360
+    except TypeError:
+        # Non-numeric input (e.g. None) keeps the "unknown -> 0" fallback.
+        return 0
+    return mapping.get(normalized, 0)
+
+
+def main():
+    """Run the rotation validation for every rotation/renderer combination.
+
+    For each rotation in 0/90/180/270 a test PDF is generated and checked
+    with the "fitz" renderer and, when it can be imported, "pypdfium2".
+    A combination passes when every text bbox is inside the rendered image
+    and the reported rotation equals the expected image rotation.
+
+    Returns:
+        0 when at least one test ran and all of them passed, otherwise 1
+        (suitable as a process exit code).
+    """
+    logger.info("🚀 Starting PDF Rotation Validation Test\n")
+    logger.info("=" * 60)
+
+    # Directory holding the generated test PDFs.
+    test_dir = Path(__file__).parent / "output" / "rotation_test" / "test_pdfs"
+    test_dir.mkdir(parents=True, exist_ok=True)
+
+    rotations = [0, 90, 180, 270]
+    renderers = ["fitz"]
+
+    # pypdfium2 is optional: only test it when it is importable.
+    try:
+        import pypdfium2  # noqa: F401 - imported only to probe availability
+        renderers.append("pypdfium2")
+    except ImportError:
+        logger.warning("pypdfium2 not available, skipping pypdfium2 tests")
+
+    results = {}
+
+    for rotation in rotations:
+        pdf_path = test_dir / f"test_rotation_{rotation}.pdf"
+        created = create_test_pdf_with_rotation(str(pdf_path), rotation)
+
+        # Trust the helper's return value as well as the file check: a stale
+        # PDF left by an earlier run must not hide a creation failure.
+        if created is None or not pdf_path.exists():
+            logger.error(f"Failed to create test PDF for rotation={rotation}")
+            # Record the failure so it counts against the final score.
+            results[f"rotation_{rotation}"] = "ERROR (test PDF not created)"
+            continue
+
+        for renderer in renderers:
+            test_key = f"rotation_{rotation}_{renderer}"
+            try:
+                all_in_bounds, text_blocks, detected_rotation, image_size = test_pdf_rotation(
+                    str(pdf_path), renderer=renderer
+                )
+
+                # The detected value is compared as an image rotation
+                # (counter-clockwise), so the PDF rotation is converted
+                # before the comparison.
+                expected_image_rotation = pdf_rotation_to_image_rotation(rotation)
+                rotation_correct = (detected_rotation == expected_image_rotation)
+
+                if all_in_bounds and rotation_correct:
+                    results[test_key] = "PASS"
+                elif not rotation_correct:
+                    results[test_key] = f"FAIL (rotation mismatch: expected {expected_image_rotation}, got {detected_rotation})"
+                else:
+                    results[test_key] = "FAIL (bbox out of bounds)"
+
+            except Exception as e:
+                # Top-level boundary: log with traceback and keep going so the
+                # remaining combinations still run.
+                logger.exception(f"❌ Test failed for {test_key}: {e}")
+                results[test_key] = "ERROR"
+
+    # Summary
+    logger.info(f"\n{'='*60}")
+    logger.info("TEST SUMMARY")
+    logger.info(f"{'='*60}\n")
+
+    for test_key, result in results.items():
+        if result == "PASS":
+            status_emoji = "✅"
+        elif "FAIL" in result:
+            status_emoji = "❌"
+        else:
+            status_emoji = "⚠️"
+        logger.info(f"{status_emoji} {test_key}: {result}")
+
+    # Totals
+    pass_count = sum(1 for r in results.values() if r == "PASS")
+    total_count = len(results)
+
+    logger.info(f"\n{'='*60}")
+    logger.info(f"📊 Final Score: {pass_count}/{total_count} tests passed")
+    logger.info(f"{'='*60}\n")
+
+    if total_count == 0:
+        # Nothing ran at all; do not report a vacuous success.
+        logger.warning("⚠️ No tests were executed")
+        return 1
+
+    if pass_count == total_count:
+        logger.info("🎉 All tests passed!")
+        return 0
+
+    logger.warning(f"⚠️ {total_count - pass_count} tests failed")
+    logger.info(f"\nCheck output files in: {test_dir.parent}")
+    return 1
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())