6 месяцев назад · e8a1f6ece8
--- a/ocr_tools/universal_doc_parser/tests/test_pdf_rotation.py
+++ b/ocr_tools/universal_doc_parser/tests/test_pdf_rotation.py
@@ -0,0 +1,326 @@
 
				+"""
			
 
				+PDF Rotation 验证测试程序
			
 
				+
			
 
				+测试不同rotation角度(0/90/180/270)的PDF：
			
 
				+1. 文本坐标是否正确转换
			
 
				+2. 文本坐标是否与渲染图像对齐
			
 
				+3. 两种渲染引擎(fitz/pypdfium2)的一致性
			
 
				+"""
			
 
				+import sys
			
 
				+from pathlib import Path
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+from loguru import logger
			
 
				+import json
			
 
				+
			
 
				+# 添加路径
			
 
				+current_file = Path(__file__).resolve()
			
 
				+ocr_platform_root = current_file.parents[3]  # tests -> universal_doc_parser -> ocr_tools -> ocr_platform
			
 
				+sys.path.insert(0, str(ocr_platform_root))
			
 
				+
			
 
				+from ocr_utils.pdf_utils import PDFUtils
			
 
				+
			
 
				+
			
 
				+def create_test_pdf_with_rotation(output_path: str, rotation: int):
			
 
				+    """
			
 
				+    创建指定rotation的测试PDF
			
 
				+    
			
 
				+    Args:
			
 
				+        output_path: 输出PDF路径
			
 
				+        rotation: 旋转角度 (0/90/180/270)
			
 
				+    """
			
 
				+    try:
			
 
				+        import fitz
			
 
				+    except ImportError:
			
 
				+        logger.error("PyMuPDF not installed, cannot create test PDF")
			
 
				+        return None
			
 
				+    
			
 
				+    # 创建PDF（A4尺寸：595x842点）
			
 
				+    doc = fitz.open()
			
 
				+    page = doc.new_page(width=595, height=842)
			
 
				+    
			
 
				+    # 设置rotation
			
 
				+    page.set_rotation(rotation)
			
 
				+    
			
 
				+    # 添加测试文本（使用不同位置和大小）
			
 
				+    test_texts = [
			
 
				+        ("Top Left", 50, 100, 24),
			
 
				+        ("Top Center", 250, 100, 24),
			
 
				+        ("Top Right", 450, 100, 24),
			
 
				+        ("Middle Left", 50, 400, 20),
			
 
				+        ("CENTER TEXT", 220, 420, 28),
			
 
				+        ("Middle Right", 450, 400, 20),
			
 
				+        ("Bottom Left", 50, 750, 18),
			
 
				+        ("Bottom Center", 230, 750, 18),
			
 
				+        ("Bottom Right", 450, 750, 18),
			
 
				+    ]
			
 
				+    
			
 
				+    # 添加rotation标识
			
 
				+    page.insert_text((250, 50), f"Rotation: {rotation}°", fontsize=32, color=(1, 0, 0))
			
 
				+    
			
 
				+    for text, x, y, fontsize in test_texts:
			
 
				+        page.insert_text((x, y), text, fontsize=fontsize)
			
 
				+    
			
 
				+    # 绘制边框（帮助定位）
			
 
				+    rect = page.rect
			
 
				+    page.draw_rect(fitz.Rect(10, 10, rect.width-10, rect.height-10), color=(0, 0, 1), width=2)
			
 
				+    
			
 
				+    # 保存
			
 
				+    doc.save(output_path)
			
 
				+    doc.close()
			
 
				+    
			
 
				+    logger.info(f"✅ Created test PDF with rotation={rotation}°: {output_path}")
			
 
				+    return output_path
			
 
				+
			
 
				+
			
 
				+def test_pdf_rotation(pdf_path: str, renderer: str = "fitz", dpi: int = 200):
			
 
				+    """
			
 
				+    测试PDF rotation处理
			
 
				+    
			
 
				+    Args:
			
 
				+        pdf_path: PDF文件路径
			
 
				+        renderer: 渲染引擎 ("fitz" or "pypdfium2")
			
 
				+        dpi: 渲染DPI
			
 
				+        
			
 
				+    Returns:
			
 
				+        (all_in_bounds, text_blocks, rotation, image_size)
			
 
				+    """
			
 
				+    logger.info(f"\n{'='*60}")
			
 
				+    logger.info(f"Testing: {Path(pdf_path).name}")
			
 
				+    logger.info(f"Renderer: {renderer}, DPI: {dpi}")
			
 
				+    logger.info(f"{'='*60}\n")
			
 
				+    
			
 
				+    # 1. 加载PDF并渲染图像
			
 
				+    with open(pdf_path, 'rb') as f:
			
 
				+        pdf_bytes = f.read()
			
 
				+    
			
 
				+    images_list, pdf_doc = PDFUtils.load_images_from_pdf_unified(
			
 
				+        pdf_bytes, dpi=dpi, renderer=renderer
			
 
				+    )
			
 
				+    
			
 
				+    if not images_list:
			
 
				+        logger.error("Failed to load PDF")
			
 
				+        return False, [], 0, (0, 0)
			
 
				+    
			
 
				+    # 2. 获取第一页
			
 
				+    page_idx = 0
			
 
				+    image_dict = images_list[page_idx]
			
 
				+    pil_image = image_dict['img_pil']
			
 
				+    scale = image_dict.get('scale', dpi / 72)
			
 
				+    
			
 
				+    # 转为numpy
			
 
				+    image = np.array(pil_image)
			
 
				+    h, w = image.shape[:2]
			
 
				+    
			
 
				+    logger.info(f"📐 Rendered image size: {w}x{h} pixels, scale: {scale:.3f}")
			
 
				+    
			
 
				+    # 3. 提取文本块
			
 
				+    text_blocks, rotation = PDFUtils.extract_all_text_blocks(
			
 
				+        pdf_doc, page_idx, scale
			
 
				+    )
			
 
				+    
			
 
				+    logger.info(f"📋 PDF rotation: {rotation}°")
			
 
				+    logger.info(f"📝 Extracted {len(text_blocks)} text blocks\n")
			
 
				+    
			
 
				+    # 4. 验证每个文本块
			
 
				+    out_of_bounds_count = 0
			
 
				+    
			
 
				+    for idx, block in enumerate(text_blocks):
			
 
				+        text = block['text']
			
 
				+        bbox = block['bbox']
			
 
				+        
			
 
				+        # 检查bbox是否在图像范围内
			
 
				+        x1, y1, x2, y2 = bbox
			
 
				+        
			
 
				+        in_bounds = (0 <= x1 < w and 0 <= y1 < h and 0 <= x2 <= w and 0 <= y2 <= h)
			
 
				+        
			
 
				+        if not in_bounds:
			
 
				+            out_of_bounds_count += 1
			
 
				+        
			
 
				+        status = "✅" if in_bounds else "❌"
			
 
				+        logger.info(f"  {status} Block {idx}: '{text[:30]}' bbox=[{int(x1)},{int(y1)},{int(x2)},{int(y2)}]")
			
 
				+    
			
 
				+    # 5. 可视化：在图像上绘制文本框
			
 
				+    vis_image = image.copy()
			
 
				+    
			
 
				+    for idx, block in enumerate(text_blocks):
			
 
				+        bbox = block['bbox']
			
 
				+        x1, y1, x2, y2 = [int(v) for v in bbox]
			
 
				+        
			
 
				+        # 裁剪到图像范围内
			
 
				+        x1 = max(0, min(x1, w-1))
			
 
				+        y1 = max(0, min(y1, h-1))
			
 
				+        x2 = max(0, min(x2, w))
			
 
				+        y2 = max(0, min(y2, h))
			
 
				+        
			
 
				+        in_bounds = (
			
 
				+            0 <= bbox[0] < w and 0 <= bbox[1] < h and
			
 
				+            0 <= bbox[2] <= w and 0 <= bbox[3] <= h
			
 
				+        )
			
 
				+        
			
 
				+        # 绘制矩形
			
 
				+        color = (0, 255, 0) if in_bounds else (0, 0, 255)  # 绿色=正常，红色=越界
			
 
				+        cv2.rectangle(vis_image, (x1, y1), (x2, y2), color, 2)
			
 
				+        
			
 
				+        # 添加文本标签
			
 
				+        cv2.putText(vis_image, f"{idx}", (x1, max(15, y1-5)), 
			
 
				+                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
			
 
				+    
			
 
				+    # 6. 保存可视化结果
			
 
				+    output_dir = Path(__file__).parent / "output" / "rotation_test"
			
 
				+    output_dir.mkdir(parents=True, exist_ok=True)
			
 
				+    
			
 
				+    pdf_name = Path(pdf_path).stem
			
 
				+    output_path = output_dir / f"{pdf_name}_{renderer}_vis.jpg"
			
 
				+    cv2.imwrite(str(output_path), cv2.cvtColor(vis_image, cv2.COLOR_RGB2BGR))
			
 
				+    logger.info(f"\n💾 Saved visualization: {output_path}")
			
 
				+    
			
 
				+    # 7. 保存JSON结果
			
 
				+    json_path = output_dir / f"{pdf_name}_{renderer}_result.json"
			
 
				+    result_data = {
			
 
				+        'pdf_path': str(pdf_path),
			
 
				+        'renderer': renderer,
			
 
				+        'dpi': dpi,
			
 
				+        'rotation': rotation,
			
 
				+        'image_size': [w, h],
			
 
				+        'scale': scale,
			
 
				+        'text_blocks_count': len(text_blocks),
			
 
				+        'out_of_bounds_count': out_of_bounds_count,
			
 
				+        'text_blocks': [
			
 
				+            {
			
 
				+                'text': block['text'],
			
 
				+                'bbox': [float(v) for v in block['bbox']],
			
 
				+                'in_bounds': (
			
 
				+                    0 <= block['bbox'][0] < w and
			
 
				+                    0 <= block['bbox'][1] < h and
			
 
				+                    0 <= block['bbox'][2] <= w and
			
 
				+                    0 <= block['bbox'][3] <= h
			
 
				+                )
			
 
				+            }
			
 
				+            for block in text_blocks
			
 
				+        ]
			
 
				+    }
			
 
				+    
			
 
				+    with open(json_path, 'w', encoding='utf-8') as f:
			
 
				+        json.dump(result_data, f, ensure_ascii=False, indent=2)
			
 
				+    logger.info(f"💾 Saved JSON: {json_path}\n")
			
 
				+    
			
 
				+    # 8. 验证结果
			
 
				+    all_in_bounds = out_of_bounds_count == 0
			
 
				+    
			
 
				+    if all_in_bounds:
			
 
				+        logger.info("✅ All text bboxes are within image bounds - PASS\n")
			
 
				+    else:
			
 
				+        logger.warning(f"❌ {out_of_bounds_count} text bboxes are outside image bounds - FAIL\n")
			
 
				+    
			
 
				+    # 关闭PDF
			
 
				+    pdf_doc.close()
			
 
				+    
			
 
				+    return all_in_bounds, text_blocks, rotation, (w, h)
			
 
				+
			
 
				+
			
 
				+def pdf_rotation_to_image_rotation(pdf_rotation: int) -> int:
			
 
				+    """将PDF旋转角度（顺时针）转换为图片旋转角度（逆时针）
			
 
				+    
			
 
				+    PDF规范使用顺时针旋转定义，图片处理通常使用逆时针旋转定义。
			
 
				+    
			
 
				+    Args:
			
 
				+        pdf_rotation: PDF旋转角度 (0/90/180/270，顺时针)
			
 
				+        
			
 
				+    Returns:
			
 
				+        图片旋转角度 (0/90/180/270，逆时针)
			
 
				+    """
			
 
				+    mapping = {0: 0, 90: 270, 180: 180, 270: 90}
			
 
				+    return mapping.get(pdf_rotation, 0)
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    """主测试函数"""
			
 
				+    logger.info("🚀 Starting PDF Rotation Validation Test\n")
			
 
				+    logger.info("=" * 60)
			
 
				+    
			
 
				+    # 创建测试PDF目录
			
 
				+    test_dir = Path(__file__).parent / "output" / "rotation_test" / "test_pdfs"
			
 
				+    test_dir.mkdir(parents=True, exist_ok=True)
			
 
				+    
			
 
				+    # 测试不同rotation角度
			
 
				+    rotations = [0, 90, 180, 270]
			
 
				+    renderers = ["fitz"]  # 先测试fitz
			
 
				+    
			
 
				+    # 如果pypdfium2可用，也测试
			
 
				+    try:
			
 
				+        import pypdfium2
			
 
				+        renderers.append("pypdfium2")
			
 
				+    except ImportError:
			
 
				+        logger.warning("pypdfium2 not available, skipping pypdfium2 tests")
			
 
				+    
			
 
				+    results = {}
			
 
				+    
			
 
				+    for rotation in rotations:
			
 
				+        # 创建测试PDF
			
 
				+        pdf_path = test_dir / f"test_rotation_{rotation}.pdf"
			
 
				+        create_test_pdf_with_rotation(str(pdf_path), rotation)
			
 
				+        
			
 
				+        if not pdf_path.exists():
			
 
				+            logger.error(f"Failed to create test PDF for rotation={rotation}")
			
 
				+            continue
			
 
				+        
			
 
				+        # 测试所有渲染引擎
			
 
				+        for renderer in renderers:
			
 
				+            test_key = f"rotation_{rotation}_{renderer}"
			
 
				+            try:
			
 
				+                all_in_bounds, text_blocks, detected_rotation, image_size = test_pdf_rotation(
			
 
				+                    str(pdf_path), renderer=renderer
			
 
				+                )
			
 
				+                
			
 
				+                # 验证rotation检测是否正确
			
 
				+                # 注意：现在返回的是图片旋转角度（逆时针），需要转换PDF rotation来比较
			
 
				+                expected_image_rotation = pdf_rotation_to_image_rotation(rotation)
			
 
				+                rotation_correct = (detected_rotation == expected_image_rotation)
			
 
				+                
			
 
				+                if all_in_bounds and rotation_correct:
			
 
				+                    results[test_key] = "PASS"
			
 
				+                elif not rotation_correct:
			
 
				+                    results[test_key] = f"FAIL (rotation mismatch: expected {expected_image_rotation}, got {detected_rotation})"
			
 
				+                else:
			
 
				+                    results[test_key] = "FAIL (bbox out of bounds)"
			
 
				+                    
			
 
				+            except Exception as e:
			
 
				+                logger.error(f"❌ Test failed for {test_key}: {e}")
			
 
				+                import traceback
			
 
				+                logger.error(traceback.format_exc())
			
 
				+                results[test_key] = "ERROR"
			
 
				+    
			
 
				+    # 打印总结
			
 
				+    logger.info(f"\n{'='*60}")
			
 
				+    logger.info("TEST SUMMARY")
			
 
				+    logger.info(f"{'='*60}\n")
			
 
				+    
			
 
				+    for test_key, result in results.items():
			
 
				+        if result == "PASS":
			
 
				+            status_emoji = "✅"
			
 
				+        elif "FAIL" in result:
			
 
				+            status_emoji = "❌"
			
 
				+        else:
			
 
				+            status_emoji = "⚠️"
			
 
				+        logger.info(f"{status_emoji} {test_key}: {result}")
			
 
				+    
			
 
				+    # 统计
			
 
				+    pass_count = sum(1 for r in results.values() if r == "PASS")
			
 
				+    total_count = len(results)
			
 
				+    
			
 
				+    logger.info(f"\n{'='*60}")
			
 
				+    logger.info(f"📊 Final Score: {pass_count}/{total_count} tests passed")
			
 
				+    logger.info(f"{'='*60}\n")
			
 
				+    
			
 
				+    if pass_count == total_count:
			
 
				+        logger.info("🎉 All tests passed!")
			
 
				+        return 0
			
 
				+    else:
			
 
				+        logger.warning(f"⚠️ {total_count - pass_count} tests failed")
			
 
				+        logger.info(f"\nCheck output files in: {test_dir.parent}")
			
 
				+        return 1
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    sys.exit(main())