|
|
@@ -0,0 +1,326 @@
|
|
|
+"""
|
|
|
+PDF Rotation 验证测试程序
|
|
|
+
|
|
|
+测试不同rotation角度(0/90/180/270)的PDF:
|
|
|
+1. 文本坐标是否正确转换
|
|
|
+2. 文本坐标是否与渲染图像对齐
|
|
|
+3. 两种渲染引擎(fitz/pypdfium2)的一致性
|
|
|
+"""
|
|
|
+import sys
|
|
|
+from pathlib import Path
|
|
|
+import cv2
|
|
|
+import numpy as np
|
|
|
+from loguru import logger
|
|
|
+import json
|
|
|
+
|
|
|
# Import-path bootstrap: resolve the ocr_platform root from this file's location
# so the project import that follows (ocr_utils) can be resolved.
current_file = Path(__file__).resolve()
ocr_platform_root = current_file.parents[3] # tests -> universal_doc_parser -> ocr_tools -> ocr_platform
# Prepended (index 0) so this directory is searched before other sys.path entries.
sys.path.insert(0, str(ocr_platform_root))
|
|
|
+
|
|
|
+from ocr_utils.pdf_utils import PDFUtils
|
|
|
+
|
|
|
+
|
|
|
def create_test_pdf_with_rotation(output_path: str, rotation: int):
    """Create a single-page A4 test PDF whose page carries the given rotation.

    The page receives a red "Rotation: N°" banner, nine labelled text snippets
    laid out on a 3x3 grid, and a blue border inset 10pt from the page edges.

    Args:
        output_path: Path of the PDF file to write.
        rotation: Page rotation in degrees (0/90/180/270).

    Returns:
        ``output_path`` on success, or ``None`` when PyMuPDF is not installed.
    """
    try:
        import fitz
    except ImportError:
        logger.error("PyMuPDF not installed, cannot create test PDF")
        return None

    # A4 page size in points.
    page_width, page_height = 595, 842

    # (text, x, y, fontsize) - spread over the page at different positions/sizes.
    test_texts = [
        ("Top Left", 50, 100, 24),
        ("Top Center", 250, 100, 24),
        ("Top Right", 450, 100, 24),
        ("Middle Left", 50, 400, 20),
        ("CENTER TEXT", 220, 420, 28),
        ("Middle Right", 450, 400, 20),
        ("Bottom Left", 50, 750, 18),
        ("Bottom Center", 230, 750, 18),
        ("Bottom Right", 450, 750, 18),
    ]

    doc = fitz.open()
    try:
        page = doc.new_page(width=page_width, height=page_height)
        page.set_rotation(rotation)

        # Banner that makes the rotation visible in the rendered image.
        page.insert_text((250, 50), f"Rotation: {rotation}°", fontsize=32, color=(1, 0, 0))

        for text, x, y, fontsize in test_texts:
            page.insert_text((x, y), text, fontsize=fontsize)

        # Border to help locate the page edges. Content is inserted in
        # unrotated page coordinates, whereas page.rect reflects the rotation
        # (width/height swap for 90/270), so the border is sized from the
        # unrotated page dimensions rather than from page.rect.
        border = fitz.Rect(10, 10, page_width - 10, page_height - 10)
        page.draw_rect(border, color=(0, 0, 1), width=2)

        doc.save(output_path)
    finally:
        # Release the document even when drawing or saving fails.
        doc.close()

    logger.info(f"✅ Created test PDF with rotation={rotation}°: {output_path}")
    return output_path
|
|
|
+
|
|
|
+
|
|
|
def _bbox_in_bounds(bbox, width, height):
    """Return True when ``bbox`` (x1, y1, x2, y2) fits inside a width x height image.

    The top-left corner must be strictly below the image size, while the
    bottom-right corner may touch the image edge.
    """
    x1, y1, x2, y2 = bbox
    # bool() keeps the result a plain Python bool (JSON-serializable) even if
    # the coordinates happen to be NumPy scalars.
    return bool(0 <= x1 < width and 0 <= y1 < height and 0 <= x2 <= width and 0 <= y2 <= height)


def test_pdf_rotation(pdf_path: str, renderer: str = "fitz", dpi: int = 200):
    """Render the first page of a PDF and check its text boxes against the image.

    The page is rendered through ``PDFUtils.load_images_from_pdf_unified``, the
    text blocks are extracted with ``PDFUtils.extract_all_text_blocks``, and
    every block bbox is checked against the rendered image size. A visualization
    (``*_vis.jpg``) and a JSON report (``*_result.json``) are written to
    ``output/rotation_test`` next to this file.

    NOTE(review): because of the ``test_`` prefix pytest may try to collect this
    function and treat ``pdf_path`` as a fixture - confirm how this file is run.

    Args:
        pdf_path: Path of the PDF file to check.
        renderer: Rendering engine ("fitz" or "pypdfium2").
        dpi: Rendering DPI.

    Returns:
        ``(all_in_bounds, text_blocks, rotation, image_size)`` where
        ``rotation`` is the value reported by
        ``PDFUtils.extract_all_text_blocks`` and ``image_size`` is
        ``(width, height)`` in pixels. When the PDF cannot be rendered the
        result is ``(False, [], 0, (0, 0))``.
    """
    logger.info(f"\n{'='*60}")
    logger.info(f"Testing: {Path(pdf_path).name}")
    logger.info(f"Renderer: {renderer}, DPI: {dpi}")
    logger.info(f"{'='*60}\n")

    # 1. Load the PDF and render its pages.
    with open(pdf_path, 'rb') as f:
        pdf_bytes = f.read()

    images_list, pdf_doc = PDFUtils.load_images_from_pdf_unified(
        pdf_bytes, dpi=dpi, renderer=renderer
    )

    try:
        if not images_list:
            logger.error("Failed to load PDF")
            return False, [], 0, (0, 0)

        # 2. Take the first page.
        page_idx = 0
        image_dict = images_list[page_idx]
        pil_image = image_dict['img_pil']
        scale = image_dict.get('scale', dpi / 72)

        image = np.array(pil_image)
        h, w = image.shape[:2]

        logger.info(f"📐 Rendered image size: {w}x{h} pixels, scale: {scale:.3f}")

        # 3. Extract the text blocks.
        text_blocks, rotation = PDFUtils.extract_all_text_blocks(
            pdf_doc, page_idx, scale
        )

        logger.info(f"📋 PDF rotation: {rotation}°")
        logger.info(f"📝 Extracted {len(text_blocks)} text blocks\n")

        # 4. Validate every block once; the flags are reused for the log, the
        #    visualization and the JSON report.
        in_bounds_flags = [_bbox_in_bounds(block['bbox'], w, h) for block in text_blocks]
        out_of_bounds_count = in_bounds_flags.count(False)

        for idx, (block, in_bounds) in enumerate(zip(text_blocks, in_bounds_flags)):
            text = block['text']
            x1, y1, x2, y2 = block['bbox']
            status = "✅" if in_bounds else "❌"
            logger.info(f"  {status} Block {idx}: '{text[:30]}' bbox=[{int(x1)},{int(y1)},{int(x2)},{int(y2)}]")

        # 5. Visualization: draw each text box on a copy of the rendered image.
        vis_image = image.copy()

        for idx, (block, in_bounds) in enumerate(zip(text_blocks, in_bounds_flags)):
            x1, y1, x2, y2 = [int(v) for v in block['bbox']]

            # Clamp to the image so out-of-bounds boxes are still visible.
            x1 = max(0, min(x1, w-1))
            y1 = max(0, min(y1, h-1))
            x2 = max(0, min(x2, w))
            y2 = max(0, min(y2, h))

            # The canvas is RGB here (it is converted to BGR only when saved),
            # so red must be (255, 0, 0): green = in bounds, red = out of bounds.
            color = (0, 255, 0) if in_bounds else (255, 0, 0)
            cv2.rectangle(vis_image, (x1, y1), (x2, y2), color, 2)

            # Block index label, kept at least 15px from the top edge.
            cv2.putText(vis_image, f"{idx}", (x1, max(15, y1-5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        # 6. Save the visualization.
        output_dir = Path(__file__).parent / "output" / "rotation_test"
        output_dir.mkdir(parents=True, exist_ok=True)

        pdf_name = Path(pdf_path).stem
        output_path = output_dir / f"{pdf_name}_{renderer}_vis.jpg"
        cv2.imwrite(str(output_path), cv2.cvtColor(vis_image, cv2.COLOR_RGB2BGR))
        logger.info(f"\n💾 Saved visualization: {output_path}")

        # 7. Save the JSON report.
        json_path = output_dir / f"{pdf_name}_{renderer}_result.json"
        result_data = {
            'pdf_path': str(pdf_path),
            'renderer': renderer,
            'dpi': dpi,
            'rotation': rotation,
            'image_size': [w, h],
            'scale': scale,
            'text_blocks_count': len(text_blocks),
            'out_of_bounds_count': out_of_bounds_count,
            'text_blocks': [
                {
                    'text': block['text'],
                    'bbox': [float(v) for v in block['bbox']],
                    'in_bounds': in_bounds,
                }
                for block, in_bounds in zip(text_blocks, in_bounds_flags)
            ],
        }

        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(result_data, f, ensure_ascii=False, indent=2)
        logger.info(f"💾 Saved JSON: {json_path}\n")

        # 8. Verdict.
        all_in_bounds = out_of_bounds_count == 0

        if all_in_bounds:
            logger.info("✅ All text bboxes are within image bounds - PASS\n")
        else:
            logger.warning(f"❌ {out_of_bounds_count} text bboxes are outside image bounds - FAIL\n")

        return all_in_bounds, text_blocks, rotation, (w, h)
    finally:
        # Close the document on every exit path, including the early return
        # above and any exception raised while rendering or reporting.
        if pdf_doc is not None:
            pdf_doc.close()
|
|
|
+
|
|
|
+
|
|
|
def pdf_rotation_to_image_rotation(pdf_rotation: int) -> int:
    """Convert a clockwise PDF rotation into a counter-clockwise image rotation.

    PDF page rotation is expressed clockwise, whereas image processing
    usually expresses rotation counter-clockwise.

    Args:
        pdf_rotation: PDF rotation in degrees (0/90/180/270, clockwise).

    Returns:
        Image rotation in degrees (0/90/180/270, counter-clockwise). Any
        angle other than 90, 180 or 270 yields 0.
    """
    # The counter-clockwise equivalent of a clockwise quarter turn is its
    # complement to a full turn; 0 and unsupported angles fall through to 0.
    if pdf_rotation in (90, 180, 270):
        return 360 - pdf_rotation
    return 0
|
|
|
+
|
|
|
+
|
|
|
def main():
    """Run the rotation validation for every rotation/renderer combination.

    For each rotation (0/90/180/270) a test PDF is generated and checked with
    every available renderer ("fitz", plus "pypdfium2" when importable). A
    combination passes when all text bboxes are inside the rendered image and
    the detected rotation equals the expected image rotation.

    Returns:
        0 when every combination passed, 1 otherwise (suitable as a process
        exit code).
    """
    import traceback

    logger.info("🚀 Starting PDF Rotation Validation Test\n")
    logger.info("=" * 60)

    # Directory that receives the generated test PDFs.
    test_dir = Path(__file__).parent / "output" / "rotation_test" / "test_pdfs"
    test_dir.mkdir(parents=True, exist_ok=True)

    rotations = [0, 90, 180, 270]
    renderers = ["fitz"]

    # pypdfium2 is optional: only test it when it can be imported.
    try:
        import pypdfium2  # noqa: F401 - availability probe only
        renderers.append("pypdfium2")
    except ImportError:
        logger.warning("pypdfium2 not available, skipping pypdfium2 tests")

    results = {}

    for rotation in rotations:
        pdf_path = test_dir / f"test_rotation_{rotation}.pdf"
        created = create_test_pdf_with_rotation(str(pdf_path), rotation)

        # Checking the return value (not only the file) keeps a stale PDF from
        # an earlier run from being tested as if it had just been generated.
        if created is None or not pdf_path.exists():
            logger.error(f"Failed to create test PDF for rotation={rotation}")
            # Record the failure so the summary cannot report success when
            # nothing was actually tested.
            for renderer in renderers:
                results[f"rotation_{rotation}_{renderer}"] = "ERROR (test PDF not created)"
            continue

        for renderer in renderers:
            test_key = f"rotation_{rotation}_{renderer}"
            try:
                all_in_bounds, text_blocks, detected_rotation, image_size = test_pdf_rotation(
                    str(pdf_path), renderer=renderer
                )

                # The detected value is an image rotation (counter-clockwise),
                # so the PDF rotation is converted before comparing.
                expected_image_rotation = pdf_rotation_to_image_rotation(rotation)
                rotation_correct = (detected_rotation == expected_image_rotation)

                if all_in_bounds and rotation_correct:
                    results[test_key] = "PASS"
                elif not rotation_correct:
                    results[test_key] = f"FAIL (rotation mismatch: expected {expected_image_rotation}, got {detected_rotation})"
                else:
                    results[test_key] = "FAIL (bbox out of bounds)"

            except Exception as e:
                # Top-level boundary: one broken combination must not abort
                # the remaining ones; log the traceback and mark it as ERROR.
                logger.error(f"❌ Test failed for {test_key}: {e}")
                logger.error(traceback.format_exc())
                results[test_key] = "ERROR"

    # Summary.
    logger.info(f"\n{'='*60}")
    logger.info("TEST SUMMARY")
    logger.info(f"{'='*60}\n")

    for test_key, result in results.items():
        if result == "PASS":
            status_emoji = "✅"
        elif "FAIL" in result:
            status_emoji = "❌"
        else:
            status_emoji = "⚠️"
        logger.info(f"{status_emoji} {test_key}: {result}")

    pass_count = sum(1 for r in results.values() if r == "PASS")
    total_count = len(results)

    logger.info(f"\n{'='*60}")
    logger.info(f"📊 Final Score: {pass_count}/{total_count} tests passed")
    logger.info(f"{'='*60}\n")

    if pass_count == total_count:
        logger.info("🎉 All tests passed!")
        return 0

    logger.warning(f"⚠️  {total_count - pass_count} tests failed")
    logger.info(f"\nCheck output files in: {test_dir.parent}")
    return 1
|
|
|
+
|
|
|
+
|
|
|
# Script entry point: the return value of main() becomes the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|