|
|
@@ -321,8 +321,10 @@ class EnhancedDocPipeline:
|
|
|
detection_image = original_image.copy()
|
|
|
rotate_angle = 0
|
|
|
|
|
|
- # 1. 页面方向识别(仅扫描件), rotate_angle将图片旋转为正视需要旋转的角度
|
|
|
+ # 1. 页面方向识别
|
|
|
+ # rotate_angle统一定义:图像需要逆时针旋转的角度(0/90/180/270)来变为正视
|
|
|
if pdf_type == 'ocr':
|
|
|
+ # 扫描件:使用OCR方向识别
|
|
|
try:
|
|
|
detection_image, rotate_angle = self.preprocessor.process(original_image)
|
|
|
page_result['angle'] = rotate_angle
|
|
|
@@ -331,6 +333,27 @@ class EnhancedDocPipeline:
|
|
|
logger.info(f"📐 Page {page_idx}: rotated {rotate_angle}° for detection")
|
|
|
except Exception as e:
|
|
|
logger.warning(f"⚠️ Orientation detection failed: {e}")
|
|
|
+ elif pdf_type == 'txt' and pdf_doc is not None:
|
|
|
+ # 文字PDF:获取PDF页面rotation并转换为统一的rotate_angle定义
|
|
|
+ try:
|
|
|
+ pdf_rotation_angle = PDFUtils.get_page_rotation(pdf_doc, page_idx)
|
|
|
+ if pdf_rotation_angle != 0:
|
|
|
+ # 转换为OCR定义:图像需要逆时针旋转的角度
|
|
|
+ # PDF rotation 270° 表示内容逆时针270° = 顺时针90°
|
|
|
+ # 要恢复正视,需要逆时针90° (即360-270=90)
|
|
|
+ rotate_angle = (360 - pdf_rotation_angle) % 360
|
|
|
+ if rotate_angle == 360:
|
|
|
+ rotate_angle = 0
|
|
|
+
|
|
|
+ # 将图片旋转为正视(使用rotate_angle,逆时针旋转)
|
|
|
+ from PIL import Image
|
|
|
+ pil_rotated = Image.fromarray(detection_image).rotate(rotate_angle, expand=True)
|
|
|
+ detection_image = np.array(pil_rotated)
|
|
|
+ page_result['angle'] = rotate_angle
|
|
|
+ logger.info(f"📐 Page {page_idx}: PDF rotation {pdf_rotation_angle}°, rotated image {rotate_angle}° to upright")
|
|
|
+ except Exception as e:
|
|
|
+ logger.warning(f"⚠️ Failed to get PDF rotation: {e}")
|
|
|
+
|
|
|
|
|
|
# 2. Layout检测
|
|
|
try:
|
|
|
@@ -382,14 +405,10 @@ class EnhancedDocPipeline:
|
|
|
actual_page_type = PDFUtils.detect_page_type(pdf_doc, page_idx)
|
|
|
|
|
|
if actual_page_type == 'txt':
|
|
|
- # 正常提取文字
|
|
|
+ # 正常提取文字, 返回正视坐标
|
|
|
all_text_spans, rotation = PDFUtils.extract_all_text_blocks(
|
|
|
- pdf_doc, page_idx, scale=scale
|
|
|
+ pdf_doc, page_idx, scale=scale, return_upright_coords=True
|
|
|
)
|
|
|
- # 保存rotation角度
|
|
|
- page_result['angle'] = rotation
|
|
|
- if rotation != 0:
|
|
|
- logger.info(f"📐 Page {page_idx}: PDF rotation {rotation}°")
|
|
|
|
|
|
# 将 PDF 文本块转换为 span 格式
|
|
|
all_text_spans = self._convert_pdf_blocks_to_spans(
|