Procházet zdrojové kódy

feat: 更新页面方向识别逻辑,统一rotate_angle定义并处理文字PDF的旋转

zhch158_admin před 3 dny
rodič
revize
10b6bdb6e4

+ 26 - 7
ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py

@@ -321,8 +321,10 @@ class EnhancedDocPipeline:
         detection_image = original_image.copy()
         rotate_angle = 0
         
-        # 1. 页面方向识别(仅扫描件), rotate_angle将图片旋转为正视需要旋转的角度
+        # 1. 页面方向识别
+        # rotate_angle统一定义:图像需要逆时针旋转的角度(0/90/180/270)来变为正视
         if pdf_type == 'ocr':
+            # 扫描件:使用OCR方向识别
             try:
                 detection_image, rotate_angle = self.preprocessor.process(original_image)
                 page_result['angle'] = rotate_angle
@@ -331,6 +333,27 @@ class EnhancedDocPipeline:
                     logger.info(f"📐 Page {page_idx}: rotated {rotate_angle}° for detection")
             except Exception as e:
                 logger.warning(f"⚠️ Orientation detection failed: {e}")
+        elif pdf_type == 'txt' and pdf_doc is not None:
+            # 文字PDF:获取PDF页面rotation并转换为统一的rotate_angle定义
+            try:
+                pdf_rotation_angle = PDFUtils.get_page_rotation(pdf_doc, page_idx)
+                if pdf_rotation_angle != 0:
+                    # 转换为OCR定义:图像需要逆时针旋转的角度
+                    # PDF rotation 270° 表示内容逆时针270° = 顺时针90°
+                    # 要恢复正视,需要逆时针90° (即360-270=90)
+                    rotate_angle = (360 - pdf_rotation_angle) % 360
+                    if rotate_angle == 360:
+                        rotate_angle = 0
+                    
+                    # 将图片旋转为正视(使用rotate_angle,逆时针旋转)
+                    from PIL import Image
+                    pil_rotated = Image.fromarray(detection_image).rotate(rotate_angle, expand=True)
+                    detection_image = np.array(pil_rotated)
+                    page_result['angle'] = rotate_angle
+                    logger.info(f"📐 Page {page_idx}: PDF rotation {pdf_rotation_angle}°, rotated image {rotate_angle}° to upright")
+            except Exception as e:
+                logger.warning(f"⚠️ Failed to get PDF rotation: {e}")
+
         
         # 2. Layout检测
         try:
@@ -382,14 +405,10 @@ class EnhancedDocPipeline:
                 actual_page_type = PDFUtils.detect_page_type(pdf_doc, page_idx)
                 
                 if actual_page_type == 'txt':
-                    # 正常提取文字
+                    # 正常提取文字, 返回正视坐标
                     all_text_spans, rotation = PDFUtils.extract_all_text_blocks(
-                        pdf_doc, page_idx, scale=scale
+                        pdf_doc, page_idx, scale=scale, return_upright_coords=True
                     )
-                    # 保存rotation角度
-                    page_result['angle'] = rotation
-                    if rotation != 0:
-                        logger.info(f"📐 Page {page_idx}: PDF rotation {rotation}°")
 
                     # 将 PDF 文本块转换为 span 格式
                     all_text_spans = self._convert_pdf_blocks_to_spans(