2 недель назад · df50db1eeb
--- a/zhch/adapters/doc_preprocessor_adapter.py
+++ b/zhch/adapters/doc_preprocessor_adapter.py
@@ -5,7 +5,7 @@
 
				 
			
 
				 import sys
			
 
				 from pathlib import Path
			
 
				-from typing import Any, Dict, List, Optional, Union
			
 
				+from typing import Any, Dict, List, Optional, Union, Tuple
			
 
				 import numpy as np
			
 
				 import cv2
			
 
				 
			
@@ -96,7 +96,7 @@ class EnhancedDocPreprocessor:
 
				         print(f"   📏 Image size: {img_width}x{img_height}, aspect_ratio: {aspect_ratio:.2f}, is_portrait: {is_portrait}")
			
 
				         return is_portrait
			
 
				     
			
 
				-    def _detect_vertical_text_boxes(self, image: np.ndarray) -> tuple[int, int]:
			
 
				+    def _detect_vertical_text_boxes(self, image: np.ndarray) -> Tuple[int, int]:
			
 
				         """
			
 
				         检测图片中的垂直文本框
			
 
				         
			
@@ -133,55 +133,24 @@ class EnhancedDocPreprocessor:
 
				             total_count = len(boxes)
			
 
				             
			
 
				             # 🎯 处理 numpy 数组格式: shape=(N, 4, 2)
			
 
				-            if isinstance(boxes, np.ndarray):
			
 
				-                if len(boxes.shape) == 3 and boxes.shape[1] == 4 and boxes.shape[2] == 2:
			
 
				-                    # 格式: (N, 4, 2) - 每个框有4个点，每个点有(x,y)坐标
			
 
				-                    for box in boxes:
			
 
				-                        # box: shape=(4, 2) - [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
			
 
				-                        p1, p2, p3, p4 = box
			
 
				-                        
			
 
				-                        # 计算宽高
			
 
				-                        width = abs(float(p2[0] - p1[0]))  # x2 - x1
			
 
				-                        height = abs(float(p3[1] - p2[1]))  # y3 - y2
			
 
				-                        
			
 
				-                        if height == 0:
			
 
				-                            continue
			
 
				-                        
			
 
				-                        aspect_ratio = width / height
			
 
				-                        
			
 
				-                        # 🎯 MinerU 的判断标准：宽高比 < 0.8 为垂直文本
			
 
				-                        if aspect_ratio < 0.8:
			
 
				-                            vertical_count += 1
			
 
				-                else:
			
 
				-                    # 其他格式，尝试遍历处理
			
 
				-                    for box in boxes:
			
 
				-                        if isinstance(box, np.ndarray) and len(box) >= 4:
			
 
				-                            self._process_single_box(box, vertical_count)
			
 
				-            else:
			
 
				-                # 处理列表格式
			
 
				+            if isinstance(boxes, np.ndarray) and len(boxes.shape) == 3 and boxes.shape[1] == 4 and boxes.shape[2] == 2:
			
 
				+                # 格式: (N, 4, 2) - 每个框有4个点，每个点有(x,y)坐标
			
 
				                 for box in boxes:
			
 
				-                    if isinstance(box, (list, tuple, np.ndarray)):
			
 
				-                        if len(box) >= 4:
			
 
				-                            # 格式: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
			
 
				-                            if isinstance(box[0], (list, tuple, np.ndarray)) and len(box[0]) >= 2:
			
 
				-                                p1, p2, p3, p4 = box[:4]
			
 
				-                                width = abs(float(p2[0]) - float(p1[0]))
			
 
				-                                height = abs(float(p3[1]) - float(p2[1]))
			
 
				-                            # 格式: [x1,y1,x2,y2,x3,y3,x4,y4]
			
 
				-                            elif len(box) >= 8:
			
 
				-                                width = abs(float(box[2]) - float(box[0]))
			
 
				-                                height = abs(float(box[5]) - float(box[3]))
			
 
				-                            else:
			
 
				-                                continue
			
 
				-                            
			
 
				-                            if height == 0:
			
 
				-                                continue
			
 
				-                            
			
 
				-                            aspect_ratio = width / height
			
 
				-                            
			
 
				-                            # 🎯 MinerU 的判断标准：宽高比 < 0.8 为垂直文本
			
 
				-                            if aspect_ratio < 0.8:
			
 
				-                                vertical_count += 1
			
 
				+                    # box: shape=(4, 2) - [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
			
 
				+                    p1, p2, p3, p4 = box
			
 
				+                    
			
 
				+                    # 计算宽高
			
 
				+                    width = abs(float(p2[0] - p1[0]))  # x2 - x1
			
 
				+                    height = abs(float(p3[1] - p2[1]))  # y3 - y2
			
 
				+                    
			
 
				+                    if height == 0:
			
 
				+                        continue
			
 
				+                    
			
 
				+                    aspect_ratio = width / height
			
 
				+                    
			
 
				+                    # 🎯 MinerU 的判断标准：宽高比 < 0.8 为垂直文本
			
 
				+                    if aspect_ratio < 0.8:
			
 
				+                        vertical_count += 1
			
 
				             
			
 
				             print(f"   📊 OCR detection: {vertical_count}/{total_count} vertical boxes ({vertical_count/total_count:.1%} vertical)")
			
 
				             return vertical_count, total_count