Forráskód Böngészése

feat: 优化文本检测逻辑,增强垂直文本框统计和旋转判断

zhch158_admin 2 hete
szülő
commit
e829d51d1b
1 módosított fájl, 34 hozzáadás és 24 törlés
  1. zhch/unified_pytorch_models/orientation_classifier_v2.py (+34 −24)

+ 34 - 24
zhch/unified_pytorch_models/orientation_classifier_v2.py

@@ -96,51 +96,61 @@ class OrientationClassifierV2:
             return False, 0
         
         try:
-            # ✅ 修改:适配 MinerUOCRAdapter 的返回格式
-            # 返回格式: [[[box], (text, conf)], ...] 或 [[boxes], ...]
+            print(f"   🎯 Detecting text boxes for orientation check...")
+            
+            # ✅ 调用检测器
             det_results = self.text_detector.ocr(img, det=True, rec=False)
             
             if not det_results or not det_results[0]:
+                print(f"   ⚠️  No detection results")
                 return False, 0
             
             boxes = det_results[0]
+            print(f"   📊 Found {len(boxes)} text boxes")
             
-            # ✅ 处理两种格式
+            # ✅ 统计垂直文本框
             vertical_count = 0
-            for item in boxes:
-                # 格式1: [box] (仅检测)
-                # 格式2: [[box], (text, conf)] (检测+识别)
-                if isinstance(item, list) and len(item) > 0:
-                    if isinstance(item[0], list):
-                        # 格式2: [[box], ...]
-                        box = np.array(item[0])
-                    else:
-                        # 格式1: [box]
-                        box = np.array(item)
-                else:
-                    continue
-                
-                # 计算文本框的宽高
-                if len(box) >= 4:
-                    points = box
-                    width = np.linalg.norm(points[1] - points[0])
-                    height = np.linalg.norm(points[2] - points[1])
+            for i, box in enumerate(boxes):
+                # ✅ box 格式: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
+                if isinstance(box, (list, np.ndarray)):
+                    points = np.array(box)
+                    
+                    if len(points) < 4:
+                        continue
+                    
+                    # 计算宽度和高度
+                    # points[0] = 左上, points[1] = 右上, points[2] = 右下, points[3] = 左下
+                    width = np.linalg.norm(points[1] - points[0])   # 上边长度
+                    height = np.linalg.norm(points[3] - points[0])  # 左边长度
                     
-                    aspect_ratio = width / height if height > 0 else 1.0
+                    if height == 0:
+                        continue
                     
-                    # 统计垂直文本框 (高 > 宽)
+                    aspect_ratio = width / height
+                    
+                    # 统计垂直文本框 (宽 < 高,即 ratio < 1.0)
+                    # ✅ 修改阈值为 0.8,更严格地判断垂直文本
                     if aspect_ratio < 0.8:
                         vertical_count += 1
             
             # 判断是否需要旋转
             total_boxes = len(boxes)
+            vertical_ratio = vertical_count / total_boxes if total_boxes > 0 else 0
+            
+            print(f"   📏 Vertical text count: {vertical_count} ({vertical_ratio:.1%})")
+            
             is_rotated = (
                 vertical_count >= total_boxes * self.vertical_text_ratio 
                 and vertical_count >= self.vertical_text_min_count
             )
             
-            return is_rotated, vertical_count
+            if is_rotated:
+                print(f"   ⚠️  High vertical text ratio detected, predicting rotation angle...")
+            else:
+                print(f"   ✅ Normal orientation (vertical ratio: {vertical_ratio:.1%} < {self.vertical_text_ratio:.1%})")
             
+            return is_rotated, vertical_count
+        
         except Exception as e:
             print(f"⚠️  Text detection failed: {e}")
             import traceback