Просмотр исходного кода

feat: 更新表格OCR预处理,返回旋转前的尺寸信息

zhch158_admin 3 дня назад
Родитель
Коммит
5bb6e76629
Изменён 1 файл: 16 добавлений и 18 удалений
  1. 16 18
      ocr_tools/universal_doc_parser/core/element_processors.py

+ 16 - 18
ocr_tools/universal_doc_parser/core/element_processors.py

@@ -191,7 +191,7 @@ class ElementProcessors:
         image: np.ndarray,
         bbox: List[float],
         pre_matched_spans: Optional[List[Dict[str, Any]]] = None
-    ) -> Tuple[np.ndarray, List[Dict[str, Any]], int, str, int]:
+    ) -> Tuple[np.ndarray, List[Dict[str, Any]], int, str, int, Tuple[int, int]]:
         """
         表格OCR预处理(共享逻辑)
         
@@ -203,7 +203,7 @@ class ElementProcessors:
             pre_matched_spans: 预匹配的 OCR spans
             
         Returns:
-            (cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding)
+            (cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding, orig_size_before_rotation)
             其中 cropped_table 已经过方向检测和旋转处理
             crop_padding: 裁剪时添加的 padding 值
         """
@@ -219,6 +219,11 @@ class ElementProcessors:
         crop_padding = 10
 
         cropped_table = CoordinateUtils.crop_region(image, bbox, padding=crop_padding)
+        
+        # 🔑 保存旋转前的尺寸(重要!)
+        orig_table_h_before_rotation, orig_table_w_before_rotation = cropped_table.shape[:2]
+        orig_size_before_rotation = (orig_table_w_before_rotation, orig_table_h_before_rotation)
+        
         table_angle = 0
         
         # 1. 表格方向检测
@@ -226,7 +231,7 @@ class ElementProcessors:
             rotated_table, table_angle = self.preprocessor.process(cropped_table)
             if table_angle != 0:
                 logger.info(f"📐 Table rotated {table_angle}°")
-                cropped_table = rotated_table
+                cropped_table = rotated_table  # cropped_table 现在是旋转后的图像
         except Exception as e:
             logger.debug(f"Table orientation detection skipped: {e}")
         
@@ -321,7 +326,8 @@ class ElementProcessors:
             except Exception as e:
                 logger.warning(f"Table OCR failed: {e}")
         
-        return cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding
+        # 返回旋转前的尺寸
+        return cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding, orig_size_before_rotation
     
     def process_table_element_wired(
         self,
@@ -353,13 +359,9 @@ class ElementProcessors:
         bbox = layout_item.get('bbox', [0, 0, 0, 0])
         
         # OCR 预处理(返回已旋转的表格图片 + OCR 框 + padding)
-        cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding = \
+        cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding, orig_size_before_rotation = \
             self._prepare_table_ocr(image, bbox, pre_matched_spans)
         
-        # 获取裁剪后表格图片的尺寸
-        orig_table_h, orig_table_w = cropped_table.shape[:2]
-        orig_table_size = (orig_table_w, orig_table_h)
-        
         # UNet 有线表格识别
         cells = []
         enhanced_html = ""
@@ -405,13 +407,13 @@ class ElementProcessors:
                 cells=cells,
                 html=enhanced_html,
                 rotation_angle=table_angle,
-                orig_table_size=orig_table_size,
+                orig_table_size=orig_size_before_rotation,
                 table_bbox=cropped_offset_bbox
             )
             ocr_boxes = CoordinateUtils.inverse_rotate_ocr_boxes(
                 ocr_boxes=ocr_boxes,
                 rotation_angle=table_angle,
-                orig_table_size=orig_table_size,
+                orig_table_size=orig_size_before_rotation,
                 table_bbox=cropped_offset_bbox
             )
             logger.info(f"📐 Wired table coordinates transformed back to original image")
@@ -465,13 +467,9 @@ class ElementProcessors:
         bbox = layout_item.get('bbox', [0, 0, 0, 0])
         
         # OCR 预处理(返回已旋转的表格图片 + OCR 框 + padding)
-        cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding = \
+        cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding, orig_size_before_rotation = \
             self._prepare_table_ocr(image, bbox, pre_matched_spans)
         
-        # 获取裁剪后表格图片的尺寸
-        orig_table_h, orig_table_w = cropped_table.shape[:2]
-        orig_table_size = (orig_table_w, orig_table_h)
-        
         # VLM 识别获取表格结构HTML
         table_html = ""
         try:
@@ -513,13 +511,13 @@ class ElementProcessors:
                 cells=cells,
                 html=enhanced_html,
                 rotation_angle=table_angle,
-                orig_table_size=orig_table_size,
+                orig_table_size=orig_size_before_rotation,
                 table_bbox=cropped_offset_bbox
             )
             ocr_boxes = CoordinateUtils.inverse_rotate_ocr_boxes(
                 ocr_boxes=ocr_boxes,
                 rotation_angle=table_angle,
-                orig_table_size=orig_table_size,
+                orig_table_size=orig_size_before_rotation,
                 table_bbox=cropped_offset_bbox
             )
             logger.info(f"📐 VLM table coordinates transformed back to original image")