|
|
@@ -191,7 +191,7 @@ class ElementProcessors:
|
|
|
image: np.ndarray,
|
|
|
bbox: List[float],
|
|
|
pre_matched_spans: Optional[List[Dict[str, Any]]] = None
|
|
|
- ) -> Tuple[np.ndarray, List[Dict[str, Any]], int, str, int]:
|
|
|
+ ) -> Tuple[np.ndarray, List[Dict[str, Any]], int, str, int, Tuple[int, int]]:
|
|
|
"""
|
|
|
表格OCR预处理(共享逻辑)
|
|
|
|
|
|
@@ -203,7 +203,7 @@ class ElementProcessors:
|
|
|
pre_matched_spans: 预匹配的 OCR spans
|
|
|
|
|
|
Returns:
|
|
|
- (cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding)
|
|
|
+ (cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding, orig_size_before_rotation)
|
|
|
其中 cropped_table 已经过方向检测和旋转处理
|
|
|
crop_padding: 裁剪时添加的 padding 值
|
|
|
"""
|
|
|
@@ -219,6 +219,11 @@ class ElementProcessors:
|
|
|
crop_padding = 10
|
|
|
|
|
|
cropped_table = CoordinateUtils.crop_region(image, bbox, padding=crop_padding)
|
|
|
+
|
|
|
+ # 🔑 保存旋转前的尺寸(重要!)
|
|
|
+ orig_table_h_before_rotation, orig_table_w_before_rotation = cropped_table.shape[:2]
|
|
|
+ orig_size_before_rotation = (orig_table_w_before_rotation, orig_table_h_before_rotation)
|
|
|
+
|
|
|
table_angle = 0
|
|
|
|
|
|
# 1. 表格方向检测
|
|
|
@@ -226,7 +231,7 @@ class ElementProcessors:
|
|
|
rotated_table, table_angle = self.preprocessor.process(cropped_table)
|
|
|
if table_angle != 0:
|
|
|
logger.info(f"📐 Table rotated {table_angle}°")
|
|
|
- cropped_table = rotated_table
|
|
|
+ cropped_table = rotated_table # cropped_table 现在是旋转后的图像
|
|
|
except Exception as e:
|
|
|
logger.debug(f"Table orientation detection skipped: {e}")
|
|
|
|
|
|
@@ -321,7 +326,8 @@ class ElementProcessors:
|
|
|
except Exception as e:
|
|
|
logger.warning(f"Table OCR failed: {e}")
|
|
|
|
|
|
- return cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding
|
|
|
+ # 返回旋转前的尺寸
|
|
|
+ return cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding, orig_size_before_rotation
|
|
|
|
|
|
def process_table_element_wired(
|
|
|
self,
|
|
|
@@ -353,13 +359,9 @@ class ElementProcessors:
|
|
|
bbox = layout_item.get('bbox', [0, 0, 0, 0])
|
|
|
|
|
|
# OCR 预处理(返回已旋转的表格图片 + OCR 框 + padding)
|
|
|
- cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding = \
|
|
|
+ cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding, orig_size_before_rotation = \
|
|
|
self._prepare_table_ocr(image, bbox, pre_matched_spans)
|
|
|
|
|
|
- # 获取裁剪后表格图片的尺寸
|
|
|
- orig_table_h, orig_table_w = cropped_table.shape[:2]
|
|
|
- orig_table_size = (orig_table_w, orig_table_h)
|
|
|
-
|
|
|
# UNet 有线表格识别
|
|
|
cells = []
|
|
|
enhanced_html = ""
|
|
|
@@ -405,13 +407,13 @@ class ElementProcessors:
|
|
|
cells=cells,
|
|
|
html=enhanced_html,
|
|
|
rotation_angle=table_angle,
|
|
|
- orig_table_size=orig_table_size,
|
|
|
+ orig_table_size=orig_size_before_rotation,
|
|
|
table_bbox=cropped_offset_bbox
|
|
|
)
|
|
|
ocr_boxes = CoordinateUtils.inverse_rotate_ocr_boxes(
|
|
|
ocr_boxes=ocr_boxes,
|
|
|
rotation_angle=table_angle,
|
|
|
- orig_table_size=orig_table_size,
|
|
|
+ orig_table_size=orig_size_before_rotation,
|
|
|
table_bbox=cropped_offset_bbox
|
|
|
)
|
|
|
logger.info(f"📐 Wired table coordinates transformed back to original image")
|
|
|
@@ -465,13 +467,9 @@ class ElementProcessors:
|
|
|
bbox = layout_item.get('bbox', [0, 0, 0, 0])
|
|
|
|
|
|
# OCR 预处理(返回已旋转的表格图片 + OCR 框 + padding)
|
|
|
- cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding = \
|
|
|
+ cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding, orig_size_before_rotation = \
|
|
|
self._prepare_table_ocr(image, bbox, pre_matched_spans)
|
|
|
|
|
|
- # 获取裁剪后表格图片的尺寸
|
|
|
- orig_table_h, orig_table_w = cropped_table.shape[:2]
|
|
|
- orig_table_size = (orig_table_w, orig_table_h)
|
|
|
-
|
|
|
# VLM 识别获取表格结构HTML
|
|
|
table_html = ""
|
|
|
try:
|
|
|
@@ -513,13 +511,13 @@ class ElementProcessors:
|
|
|
cells=cells,
|
|
|
html=enhanced_html,
|
|
|
rotation_angle=table_angle,
|
|
|
- orig_table_size=orig_table_size,
|
|
|
+ orig_table_size=orig_size_before_rotation,
|
|
|
table_bbox=cropped_offset_bbox
|
|
|
)
|
|
|
ocr_boxes = CoordinateUtils.inverse_rotate_ocr_boxes(
|
|
|
ocr_boxes=ocr_boxes,
|
|
|
rotation_angle=table_angle,
|
|
|
- orig_table_size=orig_table_size,
|
|
|
+ orig_table_size=orig_size_before_rotation,
|
|
|
table_bbox=cropped_offset_bbox
|
|
|
)
|
|
|
logger.info(f"📐 VLM table coordinates transformed back to original image")
|