Explorar el Código

优化parse_table_recognition_v2_data函数,改进表格整体bbox计算逻辑,调整嵌套文本框处理方式

zhch158_admin hace 1 mes
padre
commit
9ca9e56a55
Se ha modificado 1 fichero con 27 adiciones y 17 borrados
  1. 27 17
      ocr_validator_utils.py

+ 27 - 17
ocr_validator_utils.py

@@ -372,7 +372,6 @@ def parse_ppstructv3_data(data: Dict, config: Dict) -> List[Dict]:
     
     return parsed_data
 
-
 def parse_table_recognition_v2_data(data: Dict, config: Dict) -> List[Dict]:
     tool_config = config['ocr']['tools']['table_recognition_v2']
     parsed_data = []
@@ -385,31 +384,42 @@ def parse_table_recognition_v2_data(data: Dict, config: Dict) -> List[Dict]:
             continue
 
         html_text = item.get(tool_config['text_field'], '')
-        bbox = item.get(tool_config['bbox_field'], [])
-        if bbox and len(bbox) >= 4:
-            bbox = bbox[:4]
+
+        # 计算表格整体bbox
+        cell_boxes_raw = item.get(tool_config['bbox_field'], [])
+        if cell_boxes_raw:
+            x1_list = [box[0] for box in cell_boxes_raw]
+            y1_list = [box[1] for box in cell_boxes_raw]
+            x2_list = [box[2] for box in cell_boxes_raw]
+            y2_list = [box[3] for box in cell_boxes_raw]
+            table_bbox = [
+                float(min(x1_list)),
+                float(min(y1_list)),
+                float(max(x2_list)),
+                float(max(y2_list))
+            ]
         else:
-            bbox = [0, 0, 0, 0]
+            table_bbox = [0.0, 0.0, 0.0, 0.0]
 
         parsed_data.append({
             'text': str(html_text).strip(),
-            'bbox': bbox,
+            'bbox': table_bbox,
             'category': item.get(tool_config.get('category_field', ''), 'table'),
             'confidence': item.get(tool_config.get('confidence_field', ''), config['ocr']['default_confidence']),
             'source_tool': 'table_recognition_v2',
         })
 
-    rec_texts = get_nested_value(data, tool_config.get('rec_texts_field', ''))
-    rec_boxes = get_nested_value(data, tool_config.get('rec_boxes_field', ''))
-    if isinstance(rec_texts, list) and isinstance(rec_boxes, list):
-        for i, (text, box) in enumerate(zip(rec_texts, rec_boxes)):
-            if text and isinstance(box, list) and len(box) >= 4:
-                parsed_data.append({
-                    'text': str(text).strip(),
-                    'bbox': box[:4],
-                    'category': 'OCR_Text',
-                    'source_tool': 'ppstructv3_ocr'
-                })
+        rec_texts = get_nested_value(item, tool_config.get('rec_texts_field', ''))
+        rec_boxes = get_nested_value(item, tool_config.get('rec_boxes_field', ''))
+        if isinstance(rec_texts, list) and isinstance(rec_boxes, list):
+            for i, (text, box) in enumerate(zip(rec_texts, rec_boxes)):
+                if text and isinstance(box, list) and len(box) >= 4:
+                    parsed_data.append({
+                        'text': str(text).strip(),
+                        'bbox': box[:4],
+                        'category': 'OCR_Text',
+                        'source_tool': 'table_recognition_v2'
+                    })
     
     return parsed_data