Browse Source

feat: 在解析和处理OCR数据时添加匹配文本字段

zhch158_admin 14 hours ago
parent
commit
ff828e2d08
1 changed file with 2 additions and 0 deletions
  1. 2 0
      ocr_validator_utils.py

+ 2 - 0
ocr_validator_utils.py

@@ -344,6 +344,7 @@ def parse_mineru_data(data: List, config: Dict, tool_name="mineru") -> List[Dict
                 if cell_text and cell_bbox and len(cell_bbox) >= 4:
                     parsed_data.append({
                         'text': str(cell_text).strip(),
+                        'matched_text': cell.get('matched_text', ''),
                         'bbox': cell_bbox[:4],
                         'row': cell.get('row', -1),
                         'col': cell.get('col', -1),
@@ -671,6 +672,7 @@ def process_ocr_data(ocr_data: List, config: Dict) -> Dict[str, List]:
                 if text not in text_bbox_mapping:
                     text_bbox_mapping[text] = []
                 text_bbox_mapping[text].append({
+                    'matched_text': item.get('matched_text', ''),
                     'bbox': bbox,
                     'category': item.get('category', 'Text'),
                     'index': i,