|
|
@@ -344,6 +344,7 @@ def parse_mineru_data(data: List, config: Dict, tool_name="mineru") -> List[Dict
|
|
|
if cell_text and cell_bbox and len(cell_bbox) >= 4:
|
|
|
parsed_data.append({
|
|
|
'text': str(cell_text).strip(),
|
|
|
+ 'matched_text': cell.get('matched_text', ''),
|
|
|
'bbox': cell_bbox[:4],
|
|
|
'row': cell.get('row', -1),
|
|
|
'col': cell.get('col', -1),
|
|
|
@@ -671,6 +672,7 @@ def process_ocr_data(ocr_data: List, config: Dict) -> Dict[str, List]:
|
|
|
if text not in text_bbox_mapping:
|
|
|
text_bbox_mapping[text] = []
|
|
|
text_bbox_mapping[text].append({
|
|
|
+ 'matched_text': item.get('matched_text', ''),
|
|
|
'bbox': bbox,
|
|
|
'category': item.get('category', 'Text'),
|
|
|
'index': i,
|