|
|
@@ -348,6 +348,17 @@ def parse_mineru_data(data: List, config: Dict, tool_name="mineru") -> List[Dict
|
|
|
'source_tool': tool_name,
|
|
|
'img_path': img_path
|
|
|
})
|
|
|
+ elif category == 'table_cell':
|
|
|
+ if bbox and len(bbox) >= 4:
|
|
|
+ parsed_data.append({
|
|
|
+ 'text': str(text).strip(),
|
|
|
+ 'bbox': bbox[:4],
|
|
|
+ 'row': item.get('row', -1),
|
|
|
+ 'col': item.get('col', -1),
|
|
|
+ 'category': 'table_cell',
|
|
|
+ 'confidence': confidence,
|
|
|
+ 'source_tool': tool_name,
|
|
|
+ })
|
|
|
else:
|
|
|
# 其他类型,按文本处理, header, table_cell, ...
|
|
|
if text and bbox and len(bbox) >= 4:
|