|
|
@@ -61,16 +61,17 @@ class PaddleOCRVLMerger:
|
|
|
# 提取 PaddleOCR 的文字框信息
|
|
|
paddle_text_boxes = self.bbox_extractor.extract_paddle_text_boxes(paddle_data)
|
|
|
|
|
|
- # 处理 PaddleOCR_VL 的数据
|
|
|
+ # 处理 PaddleOCR_VL 的数据,合并后的结果已是 MinerU JSON 格式
|
|
|
merged_data = self.data_processor.process_paddleocr_vl_data(
|
|
|
paddleocr_vl_data, paddle_text_boxes
|
|
|
)
|
|
|
|
|
|
+ # 不用再转换:合并后的数据已是 MinerU 格式,以下转换代码保留但已停用
|
|
|
# 转换为指定格式
|
|
|
- if data_format == 'mineru':
|
|
|
- merged_data = self.output_converter.convert_to_mineru_format(
|
|
|
- merged_data, data_source='paddleocr_vl'
|
|
|
- )
|
|
|
+ # if data_format == 'mineru':
|
|
|
+ # merged_data = self.output_converter.convert_to_mineru_format(
|
|
|
+ # merged_data, data_source='paddleocr_vl'
|
|
|
+ # )
|
|
|
|
|
|
return merged_data
|
|
|
|