Ver código fonte

feat: 更新合并逻辑以提取PaddleOCR的旋转角度和原始图像尺寸,增强数据处理能力

zhch158_admin 4 dias atrás
pai
commit
89bffd5fb5
1 arquivo alterado com 2 adições e 2 exclusões
  1. 2 2
      merger/merger_core.py

+ 2 - 2
merger/merger_core.py

@@ -55,11 +55,11 @@ class MinerUPaddleOCRMerger:
             paddle_data = json.load(f)
         
         # 提取 PaddleOCR 的文字框信息
-        paddle_text_boxes = self.bbox_extractor.extract_paddle_text_boxes(paddle_data)
+        paddle_text_boxes, rotation_angle, orig_image_size = self.bbox_extractor.extract_paddle_text_boxes(paddle_data)
         
         # 处理 MinerU 的数据
         merged_data = self.data_processor.process_mineru_data(
-            mineru_data, paddle_text_boxes
+            mineru_data, paddle_text_boxes, rotation_angle, orig_image_size
         )
         
         return merged_data