Просмотр исходного кода

feat: 更新MinerU适配器,添加ocr_platform根目录到Python路径并优化坐标处理逻辑

zhch158_admin 5 дней назад
Родитель
Коммит
5235aff1b9
1 изменённый файл: 11 добавлений и 4 удаления
  1. 11 4
      ocr_tools/universal_doc_parser/models/adapters/mineru_adapter.py

+ 11 - 4
ocr_tools/universal_doc_parser/models/adapters/mineru_adapter.py

@@ -7,11 +7,17 @@ from PIL import Image
 from loguru import logger
 
 # 添加MinerU路径
-mineru_path = Path(__file__).parents[4] / "mineru"
-if str(mineru_path) not in sys.path:
-    sys.path.insert(0, str(mineru_path))
+# mineru_path = Path(__file__).parents[4] / "mineru"
+# if str(mineru_path) not in sys.path:
+#     sys.path.insert(0, str(mineru_path))
+
+# 添加 ocr_platform 根目录到 Python 路径(用于导入 ocr_utils)
+ocr_platform_root = Path(__file__).parents[4]  # adapters -> models -> universal_doc_parser -> ocr_tools -> ocr_platform 
+if str(ocr_platform_root) not in sys.path:
+    sys.path.insert(0, str(ocr_platform_root))
 
 from .base import BasePreprocessor, BaseLayoutDetector, BaseVLRecognizer, BaseOCRRecognizer
+from core.coordinate_utils import CoordinateUtils
 
 # 导入MinerU组件
 try:
@@ -490,7 +496,8 @@ class MinerUOCRRecognizer(BaseOCRRecognizer):
                 for item in ocr_results[0]:
                     if len(item) >= 2 and len(item[1]) >= 2:
                         formatted_results.append({
-                            'bbox': item[0],  # 坐标
+                            'bbox': CoordinateUtils.poly_to_bbox(item[0]),  # 坐标
+                            'poly': item[0],  # 多边形坐标
                             'text': item[1][0],  # 识别文本
                             'confidence': item[1][1]  # 置信度
                         })