|
|
@@ -7,11 +7,17 @@ from PIL import Image
|
|
|
from loguru import logger
|
|
|
|
|
|
# 添加MinerU路径
|
|
|
-mineru_path = Path(__file__).parents[4] / "mineru"
|
|
|
-if str(mineru_path) not in sys.path:
|
|
|
- sys.path.insert(0, str(mineru_path))
|
|
|
+# mineru_path = Path(__file__).parents[4] / "mineru"
|
|
|
+# if str(mineru_path) not in sys.path:
|
|
|
+# sys.path.insert(0, str(mineru_path))
|
|
|
+
|
|
|
+# Add the ocr_platform root directory to the Python path (used for importing ocr_utils)
|
|
|
+ocr_platform_root = Path(__file__).parents[4] # adapters -> models -> universal_doc_parser -> ocr_tools -> ocr_platform
|
|
|
+if str(ocr_platform_root) not in sys.path:
|
|
|
+ sys.path.insert(0, str(ocr_platform_root))
|
|
|
|
|
|
from .base import BasePreprocessor, BaseLayoutDetector, BaseVLRecognizer, BaseOCRRecognizer
|
|
|
+from core.coordinate_utils import CoordinateUtils
|
|
|
|
|
|
# 导入MinerU组件
|
|
|
try:
|
|
|
@@ -490,7 +496,8 @@ class MinerUOCRRecognizer(BaseOCRRecognizer):
|
|
|
for item in ocr_results[0]:
|
|
|
if len(item) >= 2 and len(item[1]) >= 2:
|
|
|
formatted_results.append({
|
|
|
- 'bbox': item[0], # 坐标
|
|
|
+ 'bbox': CoordinateUtils.poly_to_bbox(item[0]), # 坐标
|
|
|
+ 'poly': item[0], # 多边形坐标
|
|
|
'text': item[1][0], # 识别文本
|
|
|
'confidence': item[1][1] # 置信度
|
|
|
})
|