|
|
@@ -0,0 +1,107 @@
|
|
|
+import sys
|
|
|
+from pathlib import Path
|
|
|
+from typing import Dict, Any, List, Union, Optional
|
|
|
+import numpy as np
|
|
|
+from PIL import Image
|
|
|
+from loguru import logger
|
|
|
+
|
|
|
+# 导入基类
|
|
|
+from .mineru_adapter import MinerUVLRecognizer
|
|
|
+
|
|
|
# Optional dependency: the client class from mineru-vl-utils.
# A failed import is not fatal here; it is logged as a warning and the outcome
# is recorded in MINERU_VL_UTILS_AVAILABLE so later code can check it.
try:
    from mineru_vl_utils import MinerUClient
except ImportError as import_error:
    logger.warning(f"mineru-vl-utils not available: {import_error}")
    MINERU_VL_UTILS_AVAILABLE = False
else:
    MINERU_VL_UTILS_AVAILABLE = True
|
|
|
+
|
|
|
+
|
|
|
class PaddleVLRecognizer(MinerUVLRecognizer):
    """PaddleOCR-VL recognition adapter built on ``MinerUVLRecognizer``.

    Differences from the parent class that are visible in this file:

    1. The model name is always forced to ``PaddleOCR-VL-0.9B``.
    2. Only the ``http-client`` backend is accepted; ``initialize`` raises
       ``ValueError`` for any other value.
    3. PaddleOCR-VL specific prompts are used unless the configuration
       supplies its own.

    All other behaviour is inherited from ``MinerUVLRecognizer``.
    """

    # Model name sent to the client, regardless of what the caller configured.
    _PADDLE_MODEL_NAME = 'PaddleOCR-VL-0.9B'
    # Backend assumed when the configuration does not name one.
    _PADDLE_DEFAULT_BACKEND = 'http-client'
    # Backends that initialize() is able to set up.
    _PADDLE_BACKENDS = ('http-client',)
    # Prompts used when model_params does not contain a 'prompts' entry.
    _PADDLE_DEFAULT_PROMPTS = {
        "table": "\nTable Recognition:",
        "equation": "\nFormula Recognition:",
        "[default]": "\nText Recognition:",
        "[layout]": "\nLayout Detection:",
    }

    def __init__(self, config: Dict[str, Any]):
        """Create the adapter from a configuration mapping.

        Args:
            config: Recognizer configuration. Keys read in this class are
                ``backend``, ``server_url`` and ``model_params``; any
                ``model_name`` entry is overridden. The mapping passed in is
                not modified.
        """
        # Shallow-copy first: forcing the model name on the caller's own dict
        # would leak into any other component built from the same mapping.
        config = dict(config)
        config['model_name'] = self._PADDLE_MODEL_NAME

        # Apply the same default as initialize() so that a configuration
        # without 'backend' is not reported as an error.
        backend = config.get('backend', self._PADDLE_DEFAULT_BACKEND)
        if backend not in self._PADDLE_BACKENDS:
            # Only logged here; initialize() is where the ValueError is raised.
            logger.error(
                f"Unsupported backend '{backend}' for PaddleOCR-VL; "
                f"only 'http-client' is supported and initialize() will reject it."
            )

        # NOTE(review): initialize() reads self.config, which is presumably
        # stored by the parent constructor - confirm in mineru_adapter.
        super().__init__(config)

    def initialize(self) -> None:
        """Create the ``MinerUClient`` and store it in ``self.vlm_model``.

        Raises:
            ImportError: If ``mineru-vl-utils`` could not be imported.
            ValueError: If the configured backend is not ``http-client``.
            Exception: Any error raised while building the client is logged
                and re-raised unchanged.
        """
        if not MINERU_VL_UTILS_AVAILABLE:
            raise ImportError("mineru-vl-utils is required for PaddleVLRecognizer")

        try:
            backend = self.config.get('backend', self._PADDLE_DEFAULT_BACKEND)
            server_url = self.config.get('server_url')
            # `or {}` also covers an explicit `model_params: null`, which
            # would otherwise make the .get() calls below fail on None.
            model_params = self.config.get('model_params') or {}

            # Client tuning values, with fallbacks when not configured.
            max_concurrency = model_params.get('max_concurrency', 100)
            http_timeout = model_params.get('http_timeout', 600)

            # PaddleOCR-VL prompts; a copy of the defaults is handed out so
            # the class-level mapping can never be modified through the client.
            prompts = model_params.get(
                'prompts', dict(self._PADDLE_DEFAULT_PROMPTS)
            )

            logger.info(f"Initializing PaddleOCR-VL with backend: {backend}")
            logger.info(f"Server URL: {server_url}")
            logger.info(f"Max concurrency: {max_concurrency}")

            if backend not in self._PADDLE_BACKENDS:
                raise ValueError(f"Unsupported backend for PaddleOCR-VL: {backend}")

            # HTTP client mode: requests go to the server at server_url.
            self.vlm_model = MinerUClient(
                backend=backend,
                model_name=self.config['model_name'],
                server_url=server_url,
                prompts=prompts,
                max_concurrency=max_concurrency,
                http_timeout=http_timeout,
                use_tqdm=False,  # progress bars disabled; adjust if needed
            )

            logger.success(f"✅ PaddleOCR-VL recognizer initialized: {backend}")

        except Exception as e:
            # Log for diagnostics, then let the caller see the original error.
            logger.error(f"❌ Failed to initialize PaddleOCR-VL recognizer: {e}")
            raise

    # The following methods are inherited from MinerUVLRecognizer and are
    # intentionally not overridden here:
    # - cleanup()
    # - _preprocess_image()
    # - recognize_table()
    # - recognize_formula()
    # - recognize_text()
    # - batch_recognize_table()
    # - batch_recognize_formula()
    # - _clean_latex()
    # - _html_to_markdown()
    # - _extract_cells_from_html()
|
|
|
+
|
|
|
+
|
|
|
# Public API of this module: only the adapter class is exported.
__all__ = ["PaddleVLRecognizer"]
|