| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- import sys
- from pathlib import Path
- from typing import Dict, Any, List, Union, Optional
- import numpy as np
- from PIL import Image
- from loguru import logger
- # 导入基类
- from .mineru_adapter import MinerUVLRecognizer
- # 导入 mineru-vl-utils 的客户端
- try:
- from mineru_vl_utils import MinerUClient
- MINERU_VL_UTILS_AVAILABLE = True
- except ImportError as e:
- logger.warning(f"mineru-vl-utils not available: {e}")
- MINERU_VL_UTILS_AVAILABLE = False
class PaddleVLRecognizer(MinerUVLRecognizer):
    """PaddleOCR-VL recognition adapter built on ``MinerUVLRecognizer``.

    What this subclass changes relative to its base class:

    1. The model name is always forced to ``PaddleOCR-VL-0.9B``.
    2. Only the ``http-client`` backend is accepted by :meth:`initialize`.
    3. Only ``__init__`` and ``initialize`` are overridden; all other
       behaviour is left to ``MinerUVLRecognizer``.
    """

    # Single source of truth for the values this adapter pins.
    _PADDLE_MODEL_NAME = 'PaddleOCR-VL-0.9B'
    _PADDLE_BACKEND = 'http-client'

    def __init__(self, config: Dict[str, Any]) -> None:
        """Pin the PaddleOCR-VL model name and delegate to the base class.

        Args:
            config: Recognizer configuration. It is modified in place:
                ``config['model_name']`` is overwritten with the
                PaddleOCR-VL model name. A missing ``backend`` key is
                treated as ``'http-client'``, matching :meth:`initialize`.

        An unsupported backend is only logged here; construction is not
        aborted. :meth:`initialize` is where it raises ``ValueError``.
        """
        # Force the PaddleOCR-VL model regardless of what the caller passed.
        config['model_name'] = self._PADDLE_MODEL_NAME

        # Use the same default as initialize() so that an omitted 'backend'
        # key does not produce a spurious error log.
        backend = config.get('backend', self._PADDLE_BACKEND)
        if backend != self._PADDLE_BACKEND:
            logger.error(
                f"Backend '{backend}' is not supported for PaddleOCR-VL; "
                f"must be '{self._PADDLE_BACKEND}'"
            )

        # Hand over to the base-class initialisation.
        super().__init__(config)

    def initialize(self) -> None:
        """Create the ``MinerUClient`` and store it on ``self.vlm_model``.

        Reads ``backend``, ``server_url``, ``model_name`` and ``model_params``
        from ``self.config`` (presumably assigned by the base-class
        ``__init__`` -- not visible in this file; confirm). From
        ``model_params`` it takes ``max_concurrency`` (default 100),
        ``http_timeout`` (default 600) and ``prompts`` (default: the
        PaddleOCR-VL prompt set below).

        Raises:
            ImportError: If ``mineru-vl-utils`` could not be imported.
            ValueError: If the configured backend is not ``'http-client'``.
            Exception: Any error raised while building the client is logged
                and re-raised unchanged.
        """
        if not MINERU_VL_UTILS_AVAILABLE:
            raise ImportError("mineru-vl-utils is required for PaddleVLRecognizer")

        try:
            backend = self.config.get('backend', self._PADDLE_BACKEND)
            server_url = self.config.get('server_url')
            # `or {}` also covers an explicit None (e.g. an empty YAML key),
            # which would otherwise fail on the .get() calls below.
            model_params = self.config.get('model_params') or {}

            # Parameters forwarded to MinerUClient; fall back to defaults
            # when model_params does not provide them.
            max_concurrency = model_params.get('max_concurrency', 100)
            http_timeout = model_params.get('http_timeout', 600)

            # PaddleOCR-VL specific prompts, overridable via model_params.
            prompts = model_params.get('prompts', {
                "table": "\nTable Recognition:",
                "equation": "\nFormula Recognition:",
                "[default]": "\nText Recognition:",
                "[layout]": "\nLayout Detection:",
            })

            logger.info(f"Initializing PaddleOCR-VL with backend: {backend}")
            logger.info(f"Server URL: {server_url}")
            logger.info(f"Max concurrency: {max_concurrency}")

            # Raised inside the try block on purpose so the failure is also
            # logged by the handler below before propagating.
            if backend != self._PADDLE_BACKEND:
                raise ValueError(f"Unsupported backend for PaddleOCR-VL: {backend}")

            # HTTP client mode: talk to an already running model server.
            self.vlm_model = MinerUClient(
                backend=backend,
                model_name=self.config['model_name'],
                server_url=server_url,
                prompts=prompts,
                max_concurrency=max_concurrency,
                http_timeout=http_timeout,
                use_tqdm=False,  # progress bars disabled; adjust if needed
            )

            logger.success(f"✅ PaddleOCR-VL recognizer initialized: {backend}")

        except Exception as e:
            logger.error(f"❌ Failed to initialize PaddleOCR-VL recognizer: {e}")
            raise

    # The following methods are inherited from MinerUVLRecognizer and are
    # intentionally not overridden here:
    # - cleanup()
    # - _preprocess_image()
    # - recognize_table()
    # - recognize_formula()
    # - recognize_text()
    # - batch_recognize_table()
    # - batch_recognize_formula()
    # - _clean_latex()
    # - _html_to_markdown()
    # - _extract_cells_from_html()
- # 导出适配器类
- __all__ = ['PaddleVLRecognizer']
|