@@ -87,35 +87,37 @@ class ModelSingleton:
                 except Exception as e:
                     logger.warning(f'Error determining VRAM: {e}, using default batch_ratio: 1')
                     batch_size = 1
-            elif backend == "vllm-engine":
-                try:
-                    import vllm
-                    from mineru_vl_utils import MinerULogitsProcessor
-                except ImportError:
-                    raise ImportError("Please install vllm to use the vllm-engine backend.")
-                if "gpu_memory_utilization" not in kwargs:
-                    kwargs["gpu_memory_utilization"] = 0.5
-                if "model" not in kwargs:
-                    kwargs["model"] = model_path
-                if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
-                    kwargs["logits_processors"] = [MinerULogitsProcessor]
-                # Use kwargs as the vllm initialization parameters
-                vllm_llm = vllm.LLM(**kwargs)
-            elif backend == "vllm-async-engine":
-                try:
-                    from vllm.engine.arg_utils import AsyncEngineArgs
-                    from vllm.v1.engine.async_llm import AsyncLLM
-                    from mineru_vl_utils import MinerULogitsProcessor
-                except ImportError:
-                    raise ImportError("Please install vllm to use the vllm-async-engine backend.")
-                if "gpu_memory_utilization" not in kwargs:
-                    kwargs["gpu_memory_utilization"] = 0.5
-                if "model" not in kwargs:
-                    kwargs["model"] = model_path
-                if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
-                    kwargs["logits_processors"] = [MinerULogitsProcessor]
-                # Use kwargs as the vllm initialization parameters
-                vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
+            else:
+                os.environ["OMP_NUM_THREADS"] = "1"
+                if backend == "vllm-engine":
+                    try:
+                        import vllm
+                        from mineru_vl_utils import MinerULogitsProcessor
+                    except ImportError:
+                        raise ImportError("Please install vllm to use the vllm-engine backend.")
+                    if "gpu_memory_utilization" not in kwargs:
+                        kwargs["gpu_memory_utilization"] = 0.5
+                    if "model" not in kwargs:
+                        kwargs["model"] = model_path
+                    if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
+                        kwargs["logits_processors"] = [MinerULogitsProcessor]
+                    # Use kwargs as the vllm initialization parameters
+                    vllm_llm = vllm.LLM(**kwargs)
+                elif backend == "vllm-async-engine":
+                    try:
+                        from vllm.engine.arg_utils import AsyncEngineArgs
+                        from vllm.v1.engine.async_llm import AsyncLLM
+                        from mineru_vl_utils import MinerULogitsProcessor
+                    except ImportError:
+                        raise ImportError("Please install vllm to use the vllm-async-engine backend.")
+                    if "gpu_memory_utilization" not in kwargs:
+                        kwargs["gpu_memory_utilization"] = 0.5
+                    if "model" not in kwargs:
+                        kwargs["model"] = model_path
+                    if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
+                        kwargs["logits_processors"] = [MinerULogitsProcessor]
+                    # Use kwargs as the vllm initialization parameters
+                    vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
             self._models[key] = MinerUClient(
                 backend=backend,
                 model=model,
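
# Minimal standalone sketch of what the new else-branch amounts to once the
# defaults above are filled in. "path/to/model" and the hard-coded kwargs are
# placeholders; the MinerULogitsProcessor / enable_custom_logits_processors()
# wiring from the hunk is omitted, and only calls that appear in the diff are used.
import os

backend = "vllm-engine"  # or "vllm-async-engine"
kwargs = {"gpu_memory_utilization": 0.5, "model": "path/to/model"}

os.environ["OMP_NUM_THREADS"] = "1"  # set before the engine is constructed, as in the hunk

if backend == "vllm-engine":
    import vllm

    # kwargs become the vllm.LLM constructor arguments
    vllm_llm = vllm.LLM(**kwargs)
elif backend == "vllm-async-engine":
    from vllm.engine.arg_utils import AsyncEngineArgs
    from vllm.v1.engine.async_llm import AsyncLLM

    # kwargs become AsyncEngineArgs fields; the async engine is built from them
    vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))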