@@ -39,9 +39,6 @@ class ModelSingleton:
         if backend in ['transformers', 'vllm-engine', "vllm-async-engine"] and not model_path:
             model_path = auto_download_and_get_model_root_path("/","vlm")
         if backend == "transformers":
-            if not model_path:
-                raise ValueError("model_path must be provided when model or processor is None.")
-
             try:
                 from transformers import (
                     AutoProcessor,
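The hunk above carries the actual behavior change: a missing model_path no longer raises, it is resolved once, up front, for all three backends, which is what makes the per-backend guards (one removed here, two more in the hunks below) dead code. A minimal sketch of the new control flow with the downloader stubbed out; resolve_model_path and the stub's return path are names invented for this sketch, not the project's real implementation:

    # Sketch: resolve model_path once, before dispatching on backend.
    def resolve_model_path(backend, model_path):
        def auto_download_and_get_model_root_path(rel_path, kind):
            # Placeholder body: the real helper downloads the VLM weights
            # and returns the local root directory they landed in.
            return "/models/vlm"

        if backend in ['transformers', 'vllm-engine', "vllm-async-engine"] and not model_path:
            model_path = auto_download_and_get_model_root_path("/", "vlm")
        return model_path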
@@ -66,13 +63,10 @@ class ModelSingleton:
                     use_fast=True,
                 )
         elif backend == "vllm-engine":
-            if not model_path:
-                raise ValueError("model_path must be provided when vllm_llm is None.")
             try:
                 import vllm
             except ImportError:
                 raise ImportError("Please install vllm to use the vllm-engine backend.")
-            # logger.debug(kwargs)
             if "gpu_memory_utilization" not in kwargs:
                 kwargs["gpu_memory_utilization"] = 0.5
             if "model" not in kwargs:
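In the vllm-engine branch the pattern after the guard removal is: fill defaults only for kwargs the caller did not supply, then hand everything to the engine constructor. A standalone sketch of that pattern; setdefault is equivalent to the membership checks in the hunk, and since the body of the final "model" check lies outside the hunk, filling it with model_path is an assumption:

    import vllm

    def build_vllm_llm(model_path, **kwargs):
        # Only fill gaps; caller-supplied values win.
        kwargs.setdefault("gpu_memory_utilization", 0.5)  # default to at most half the GPU memory
        kwargs.setdefault("model", model_path)            # assumed: mirrors the elided branch body
        return vllm.LLM(**kwargs)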
@@ -80,15 +74,11 @@ class ModelSingleton:
             # Use kwargs as the initialization parameters for vllm
             vllm_llm = vllm.LLM(**kwargs)
         elif backend == "vllm-async-engine":
-            if not model_path:
-                raise ValueError("model_path must be provided when vllm_llm is None.")
             try:
                 from vllm.engine.arg_utils import AsyncEngineArgs
                 from vllm.v1.engine.async_llm import AsyncLLM
             except ImportError:
                 raise ImportError("Please install vllm to use the vllm-async-engine backend.")
-
-            # logger.debug(kwargs)
             if "gpu_memory_utilization" not in kwargs:
                 kwargs["gpu_memory_utilization"] = 0.5
             if "model" not in kwargs:
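The vllm-async-engine branch ends the same way: guard and leftover debug line removed, identical kwargs defaulting. The construction call itself sits below the hunk, so building the engine via AsyncLLM.from_engine_args is an inference from the two imports rather than code shown in this diff:

    from vllm.engine.arg_utils import AsyncEngineArgs
    from vllm.v1.engine.async_llm import AsyncLLM

    def build_async_llm(model_path, **kwargs):
        kwargs.setdefault("gpu_memory_utilization", 0.5)
        kwargs.setdefault("model", model_path)  # assumed, as in the sync branch
        # AsyncEngineArgs shares its core fields (model, gpu_memory_utilization)
        # with vllm.LLM; from_engine_args builds the v1 async engine from them.
        return AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))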