|
|
@@ -9,26 +9,38 @@ def main():
|
|
|
|
|
|
has_port_arg = False
|
|
|
has_gpu_memory_utilization_arg = False
|
|
|
- has_model_arg = False
|
|
|
+ model_path = None
|
|
|
+ model_arg_indices = []
|
|
|
|
|
|
+ # Check existing arguments
|
|
|
for i, arg in enumerate(args):
|
|
|
if arg == "--port" or arg.startswith("--port="):
|
|
|
has_port_arg = True
|
|
|
if arg == "--gpu-memory-utilization" or arg.startswith("--gpu-memory-utilization="):
|
|
|
has_gpu_memory_utilization_arg = True
|
|
|
- if arg == "--model" or arg.startswith("--model="):
|
|
|
- has_model_arg = True
|
|
|
-
|
|
|
+ if arg == "--model":
|
|
|
+ if i + 1 < len(args):
|
|
|
+ model_path = args[i + 1]
|
|
|
+ model_arg_indices.extend([i, i + 1])
|
|
|
+ elif arg.startswith("--model="):
|
|
|
+ model_path = arg.split("=", 1)[1]
|
|
|
+ model_arg_indices.append(i)
|
|
|
+
|
|
|
+ # Remove the --model arguments from the argument list
|
|
|
+ if model_arg_indices:
|
|
|
+ for index in sorted(model_arg_indices, reverse=True):
|
|
|
+ args.pop(index)
|
|
|
+
|
|
|
+ # Add default arguments
|
|
|
if not has_port_arg:
|
|
|
args.extend(["--port", "30000"])
|
|
|
if not has_gpu_memory_utilization_arg:
|
|
|
args.extend(["--gpu-memory-utilization", "0.5"])
|
|
|
- if not has_model_arg:
|
|
|
- default_path = auto_download_and_get_model_root_path("/", "vlm")
|
|
|
- args.extend([default_path])
|
|
|
+ if not model_path:
|
|
|
+ model_path = auto_download_and_get_model_root_path("/", "vlm")
|
|
|
|
|
|
- # 重新构造sys.argv,以便透传所有参数给vllm
|
|
|
- sys.argv = [sys.argv[0]] + ["serve"] + args
|
|
|
+ # Rebuild the arguments, passing the model path as a positional argument
|
|
|
+ sys.argv = [sys.argv[0]] + ["serve", model_path] + args
|
|
|
|
|
|
# 启动vllm服务器
|
|
|
print(f"start vllm server: {sys.argv}")
|