@@ -22,16 +22,16 @@ start() {
     nohup vllm serve $hf_model_path \
         --host 0.0.0.0 \
         --port 8101 \
-        --gpu-memory-utilization 0.85 \
+        --gpu-memory-utilization 0.70 \
         --max-log-len 1000 \
         --trust-remote-code \
-        --max-model-len 65536 \
-        --max-num-batched-tokens 65536 \
+        --max-model-len 32768 \
+        --max-num-batched-tokens 32768 \
         --uvicorn-log-level info \
-        --limit-mm-per-prompt '{"image": 3}' \
+        --limit-mm-per-prompt '{"image": 1}' \
         --chat-template-content-format string \
         --data-parallel-size 3 \
-        --max-num-seqs 32 \
+        --max-num-seqs 16 \
         --enable-prefix-caching \
         --served-model-name ${model_name} \
         > $LOGFILE 2>&1 &
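Taken together, the new values roughly halve the serving footprint: less GPU memory headroom (0.85 -> 0.70), a 32K context window and batched-token budget instead of 64K, one image per prompt instead of three, and half the concurrent sequences. Keeping --max-num-batched-tokens equal to --max-model-len means a full-length prompt can still be scheduled in one batch. A quick sanity check after restarting (a sketch, assuming the server runs locally on the port configured above, curl and python3 are available, and model_name is the same shell variable the script uses):

    # Liveness probe against vLLM's OpenAI-compatible server.
    curl -s http://localhost:8101/health

    # Confirm the server advertises the expected served model name.
    curl -s http://localhost:8101/v1/models | python3 -m json.tool

    # Minimal completion request to verify end-to-end serving.
    curl -s http://localhost:8101/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d '{"model": "'"${model_name}"'", "messages": [{"role": "user", "content": "ping"}], "max_tokens": 8}'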