@@ -46,12 +46,11 @@ vllm serve ${hf_model_path} \
     --max-model-len 65536 \
     --max-num-batched-tokens 65536 \
     --uvicorn-log-level info \
-    --limit-mm-per-prompt image=10 \
+    --limit-mm-per-prompt '{"image": 5}' \
     --chat-template-content-format string \
     --data-parallel-size 2 \
     --max-num-seqs 32 \
-    --distributed-executor-backend mp \
-    --enable-prefix-caching True \
+    --enable-prefix-caching \
     --served-model-name ${model_name}
 
 # # run python demo after launch vllm server
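The trailing comment refers to a Python demo run against the launched server. A minimal sketch of such a demo, assuming the server above listens on vLLM's default http://localhost:8000 OpenAI-compatible endpoint; the MODEL_NAME environment variable and the image URL are placeholders, not values from this change:

# demo_client.py: query the vLLM OpenAI-compatible server started above.
import os

from openai import OpenAI

# vLLM does not check the API key by default; any string works.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    # Must match the --served-model-name passed to vllm serve.
    model=os.environ.get("MODEL_NAME", "my-model"),
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                # Placeholder image URL; up to 5 images per prompt are
                # allowed by the --limit-mm-per-prompt setting above.
                {"type": "image_url", "image_url": {"url": "https://example.com/demo.jpg"}},
            ],
        }
    ],
    max_tokens=256,
)
print(response.choices[0].message.content)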