@@ -46,12 +46,11 @@ vllm serve ${hf_model_path} \
     --max-model-len 65536 \
     --max-num-batched-tokens 65536 \
     --uvicorn-log-level info \
-    --limit-mm-per-prompt image=10 \
+    --limit-mm-per-prompt '{"image": 5}' \
     --chat-template-content-format string \
     --data-parallel-size 2 \
     --max-num-seqs 32 \
-    --distributed-executor-backend mp \
-    --enable-prefix-caching True \
+    --enable-prefix-caching \
     --served-model-name ${model_name}
 
 # # run python demo after launch vllm server
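The trailing comment refers to a Python demo run against the launched server. A minimal sketch of such a demo, assuming the server above listens on vLLM's default http://localhost:8000 OpenAI-compatible endpoint; the MODEL_NAME environment variable and the image URL are placeholders, not values from this change:

# demo_client.py: query the vLLM OpenAI-compatible server started above.
import os

from openai import OpenAI

# vLLM does not check the API key by default; any string works.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    # Must match the --served-model-name passed to vllm serve.
    model=os.environ.get("MODEL_NAME", "my-model"),
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                # Placeholder image URL; up to 5 images per prompt are
                # allowed by the --limit-mm-per-prompt setting above.
                {"type": "image_url", "image_url": {"url": "https://example.com/demo.jpg"}},
            ],
        }
    ],
    max_tokens=256,
)
print(response.choices[0].message.content)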