|
@@ -17,7 +17,8 @@ MODEL_NAME="DotsOCR"
|
|
|
|
|
|
|
|
# GPU 配置
|
|
# GPU 配置
|
|
|
GPU_MEMORY_UTILIZATION="0.70"
|
|
GPU_MEMORY_UTILIZATION="0.70"
|
|
|
-DATA_PARALLEL_SIZE="3" # 3个GPU
|
|
|
|
|
|
|
+CUDA_VISIBLE_DEVICES="1,2"
|
|
|
|
|
+DATA_PARALLEL_SIZE="2" # 2 GPUs
|
|
|
MAX_MODEL_LEN="32768"
|
|
MAX_MODEL_LEN="32768"
|
|
|
MAX_NUM_BATCHED_TOKENS="32768"
|
|
MAX_NUM_BATCHED_TOKENS="32768"
|
|
|
MAX_NUM_SEQS="16"
|
|
MAX_NUM_SEQS="16"
|
|
@@ -108,7 +109,7 @@ start() {
|
|
|
fi
|
|
fi
|
|
|
|
|
|
|
|
# 启动vLLM服务
|
|
# 启动vLLM服务
|
|
|
- nohup vllm serve $HF_MODEL_PATH \
|
|
|
|
|
|
|
+ CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" nohup vllm serve $HF_MODEL_PATH \
|
|
|
--host $HOST \
|
|
--host $HOST \
|
|
|
--port $PORT \
|
|
--port $PORT \
|
|
|
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
|
|
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
|
|
@@ -347,4 +348,4 @@ case "$1" in
|
|
|
usage
|
|
usage
|
|
|
exit 1
|
|
exit 1
|
|
|
;;
|
|
;;
|
|
|
-esac
|
|
|
|
|
|
|
+esac
|