|
|
@@ -0,0 +1,82 @@
|
|
|
#!/bin/bash
#
# Control script for a vLLM server that serves the DotsOCR model.
# Usage: <script> {start|stop|restart|status}

# Directory that holds the daemon's PID file and log file.
LOGDIR="/home/ubuntu/zhch/logs"
mkdir -p -- "$LOGDIR"
PIDFILE="$LOGDIR/vllm.pid"
LOGFILE="$LOGDIR/vllm.log"

# `conda activate` is a shell function that only exists once conda's shell
# hook has been loaded; a non-interactive script does not get it from the
# user's interactive shell setup, so load the hook explicitly first.
# Failure is only a warning so that `stop` and `status`, which do not need
# the environment, keep working.
if command -v conda >/dev/null 2>&1; then
  eval "$(conda shell.bash hook)"
  conda activate py312 \
    || echo "Warning: failed to activate conda environment 'py312'" >&2
else
  echo "Warning: conda not found in PATH; environment 'py312' not activated" >&2
fi

hf_model_path="/home/ubuntu/zhch/dots.ocr/weights/DotsOCR" # Path to your downloaded model weights

# Prepend the directory containing the weights folder to the Python search
# path. The previous value is appended only when it is non-empty, which avoids
# a dangling ':' (an empty path entry) when PYTHONPATH was unset.
PYTHONPATH="$(dirname -- "$hf_model_path")${PYTHONPATH:+:${PYTHONPATH}}"
export PYTHONPATH

# Name under which the vLLM server exposes the model.
model_name="DotsOCR"
|
|
|
+
|
|
|
#######################################
# Launch the vLLM server as a background daemon and record its PID.
# Globals:
#   PIDFILE        (read/written) file that stores the daemon PID
#   LOGFILE        (written) receives the server's stdout and stderr;
#                  truncated on every start
#   hf_model_path  (read) model path passed to `vllm serve`
#   model_name     (read) value for --served-model-name
# Outputs:
#   Progress messages on stdout, error messages on stderr.
# Returns:
#   0 after the server has been launched; 1 if it is already running,
#   the `vllm` executable is missing, or the PID file cannot be written.
#######################################
start() {
  local pid=""

  if [[ -f "$PIDFILE" ]]; then
    pid=$(cat -- "$PIDFILE" 2>/dev/null)
  fi
  # Only probe values that look like a real PID: an empty PID file must not
  # turn into a bare `kill -0`, and "0" would address the whole process group.
  if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
    echo "vLLM is already running"
    return 1
  fi

  # Fail early instead of recording the PID of a process that dies at once.
  if ! command -v vllm >/dev/null 2>&1; then
    echo "vllm executable not found in PATH; is the Python environment active?" >&2
    return 1
  fi

  # Build the command line as an array so every argument stays a single word.
  local -a serve_args=(
    "$hf_model_path"
    --host 0.0.0.0
    --port 8101
    --gpu-memory-utilization 0.90
    --max-log-len 1000
    --trust-remote-code
    --max-model-len 65536
    --max-num-batched-tokens 65536
    --uvicorn-log-level info
    --limit-mm-per-prompt '{"image": 5}'
    --chat-template-content-format string
    --data-parallel-size 2
    --max-num-seqs 32
    --enable-prefix-caching
    --served-model-name "${model_name}"
  )

  echo "Starting vLLM daemon..."
  nohup vllm serve "${serve_args[@]}" >"$LOGFILE" 2>&1 &
  pid=$!

  if ! echo "$pid" >"$PIDFILE"; then
    # Without a PID file the daemon could not be stopped via this script,
    # so do not leave it running untracked.
    echo "Cannot write PID file $PIDFILE; stopping vLLM (PID: $pid)" >&2
    kill "$pid" 2>/dev/null
    return 1
  fi
  echo "vLLM started with PID: $pid"
}
|
|
|
+
|
|
|
#######################################
# Stop the vLLM daemon recorded in the PID file.
# Sends SIGTERM, then waits for the process to disappear so that a following
# start does not race with a server that is still shutting down.
# Globals:
#   PIDFILE (read, removed once the process is gone or found stale)
# Outputs:
#   Progress messages on stdout, error messages on stderr.
# Returns:
#   0 when the process was signalled and has exited; 1 when no PID file
#   exists, the recorded PID is stale, the signal could not be sent, or the
#   process is still alive after the timeout.
#######################################
stop() {
  local pid=""
  local waited=0
  local -r timeout_secs=30

  if [[ ! -f "$PIDFILE" ]]; then
    echo "vLLM is not running"
    return 1
  fi

  pid=$(cat -- "$PIDFILE" 2>/dev/null)
  # An empty, non-numeric or dead PID means the file is stale: clean it up
  # rather than signalling nothing and claiming success.
  if [[ ! "$pid" =~ ^[1-9][0-9]*$ ]] || ! kill -0 "$pid" 2>/dev/null; then
    echo "vLLM is not running (removing stale PID file)"
    rm -f -- "$PIDFILE"
    return 1
  fi

  echo "Stopping vLLM (PID: $pid)..."
  if ! kill "$pid"; then
    echo "Failed to send SIGTERM to vLLM (PID: $pid)" >&2
    return 1
  fi

  # Poll once per second until the process is gone or the timeout expires.
  while kill -0 "$pid" 2>/dev/null; do
    if (( waited >= timeout_secs )); then
      echo "vLLM (PID: $pid) did not exit within ${timeout_secs}s; PID file kept" >&2
      return 1
    fi
    sleep 1
    waited=$(( waited + 1 ))
  done

  rm -f -- "$PIDFILE"
  echo "vLLM stopped"
}
|
|
|
+
|
|
|
#######################################
# Report whether the vLLM daemon recorded in the PID file is alive.
# Globals:
#   PIDFILE (read)
# Outputs:
#   One status line on stdout.
# Returns:
#   0 if the recorded process exists, 1 otherwise, so callers can use the
#   result in conditionals.
#######################################
status() {
  local pid=""

  if [[ -f "$PIDFILE" ]]; then
    pid=$(cat -- "$PIDFILE" 2>/dev/null)
  fi

  # Probe only well-formed PIDs; an empty or garbage PID file counts as
  # "not running".
  if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
    echo "vLLM is running (PID: $pid)"
    return 0
  fi

  echo "vLLM is not running"
  return 1
}
|
|
|
+
|
|
|
#######################################
# Command-line entry point: run the action named by the first argument.
# Arguments:
#   $1 - one of start, stop, restart, status
# Outputs:
#   Whatever the selected action prints; a usage line for anything else.
# Returns:
#   The status of the last command of the selected action; exits the script
#   with status 1 on an unknown or missing action.
#######################################
main() {
  case "$1" in
    start) start ;;
    stop) stop ;;
    restart)
      # Stop, pause briefly, then launch again.
      stop
      sleep 2
      start
      ;;
    status) status ;;
    *)
      echo "Usage: $0 {start|stop|restart|status}"
      exit 1
      ;;
  esac
}

main "$@"
|