```bash
#!/bin/bash
LOGDIR="/home/ubuntu/zhch/logs"
mkdir -p "$LOGDIR"
PIDFILE="$LOGDIR/vllm.pid"
LOGFILE="$LOGDIR/vllm.log"

# `conda activate` fails in a non-interactive script unless conda's shell
# hook has been sourced first.
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate py312

hf_model_path="/home/ubuntu/zhch/dots.ocr/weights/DotsOCR" # Path to your downloaded model weights
export PYTHONPATH=$(dirname "$hf_model_path"):$PYTHONPATH

# Name under which the vLLM server exposes the model
model_name="DotsOCR"
start() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "vLLM is already running"
        return 1
    fi

    echo "Starting vLLM daemon..."
    nohup vllm serve "$hf_model_path" \
        --host 0.0.0.0 \
        --port 8101 \
        --gpu-memory-utilization 0.90 \
        --max-log-len 1000 \
        --trust-remote-code \
        --max-model-len 65536 \
        --max-num-batched-tokens 65536 \
        --uvicorn-log-level info \
        --limit-mm-per-prompt '{"image": 5}' \
        --chat-template-content-format string \
        --data-parallel-size 3 \
        --max-num-seqs 32 \
        --enable-prefix-caching \
        --served-model-name "${model_name}" \
        > "$LOGFILE" 2>&1 &

    echo $! > "$PIDFILE"
    echo "vLLM started with PID: $(cat "$PIDFILE")"
}
stop() {
    if [ ! -f "$PIDFILE" ]; then
        echo "vLLM is not running"
        return 1
    fi

    PID=$(cat "$PIDFILE")
    echo "Stopping vLLM (PID: $PID)..."
    kill "$PID"
    rm -f "$PIDFILE"
    echo "vLLM stopped"
}
status() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "vLLM is running (PID: $(cat "$PIDFILE"))"
    else
        echo "vLLM is not running"
    fi
}
case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    restart)
        stop
        sleep 2
        start
        ;;
    status)
        status
        ;;
    *)
        echo "Usage: $0 {start|stop|restart|status}"
        exit 1
        ;;
esac
```
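
With the script saved as, say, `vllm_daemon.sh` (the filename is arbitrary) and made executable, a typical session looks like the following sketch; port 8101 and the log path come from the variables and flags above:

```bash
chmod +x vllm_daemon.sh
./vllm_daemon.sh start
./vllm_daemon.sh status

# Follow the server log while the weights load
tail -f /home/ubuntu/zhch/logs/vllm.log

# Once loaded, the OpenAI-compatible API should list the served model "DotsOCR"
curl -s http://localhost:8101/v1/models

./vllm_daemon.sh stop
```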
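Because vLLM exposes an OpenAI-compatible API, a document image can then be sent to the `/v1/chat/completions` endpoint. This is only a minimal sketch: the image URL and prompt text are placeholders, and the exact prompts dots.ocr expects are defined in its own documentation; `"model"` must match the `--served-model-name` set above.

```bash
curl -s http://localhost:8101/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "DotsOCR",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "image_url", "image_url": {"url": "https://example.com/sample-page.png"}},
        {"type": "text", "text": "Extract the text from this document image."}
      ]
    }],
    "max_tokens": 1024
  }'
```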