#!/bin/bash
# Daemon-style control script for a vLLM server hosting the DotsOCR model.

LOGDIR="/home/ubuntu/zhch/logs"
mkdir -p "$LOGDIR"
PIDFILE="$LOGDIR/vllm.pid"
LOGFILE="$LOGDIR/vllm.log"

# "conda activate" fails in non-interactive scripts unless the shell hook is
# loaded first (assumes conda is already on PATH).
eval "$(conda shell.bash hook)"
conda activate py312

hf_model_path="/home/ubuntu/zhch/dots.ocr/weights/DotsOCR"  # Path to your downloaded model weights
# The weights directory's parent must be importable so the model's custom code loads.
export PYTHONPATH="$(dirname "$hf_model_path"):$PYTHONPATH"

model_name="DotsOCR"  # Name the model is served under

# Launch the vLLM server in the background and record its PID.
start() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "vLLM is already running"
        return 1
    fi
    echo "Starting vLLM daemon..."
    nohup vllm serve "$hf_model_path" \
        --host 0.0.0.0 \
        --port 8101 \
        --gpu-memory-utilization 0.90 \
        --max-log-len 1000 \
        --trust-remote-code \
        --max-model-len 65536 \
        --max-num-batched-tokens 65536 \
        --uvicorn-log-level info \
        --limit-mm-per-prompt '{"image": 5}' \
        --chat-template-content-format string \
        --data-parallel-size 3 \
        --max-num-seqs 32 \
        --enable-prefix-caching \
        --served-model-name "${model_name}" \
        > "$LOGFILE" 2>&1 &
    echo $! > "$PIDFILE"
    echo "vLLM started with PID: $(cat "$PIDFILE")"
}

# Signal the recorded PID, wait for it to exit, then clear the PID file.
stop() {
    if [ ! -f "$PIDFILE" ]; then
        echo "vLLM is not running"
        return 1
    fi
    PID=$(cat "$PIDFILE")
    echo "Stopping vLLM (PID: $PID)..."
    kill "$PID"
    while kill -0 "$PID" 2>/dev/null; do
        sleep 1
    done
    rm -f "$PIDFILE"
    echo "vLLM stopped"
}

# Report whether the recorded PID is still alive.
status() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "vLLM is running (PID: $(cat "$PIDFILE"))"
    else
        echo "vLLM is not running"
    fi
}

case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    restart)
        stop
        sleep 2
        start
        ;;
    status)
        status
        ;;
    *)
        echo "Usage: $0 {start|stop|restart|status}"
        exit 1
        ;;
esac
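
# Verification sketch: how to check the server once "start" returns. These
# commands are not executed by the script; the script filename below is
# hypothetical, and the endpoints are vLLM's OpenAI-compatible HTTP API.
#
#   ./vllm_daemon.sh start
#   tail -f /home/ubuntu/zhch/logs/vllm.log     # watch model load progress
#   curl http://localhost:8101/health           # returns HTTP 200 when ready
#   curl http://localhost:8101/v1/models        # should list "DotsOCR"
#   ./vllm_daemon.sh stop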