#!/bin/bash
# filepath: /home/ubuntu/zhch/dots.ocr/zhch/vllm_daemon.sh
# DotsOCR vLLM service daemon script.
#
# Usage: vllm_daemon.sh {start|stop|restart|status|logs|config|test}
#
# Manages a background vLLM server that serves the DotsOCR model.
# The server PID is tracked in $PIDFILE and all server output is
# redirected to $LOGFILE.

LOGDIR="/home/ubuntu/zhch/logs"
mkdir -p "$LOGDIR"
PIDFILE="$LOGDIR/vllm.pid"
LOGFILE="$LOGDIR/vllm.log"

# Service configuration
CONDA_ENV="dots.ocr"
PORT="8101"
HOST="0.0.0.0"
HF_MODEL_PATH="/home/ubuntu/zhch/dots.ocr/weights/DotsOCR"
MODEL_NAME="DotsOCR"

# GPU configuration
GPU_MEMORY_UTILIZATION="0.70"
CUDA_VISIBLE_DEVICES="1,2"
DATA_PARALLEL_SIZE="2"            # one replica per visible GPU (2 GPUs above)
MAX_MODEL_LEN="32768"
MAX_NUM_BATCHED_TOKENS="32768"
MAX_NUM_SEQS="16"

# Initialize and activate the conda environment.
if [ -f "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" ]; then
    source /home/ubuntu/anaconda3/etc/profile.d/conda.sh
    conda activate "$CONDA_ENV"
elif [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then
    source /opt/conda/etc/profile.d/conda.sh
    conda activate "$CONDA_ENV"
else
    # Fallback: prepend the env's bin directory to PATH directly.
    echo "Warning: Using direct conda path activation"
    export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
fi

# Environment variables: make the DotsOCR package importable and tune CUDA.
export PYTHONPATH="$(dirname "$HF_MODEL_PATH"):$PYTHONPATH"
export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Register the DotsOCR model with vLLM by patching an import line into the
# installed `vllm` entry-point script. Idempotent: skips if already patched.
# Returns 1 if the vllm executable cannot be located.
register_model() {
    echo "🔧 注册 DotsOCR 模型到 vLLM..."

    vllm_path=$(command -v vllm)
    if [ -z "$vllm_path" ]; then
        echo "❌ vLLM 未找到,请检查安装和环境激活"
        return 1
    fi

    if ! grep -q "from DotsOCR import modeling_dots_ocr_vllm" "$vllm_path"; then
        sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
from DotsOCR import modeling_dots_ocr_vllm' "$vllm_path"
        echo "✅ DotsOCR 模型已注册到 vLLM"
    else
        echo "✅ DotsOCR 模型已经注册过了"
    fi
}

# Start the vLLM server in the background. Refuses to start when a live
# process is already recorded in the PID file. Returns 1 on any precondition
# failure (model dir missing, python/vllm not found, registration failed).
start() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "vLLM DotsOCR is already running"
        return 1
    fi

    echo "Starting vLLM DotsOCR daemon..."
    echo "Host: $HOST, Port: $PORT"
    echo "Model path: $HF_MODEL_PATH"
    echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
    echo "Data parallel size: $DATA_PARALLEL_SIZE"

    # Precondition: the model weights directory must exist.
    if [ ! -d "$HF_MODEL_PATH" ]; then
        echo "❌ Model path not found: $HF_MODEL_PATH"
        return 1
    fi

    # Precondition: conda env must provide python and vllm.
    if ! command -v python >/dev/null 2>&1; then
        echo "❌ Python not found. Check conda environment activation."
        return 1
    fi
    if ! command -v vllm >/dev/null 2>&1; then
        echo "❌ vLLM not found. Check installation and environment."
        return 1
    fi

    echo "🔧 Using Python: $(command -v python)"
    echo "🔧 Using vLLM: $(command -v vllm)"

    # Patch the DotsOCR import into vLLM before launching.
    if ! register_model; then
        echo "❌ Model registration failed"
        return 1
    fi

    # Show the current GPU state for reference.
    echo "📊 GPU 状态检查:"
    if command -v nvidia-smi >/dev/null 2>&1; then
        nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits | \
            awk -F',' '{printf " GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
    else
        echo "⚠️ nvidia-smi not available"
    fi

    # Launch the server detached.
    # BUGFIX: the original line was `nohup $CUDA_VISIBLE_DEVICES; vllm serve …`,
    # which executed "nohup 1,2" as a (failing) separate command and then
    # started vllm WITHOUT nohup and WITHOUT any GPU restriction, because
    # CUDA_VISIBLE_DEVICES was never exported. It is now passed in the
    # command's environment so the server only sees the intended GPUs.
    CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" nohup vllm serve "$HF_MODEL_PATH" \
        --host "$HOST" \
        --port "$PORT" \
        --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
        --max-log-len 1000 \
        --trust-remote-code \
        --max-model-len "$MAX_MODEL_LEN" \
        --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS" \
        --uvicorn-log-level info \
        --limit-mm-per-prompt '{"image": 1}' \
        --chat-template-content-format string \
        --data-parallel-size "$DATA_PARALLEL_SIZE" \
        --max-num-seqs "$MAX_NUM_SEQS" \
        --enable-prefix-caching \
        --served-model-name "$MODEL_NAME" \
        > "$LOGFILE" 2>&1 &

    echo $! > "$PIDFILE"
    echo "✅ vLLM DotsOCR started with PID: $(cat "$PIDFILE")"
    echo "📋 Log file: $LOGFILE"
    echo "🌐 Service URL: http://$HOST:$PORT"
    echo "📖 API Documentation: http://localhost:$PORT/docs"
}

# Stop the server: SIGTERM first, wait up to 10 s for a clean exit,
# then escalate to SIGKILL. Always removes the PID file on completion.
stop() {
    if [ ! -f "$PIDFILE" ]; then
        echo "vLLM DotsOCR is not running"
        return 1
    fi

    PID=$(cat "$PIDFILE")
    echo "Stopping vLLM DotsOCR (PID: $PID)..."

    # Graceful shutdown first.
    kill "$PID"

    # Wait for the process to exit.
    for i in {1..10}; do
        if ! kill -0 "$PID" 2>/dev/null; then
            break
        fi
        echo "Waiting for process to stop... ($i/10)"
        sleep 1
    done

    # Escalate if the process is still alive.
    if kill -0 "$PID" 2>/dev/null; then
        echo "Force killing process..."
        kill -9 "$PID"
    fi

    rm -f "$PIDFILE"
    echo "✅ vLLM DotsOCR stopped"
}

# Report liveness, port/listener state, API reachability, GPU usage and the
# tail of the log. Cleans up a stale PID file when the process is gone.
status() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        PID=$(cat "$PIDFILE")
        echo "✅ vLLM DotsOCR is running (PID: $PID)"
        echo "🌐 Service URL: http://$HOST:$PORT"
        echo "📋 Log file: $LOGFILE"

        # Check whether the port is being listened on (ss preferred, netstat fallback).
        if command -v ss >/dev/null 2>&1; then
            if ss -tuln | grep -q ":$PORT "; then
                echo "🔗 Port $PORT is being listened"
            else
                echo "⚠️ Port $PORT is not being listened (service may be starting up)"
            fi
        elif command -v netstat >/dev/null 2>&1; then
            if netstat -tuln | grep -q ":$PORT "; then
                echo "🔗 Port $PORT is being listened"
            else
                echo "⚠️ Port $PORT is not being listened (service may be starting up)"
            fi
        fi

        # Probe the OpenAI-compatible models endpoint.
        if command -v curl >/dev/null 2>&1; then
            if curl -s --connect-timeout 2 "http://127.0.0.1:$PORT/v1/models" > /dev/null 2>&1; then
                echo "🎯 API 响应正常"
            else
                echo "⚠️ API 无响应 (service may be starting up)"
            fi
        fi

        # Show GPU utilization.
        if command -v nvidia-smi >/dev/null 2>&1; then
            echo "📊 GPU 使用情况:"
            nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits | \
                awk -F',' '{printf " GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
        fi

        # Show the most recent log lines.
        if [ -f "$LOGFILE" ]; then
            echo "📄 Latest logs (last 3 lines):"
            tail -3 "$LOGFILE" | sed 's/^/ /'
        fi
    else
        echo "❌ vLLM DotsOCR is not running"
        if [ -f "$PIDFILE" ]; then
            echo "Removing stale PID file..."
            rm -f "$PIDFILE"
        fi
    fi
}

# Follow the service log (blocks in `tail -f` until interrupted).
logs() {
    if [ -f "$LOGFILE" ]; then
        echo "📄 vLLM DotsOCR logs:"
        echo "=================="
        tail -f "$LOGFILE"
    else
        echo "❌ Log file not found: $LOGFILE"
    fi
}

# Print the effective configuration, environment tool paths and GPU inventory.
config() {
    echo "📋 Current configuration:"
    echo " Conda Environment: $CONDA_ENV"
    echo " Host: $HOST"
    echo " Port: $PORT"
    echo " Model Path: $HF_MODEL_PATH"
    echo " Model Name: $MODEL_NAME"
    echo " GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
    echo " Data Parallel Size: $DATA_PARALLEL_SIZE"
    echo " Max Model Length: $MAX_MODEL_LEN"
    echo " Max Num Seqs: $MAX_NUM_SEQS"
    echo " PID File: $PIDFILE"
    echo " Log File: $LOGFILE"

    if [ -d "$HF_MODEL_PATH" ]; then
        echo "✅ Model path exists"
        echo " Model files:"
        ls -la "$HF_MODEL_PATH" | head -10 | sed 's/^/ /'
        if [ "$(ls -1 "$HF_MODEL_PATH" | wc -l)" -gt 10 ]; then
            echo " ... and more files"
        fi
    else
        echo "❌ Model path not found"
    fi

    # Show which interpreter/tool binaries are in effect.
    echo ""
    echo "🔧 Environment:"
    echo " Python: $(command -v python 2>/dev/null || echo 'Not found')"
    echo " vLLM: $(command -v vllm 2>/dev/null || echo 'Not found')"
    echo " Conda: $(command -v conda 2>/dev/null || echo 'Not found')"
    echo " CUDA: $(command -v nvcc 2>/dev/null || echo 'Not found')"

    # Show GPU inventory.
    if command -v nvidia-smi >/dev/null 2>&1; then
        echo ""
        echo "🔥 GPU Information:"
        nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits | \
            awk -F',' '{printf " GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
    fi
}

# Smoke-test the running service's /v1/models endpoint with curl.
# Returns 1 when the service is not running or curl is unavailable.
test_api() {
    echo "🧪 Testing vLLM DotsOCR API..."

    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "❌ vLLM service is not running"
        return 1
    fi

    if ! command -v curl >/dev/null 2>&1; then
        echo "❌ curl command not found"
        return 1
    fi

    echo "📡 Testing /v1/models endpoint..."
    # Capture the body and branch directly on curl's exit status
    # (the original tested $? after an assignment, which is fragile).
    if response=$(curl -s --connect-timeout 5 "http://127.0.0.1:$PORT/v1/models"); then
        echo "✅ Models endpoint accessible"
        # Pretty-print when the response is valid JSON; fall back to raw output.
        echo "$response" | python -m json.tool 2>/dev/null || echo "$response"
    else
        echo "❌ Models endpoint not accessible"
    fi
}

# Print usage help.
usage() {
    echo "vLLM DotsOCR Service Daemon"
    echo "============================"
    echo "Usage: $0 {start|stop|restart|status|logs|config|test}"
    echo ""
    echo "Commands:"
    echo " start - Start the vLLM DotsOCR service"
    echo " stop - Stop the vLLM DotsOCR service"
    echo " restart - Restart the vLLM DotsOCR service"
    echo " status - Show service status and resource usage"
    echo " logs - Show service logs (follow mode)"
    echo " config - Show current configuration"
    echo " test - Test API endpoints"
    echo ""
    echo "Configuration (edit script to modify):"
    echo " Host: $HOST"
    echo " Port: $PORT"
    echo " Model: $HF_MODEL_PATH"
    echo " GPU Memory: $GPU_MEMORY_UTILIZATION"
    echo " Parallel Size: $DATA_PARALLEL_SIZE"
    echo ""
    echo "Examples:"
    echo " ./vllm_daemon.sh start"
    echo " ./vllm_daemon.sh status"
    echo " ./vllm_daemon.sh logs"
    echo " ./vllm_daemon.sh test"
}

# Command dispatcher.
case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    restart)
        stop
        sleep 3
        start
        ;;
    status)
        status
        ;;
    logs)
        logs
        ;;
    config)
        config
        ;;
    test)
        test_api
        ;;
    *)
        usage
        exit 1
        ;;
esac