|
|
@@ -1,76 +1,323 @@
|
|
|
#!/bin/bash
# filepath: /home/ubuntu/zhch/dots.ocr/zhch/vllm_daemon.sh
#
# DotsOCR vLLM service daemon script.
# Manages a background vLLM OpenAI-compatible server hosting the DotsOCR model:
# start/stop/restart/status/logs/config/test subcommands (see usage()).

# Log / PID bookkeeping
LOGDIR="/home/ubuntu/zhch/logs"
mkdir -p "$LOGDIR"
PIDFILE="$LOGDIR/vllm.pid"
LOGFILE="$LOGDIR/vllm.log"

# Service configuration
CONDA_ENV="dots.ocr"
PORT="8101"
HOST="0.0.0.0"
HF_MODEL_PATH="/home/ubuntu/zhch/dots.ocr/weights/DotsOCR"
MODEL_NAME="DotsOCR"

# GPU configuration
GPU_MEMORY_UTILIZATION="0.70"
DATA_PARALLEL_SIZE="1"          # reduced from 3 to 1 to avoid running out of GPU memory
MAX_MODEL_LEN="32768"
MAX_NUM_BATCHED_TOKENS="32768"
MAX_NUM_SEQS="16"

# Properly initialise and activate the conda environment.
# `conda activate` only works after conda.sh has been sourced into this
# (non-interactive) shell, so try the known install locations first.
if [ -f "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" ]; then
    source /home/ubuntu/anaconda3/etc/profile.d/conda.sh
    conda activate "$CONDA_ENV"
elif [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then
    source /opt/conda/etc/profile.d/conda.sh
    conda activate "$CONDA_ENV"
else
    # Fallback: prepend the environment's bin directory directly to PATH.
    echo "Warning: Using direct conda path activation"
    export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
fi

# Environment variables for the vLLM process.
# PYTHONPATH must include the parent of the model directory so that
# `from DotsOCR import ...` (injected by register_model) resolves.
export PYTHONPATH="$(dirname "$HF_MODEL_PATH"):$PYTHONPATH"
export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
|
# Register the DotsOCR model plugin inside the installed `vllm` entry script.
# vLLM picks up the custom model via import side effect, so an import line is
# injected right after the CLI's own import. Idempotent: a second call detects
# the existing line and does nothing.
# Returns: 0 on success or when already registered, 1 if vllm is not on PATH.
register_model() {
    echo "🔧 注册 DotsOCR 模型到 vLLM..."
    # `command -v` is the portable way to locate the CLI (preferred over `which`);
    # declaration and assignment are split so the lookup status isn't masked.
    local vllm_path
    vllm_path=$(command -v vllm)
    if [ -z "$vllm_path" ]; then
        echo "❌ vLLM 未找到,请检查安装和环境激活"
        return 1
    fi

    if ! grep -q "from DotsOCR import modeling_dots_ocr_vllm" "$vllm_path"; then
        # Append the DotsOCR import immediately after vLLM's CLI import line.
        sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
from DotsOCR import modeling_dots_ocr_vllm' "$vllm_path"
        echo "✅ DotsOCR 模型已注册到 vLLM"
    else
        echo "✅ DotsOCR 模型已经注册过了"
    fi
}
|
|
|
|
|
|
# Start the vLLM DotsOCR server as a detached background daemon.
# Refuses to start when a live PID is already recorded; validates the
# environment (model dir, python, vllm CLI) before paying vLLM's startup cost;
# records the child PID in $PIDFILE and routes all output to $LOGFILE.
# Returns: 0 on launch, 1 when already running or a precondition fails.
start() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "vLLM DotsOCR is already running"
        return 1
    fi

    echo "Starting vLLM DotsOCR daemon..."
    echo "Host: $HOST, Port: $PORT"
    echo "Model path: $HF_MODEL_PATH"
    echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
    echo "Data parallel size: $DATA_PARALLEL_SIZE"

    # Verify the model weights directory exists.
    if [ ! -d "$HF_MODEL_PATH" ]; then
        echo "❌ Model path not found: $HF_MODEL_PATH"
        return 1
    fi

    # Verify the conda environment actually provides python.
    if ! command -v python >/dev/null 2>&1; then
        echo "❌ Python not found. Check conda environment activation."
        return 1
    fi

    # Verify the vllm CLI is on PATH.
    if ! command -v vllm >/dev/null 2>&1; then
        echo "❌ vLLM not found. Check installation and environment."
        return 1
    fi

    echo "🔧 Using Python: $(command -v python)"
    echo "🔧 Using vLLM: $(command -v vllm)"

    # Inject the DotsOCR import into the vllm entry script (idempotent).
    if ! register_model; then
        echo "❌ Model registration failed"
        return 1
    fi

    # Snapshot GPU memory so OOM situations are visible up front.
    echo "📊 GPU 状态检查:"
    if command -v nvidia-smi >/dev/null 2>&1; then
        nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits | \
            awk -F',' '{printf "  GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
    else
        echo "⚠️ nvidia-smi not available"
    fi

    # Launch the server detached from this shell; all output goes to $LOGFILE.
    nohup vllm serve "$HF_MODEL_PATH" \
        --host "$HOST" \
        --port "$PORT" \
        --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
        --max-log-len 1000 \
        --trust-remote-code \
        --max-model-len "$MAX_MODEL_LEN" \
        --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS" \
        --uvicorn-log-level info \
        --limit-mm-per-prompt '{"image": 1}' \
        --chat-template-content-format string \
        --data-parallel-size "$DATA_PARALLEL_SIZE" \
        --max-num-seqs "$MAX_NUM_SEQS" \
        --enable-prefix-caching \
        --served-model-name "$MODEL_NAME" \
        > "$LOGFILE" 2>&1 &

    echo $! > "$PIDFILE"
    echo "✅ vLLM DotsOCR started with PID: $(cat "$PIDFILE")"
    echo "📋 Log file: $LOGFILE"
    echo "🌐 Service URL: http://$HOST:$PORT"
    echo "📖 API Documentation: http://localhost:$PORT/docs"
}
|
|
|
|
|
|
# Stop the daemon gracefully: send SIGTERM, wait up to 10 seconds, then
# escalate to SIGKILL only if the process is still alive. Removes $PIDFILE.
# Returns: 0 after stopping, 1 when no PID file exists (not running).
stop() {
    if [ ! -f "$PIDFILE" ]; then
        echo "vLLM DotsOCR is not running"
        return 1
    fi

    PID=$(cat "$PIDFILE")
    echo "Stopping vLLM DotsOCR (PID: $PID)..."

    # Graceful shutdown first (SIGTERM).
    kill "$PID"

    # Give the server up to 10 seconds to exit cleanly.
    for i in {1..10}; do
        if ! kill -0 "$PID" 2>/dev/null; then
            break
        fi
        echo "Waiting for process to stop... ($i/10)"
        sleep 1
    done

    # Escalate to SIGKILL only as a last resort.
    if kill -0 "$PID" 2>/dev/null; then
        echo "Force killing process..."
        kill -9 "$PID"
    fi

    rm -f "$PIDFILE"
    echo "✅ vLLM DotsOCR stopped"
}
|
|
|
|
|
|
# Report daemon status: live PID, whether the port is being listened on,
# whether the API answers, GPU utilisation, and the tail of the log file.
# Also removes a stale PID file when the recorded process is gone.
status() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        PID=$(cat "$PIDFILE")
        echo "✅ vLLM DotsOCR is running (PID: $PID)"
        echo "🌐 Service URL: http://$HOST:$PORT"
        echo "📋 Log file: $LOGFILE"

        # Is the port actually being listened on yet? Prefer ss, fall back to netstat.
        if command -v ss >/dev/null 2>&1; then
            if ss -tuln | grep -q ":$PORT "; then
                echo "🔗 Port $PORT is being listened"
            else
                echo "⚠️ Port $PORT is not being listened (service may be starting up)"
            fi
        elif command -v netstat >/dev/null 2>&1; then
            if netstat -tuln | grep -q ":$PORT "; then
                echo "🔗 Port $PORT is being listened"
            else
                echo "⚠️ Port $PORT is not being listened (service may be starting up)"
            fi
        fi

        # Does the OpenAI-compatible API answer?
        if command -v curl >/dev/null 2>&1; then
            if curl -s --connect-timeout 2 "http://127.0.0.1:$PORT/v1/models" > /dev/null 2>&1; then
                echo "🎯 API 响应正常"
            else
                echo "⚠️ API 无响应 (service may be starting up)"
            fi
        fi

        # GPU utilisation snapshot.
        if command -v nvidia-smi >/dev/null 2>&1; then
            echo "📊 GPU 使用情况:"
            nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits | \
                awk -F',' '{printf "  GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
        fi

        # Last few log lines for quick diagnosis.
        if [ -f "$LOGFILE" ]; then
            echo "📄 Latest logs (last 3 lines):"
            tail -3 "$LOGFILE" | sed 's/^/  /'
        fi
    else
        echo "❌ vLLM DotsOCR is not running"
        if [ -f "$PIDFILE" ]; then
            echo "Removing stale PID file..."
            rm -f "$PIDFILE"
        fi
    fi
}
|
|
|
|
|
|
-# 修复后的 launch_model_vllm.sh
|
|
|
-echo "🔧 注册 DotsOCR 模型到 vLLM..."
|
|
|
-vllm_path=$(which vllm)
|
|
|
-if ! grep -q "from DotsOCR import modeling_dots_ocr_vllm" "$vllm_path"; then
|
|
|
- sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
|
|
|
-from DotsOCR import modeling_dots_ocr_vllm' "$vllm_path"
|
|
|
- echo "✅ DotsOCR 模型已注册到 vLLM"
|
|
|
-else
|
|
|
- echo "✅ DotsOCR 模型已经注册过了"
|
|
|
-fi
|
|
|
# Follow the service log (tail -f; Ctrl-C to stop).
# Prints an error instead when no log file exists yet.
logs() {
    if [ -f "$LOGFILE" ]; then
        echo "📄 vLLM DotsOCR logs:"
        echo "=================="
        tail -f "$LOGFILE"
    else
        echo "❌ Log file not found: $LOGFILE"
    fi
}
|
|
|
+
|
|
|
# Print the effective configuration, model directory contents, environment
# tool locations (python/vllm/conda/nvcc) and GPU inventory.
config() {
    echo "📋 Current configuration:"
    echo "  Conda Environment: $CONDA_ENV"
    echo "  Host: $HOST"
    echo "  Port: $PORT"
    echo "  Model Path: $HF_MODEL_PATH"
    echo "  Model Name: $MODEL_NAME"
    echo "  GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
    echo "  Data Parallel Size: $DATA_PARALLEL_SIZE"
    echo "  Max Model Length: $MAX_MODEL_LEN"
    echo "  Max Num Seqs: $MAX_NUM_SEQS"
    echo "  PID File: $PIDFILE"
    echo "  Log File: $LOGFILE"

    if [ -d "$HF_MODEL_PATH" ]; then
        echo "✅ Model path exists"
        echo "  Model files:"
        ls -la "$HF_MODEL_PATH" | head -10 | sed 's/^/    /'
        # Count directory entries with find instead of parsing `ls` output
        # (robust to unusual filenames).
        if [ "$(find "$HF_MODEL_PATH" -mindepth 1 -maxdepth 1 | wc -l)" -gt 10 ]; then
            echo "  ... and more files"
        fi
    else
        echo "❌ Model path not found"
    fi

    # Environment summary (`command -v` is the portable tool lookup).
    echo ""
    echo "🔧 Environment:"
    echo "  Python: $(command -v python 2>/dev/null || echo 'Not found')"
    echo "  vLLM: $(command -v vllm 2>/dev/null || echo 'Not found')"
    echo "  Conda: $(command -v conda 2>/dev/null || echo 'Not found')"
    echo "  CUDA: $(command -v nvcc 2>/dev/null || echo 'Not found')"

    # GPU inventory.
    if command -v nvidia-smi >/dev/null 2>&1; then
        echo ""
        echo "🔥 GPU Information:"
        nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits | \
            awk -F',' '{printf "  GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
    fi
}
|
|
|
+
|
|
|
# Smoke-test the running service's /v1/models endpoint with curl.
# Returns: 1 when the daemon is not running or curl is missing, 0 otherwise.
test_api() {
    echo "🧪 Testing vLLM DotsOCR API..."

    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "❌ vLLM service is not running"
        return 1
    fi

    if ! command -v curl >/dev/null 2>&1; then
        echo "❌ curl command not found"
        return 1
    fi

    echo "📡 Testing /v1/models endpoint..."
    # Capture the body and test curl's exit status in one step, so the
    # status cannot be clobbered between assignment and check.
    if response=$(curl -s --connect-timeout 5 "http://127.0.0.1:$PORT/v1/models"); then
        echo "✅ Models endpoint accessible"
        # Pretty-print JSON when possible; fall back to the raw body.
        echo "$response" | python -m json.tool 2>/dev/null || echo "$response"
    else
        echo "❌ Models endpoint not accessible"
    fi
}
|
|
|
+
|
|
|
# Print command-line help for this daemon script.
usage() {
    cat <<EOF
vLLM DotsOCR Service Daemon
============================
Usage: $0 {start|stop|restart|status|logs|config|test}

Commands:
  start   - Start the vLLM DotsOCR service
  stop    - Stop the vLLM DotsOCR service
  restart - Restart the vLLM DotsOCR service
  status  - Show service status and resource usage
  logs    - Show service logs (follow mode)
  config  - Show current configuration
  test    - Test API endpoints

Configuration (edit script to modify):
  Host: $HOST
  Port: $PORT
  Model: $HF_MODEL_PATH
  GPU Memory: $GPU_MEMORY_UTILIZATION
  Parallel Size: $DATA_PARALLEL_SIZE

Examples:
  ./vllm_daemon.sh start
  ./vllm_daemon.sh status
  ./vllm_daemon.sh logs
  ./vllm_daemon.sh test
EOF
}
|
|
|
|
|
|
case "$1" in
|
|
|
start)
|
|
|
@@ -81,14 +328,23 @@ case "$1" in
|
|
|
;;
|
|
|
restart)
|
|
|
stop
|
|
|
- sleep 2
|
|
|
+ sleep 3
|
|
|
start
|
|
|
;;
|
|
|
status)
|
|
|
status
|
|
|
;;
|
|
|
+ logs)
|
|
|
+ logs
|
|
|
+ ;;
|
|
|
+ config)
|
|
|
+ config
|
|
|
+ ;;
|
|
|
+ test)
|
|
|
+ test_api
|
|
|
+ ;;
|
|
|
*)
|
|
|
- echo "Usage: $0 {start|stop|restart|status}"
|
|
|
+ usage
|
|
|
exit 1
|
|
|
;;
|
|
|
esac
|