#!/bin/bash
# filepath: /home/ubuntu/zhch/PaddleX/zhch/paddle_vllm_daemon.sh
#
# PaddleOCR-VL vLLM service daemon script.
# Manages a paddlex_genai_server instance as a background daemon with a PID
# file: start | stop | restart | status | logs | config | test | test-client.

LOGDIR="/home/ubuntu/zhch/logs"
mkdir -p "$LOGDIR"
PIDFILE="$LOGDIR/paddleocr_vl_vllm.pid"
LOGFILE="$LOGDIR/paddleocr_vl_vllm.log"

# --- Service configuration --------------------------------------------------
CONDA_ENV="paddle"              # adjust to your environment
PORT="8110"
HOST="0.0.0.0"
MODEL_NAME="PaddleOCR-VL-0.9B"
BACKEND="vllm"

# --- GPU configuration ------------------------------------------------------
GPU_MEMORY_UTILIZATION="0.3"
CUDA_VISIBLE_DEVICES="3"        # use GPU 3
MAX_MODEL_LEN="16384"
MAX_NUM_BATCHED_TOKENS="8192"
MAX_NUM_SEQS="8"

# --- PaddleX environment variables ------------------------------------------
export PADDLE_PDX_MODEL_SOURCE="bos"
export PYTHONWARNINGS="ignore::UserWarning"

# Initialize and activate the conda environment; fall back to prepending the
# env's bin directory to PATH when no conda.sh can be found.
if [ -f "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" ]; then
    source /home/ubuntu/anaconda3/etc/profile.d/conda.sh
    conda activate "$CONDA_ENV"
elif [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then
    source /opt/conda/etc/profile.d/conda.sh
    conda activate "$CONDA_ENV"
else
    echo "Warning: Using direct conda path activation"
    export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
fi

# Start the vLLM server in the background and record its PID.
# Returns 1 if it is already running or prerequisites are missing.
start() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "PaddleOCR-VL vLLM is already running"
        return 1
    fi

    echo "Starting PaddleOCR-VL vLLM daemon..."
    echo "Host: $HOST, Port: $PORT"
    echo "Model: $MODEL_NAME, Backend: $BACKEND"
    echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
    echo "CUDA devices: $CUDA_VISIBLE_DEVICES"

    # Verify the activated environment actually provides python.
    if ! command -v python >/dev/null 2>&1; then
        echo "❌ Python not found. Check conda environment activation."
        return 1
    fi

    # Verify the paddlex_genai_server entry point is installed.
    if ! command -v paddlex_genai_server >/dev/null 2>&1; then
        echo "❌ paddlex_genai_server not found. Please install vllm-server plugin:"
        echo "   paddlex --install genai-vllm-server"
        return 1
    fi

    echo "🔧 Using Python: $(which python)"
    echo "🔧 Using paddlex_genai_server: $(which paddlex_genai_server)"

    # Show the current state of the selected GPU.
    echo "📊 GPU 状态检查:"
    if command -v nvidia-smi >/dev/null 2>&1; then
        nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits | \
            grep "^$CUDA_VISIBLE_DEVICES," | \
            awk -F',' '{printf "   GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
    else
        echo "⚠️ nvidia-smi not available"
    fi

    # Materialize the backend config as a real file rather than a <(...)
    # process substitution, so the nohup'd daemon can still read it after
    # this shell exits. (The original process-substitution form here was
    # truncated/corrupted and unclosed.)
    # NOTE(review): key names assumed to be vLLM-style snake_case — confirm
    # against the paddlex_genai_server backend_config schema.
    BACKEND_CONFIG_FILE="$LOGDIR/paddleocr_vl_vllm_backend.yaml"
    cat > "$BACKEND_CONFIG_FILE" <<EOF
gpu_memory_utilization: $GPU_MEMORY_UTILIZATION
max_model_len: $MAX_MODEL_LEN
max_num_batched_tokens: $MAX_NUM_BATCHED_TOKENS
max_num_seqs: $MAX_NUM_SEQS
EOF

    # Launch the PaddleOCR-VL vLLM service, detached from this terminal.
    CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" nohup paddlex_genai_server \
        --model_name "$MODEL_NAME" \
        --backend "$BACKEND" \
        --host "$HOST" \
        --port "$PORT" \
        --backend_config "$BACKEND_CONFIG_FILE" \
        > "$LOGFILE" 2>&1 &

    echo $! > "$PIDFILE"

    echo "✅ PaddleOCR-VL vLLM started with PID: $(cat "$PIDFILE")"
    echo "📋 Log file: $LOGFILE"
    echo "🌐 Service URL: http://$HOST:$PORT"
    echo "📖 API Documentation: http://localhost:$PORT/docs"
    echo ""
    echo "Waiting for service to start..."
    sleep 5
    status
}

# Stop the daemon: SIGTERM first, escalate to SIGKILL after ~10 seconds.
stop() {
    if [ ! -f "$PIDFILE" ]; then
        echo "PaddleOCR-VL vLLM is not running"
        return 1
    fi

    PID=$(cat "$PIDFILE")
    echo "Stopping PaddleOCR-VL vLLM (PID: $PID)..."

    # Graceful shutdown request.
    kill "$PID"

    # Wait up to 10 seconds for the process to exit on its own.
    for i in {1..10}; do
        if ! kill -0 "$PID" 2>/dev/null; then
            break
        fi
        echo "Waiting for process to stop... ($i/10)"
        sleep 1
    done

    # Still alive — force-kill as a last resort.
    if kill -0 "$PID" 2>/dev/null; then
        echo "Force killing process..."
        kill -9 "$PID"
    fi

    rm -f "$PIDFILE"
    echo "✅ PaddleOCR-VL vLLM stopped"
}

# Report liveness, port/API reachability, GPU usage and recent log lines.
# Removes a stale PID file if the recorded process is gone.
status() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        PID=$(cat "$PIDFILE")
        echo "✅ PaddleOCR-VL vLLM is running (PID: $PID)"
        echo "🌐 Service URL: http://$HOST:$PORT"
        echo "📋 Log file: $LOGFILE"

        # Check whether the service port is actually being listened on.
        if command -v ss >/dev/null 2>&1; then
            if ss -tuln | grep -q ":$PORT "; then
                echo "🔗 Port $PORT is being listened"
            else
                echo "⚠️ Port $PORT is not being listened (service may be starting up)"
            fi
        elif command -v netstat >/dev/null 2>&1; then
            if netstat -tuln | grep -q ":$PORT "; then
                echo "🔗 Port $PORT is being listened"
            else
                echo "⚠️ Port $PORT is not being listened (service may be starting up)"
            fi
        fi

        # Probe the OpenAI-compatible models endpoint.
        if command -v curl >/dev/null 2>&1; then
            if curl -s --connect-timeout 2 "http://127.0.0.1:$PORT/v1/models" > /dev/null 2>&1; then
                echo "🎯 API 响应正常"
            else
                echo "⚠️ API 无响应 (service may be starting up)"
            fi
        fi

        # GPU utilization for the configured device.
        if command -v nvidia-smi >/dev/null 2>&1; then
            echo "📊 GPU 使用情况:"
            nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits | \
                grep "^$CUDA_VISIBLE_DEVICES," | \
                awk -F',' '{printf "   GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
        fi

        # Tail the most recent log lines for a quick health glance.
        if [ -f "$LOGFILE" ]; then
            echo "📄 Latest logs (last 3 lines):"
            tail -3 "$LOGFILE" | sed 's/^/   /'
        fi
    else
        echo "❌ PaddleOCR-VL vLLM is not running"
        if [ -f "$PIDFILE" ]; then
            echo "Removing stale PID file..."
            rm -f "$PIDFILE"
        fi
    fi
}

# Follow the service log (blocks until interrupted).
logs() {
    if [ -f "$LOGFILE" ]; then
        echo "📄 PaddleOCR-VL vLLM logs:"
        echo "=================="
        tail -f "$LOGFILE"
    else
        echo "❌ Log file not found: $LOGFILE"
    fi
}

# Print the effective configuration plus environment/GPU diagnostics.
config() {
    echo "📋 Current configuration:"
    echo "   Conda Environment: $CONDA_ENV"
    echo "   Host: $HOST"
    echo "   Port: $PORT"
    echo "   Model Name: $MODEL_NAME"
    echo "   Backend: $BACKEND"
    echo "   GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
    echo "   CUDA Visible Devices: $CUDA_VISIBLE_DEVICES"
    echo "   Max Model Length: $MAX_MODEL_LEN"
    echo "   Max Num Batched Tokens: $MAX_NUM_BATCHED_TOKENS"
    echo "   Max Num Seqs: $MAX_NUM_SEQS"
    echo "   PID File: $PIDFILE"
    echo "   Log File: $LOGFILE"
    echo ""
    echo "   Model Source: ${PADDLE_PDX_MODEL_SOURCE:-default}"

    # Toolchain locations (helps diagnose a mis-activated environment).
    echo ""
    echo "🔧 Environment:"
    echo "   Python: $(which python 2>/dev/null || echo 'Not found')"
    echo "   paddlex_genai_server: $(which paddlex_genai_server 2>/dev/null || echo 'Not found')"
    echo "   Conda: $(which conda 2>/dev/null || echo 'Not found')"
    echo "   CUDA: $(which nvcc 2>/dev/null || echo 'Not found')"

    # Static info about the configured GPU.
    if command -v nvidia-smi >/dev/null 2>&1; then
        echo ""
        echo "🔥 GPU Information:"
        nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits | \
            grep "^$CUDA_VISIBLE_DEVICES," | \
            awk -F',' '{printf "   GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
    fi
}

# Smoke-test the running service's HTTP endpoints with curl.
test_api() {
    echo "🧪 Testing PaddleOCR-VL vLLM API..."

    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "❌ PaddleOCR-VL vLLM service is not running"
        return 1
    fi

    if ! command -v curl >/dev/null 2>&1; then
        echo "❌ curl command not found"
        return 1
    fi

    echo "📡 Testing /v1/models endpoint..."
    if response=$(curl -s --connect-timeout 5 "http://127.0.0.1:$PORT/v1/models"); then
        echo "✅ Models endpoint accessible"
        # Pretty-print JSON when possible; fall back to the raw body.
        echo "$response" | python -m json.tool 2>/dev/null || echo "$response"
    else
        echo "❌ Models endpoint not accessible"
    fi

    echo ""
    echo "📡 Testing health endpoint..."
    if health_response=$(curl -s --connect-timeout 5 "http://127.0.0.1:$PORT/health"); then
        echo "✅ Health endpoint accessible"
        echo "$health_response"
    else
        echo "❌ Health endpoint not accessible"
    fi
}

# Run an end-to-end OCR test through the paddlex CLI against the running
# server, using a fixed sample image and client pipeline config.
test_client() {
    echo "🧪 Testing PaddleOCR-VL client with vLLM server..."

    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "❌ PaddleOCR-VL vLLM service is not running. Start it first with: $0 start"
        return 1
    fi

    # Test fixture paths — edit these to point at your own data.
    TEST_IMAGE="/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/2023年度报告母公司.img/2023年度报告母公司_page_006.png"
    TEST_OUTPUT="/tmp/paddleocr_vl_vllm_test_output"
    PIPELINE_CONFIG="/home/ubuntu/zhch/PaddleX/zhch/my_config/PaddleOCR-VL-Client.yaml"

    if [ ! -f "$TEST_IMAGE" ]; then
        echo "⚠️ Test image not found: $TEST_IMAGE"
        echo "Please provide a test image or update the TEST_IMAGE path in the script"
        return 1
    fi

    if [ ! -f "$PIPELINE_CONFIG" ]; then
        echo "⚠️ Pipeline config not found: $PIPELINE_CONFIG"
        echo "Please update the PIPELINE_CONFIG path in the script"
        return 1
    fi

    echo "📄 Testing with image: $TEST_IMAGE"
    echo "⚙️ Using pipeline config: $PIPELINE_CONFIG"
    echo "📁 Output directory: $TEST_OUTPUT"
    echo ""

    # Method 1: paddlex CLI (recommended).
    echo "🔧 Using paddlex CLI..."
    mkdir -p "$TEST_OUTPUT"

    if paddlex --pipeline "$PIPELINE_CONFIG" \
        --input "$TEST_IMAGE" \
        --save_path "$TEST_OUTPUT" \
        --use_doc_orientation_classify False \
        --use_doc_unwarping False; then
        echo "✅ CLI test completed successfully"
        echo "📁 Results saved to: $TEST_OUTPUT"

        # List the produced artifacts (name and size).
        if [ -d "$TEST_OUTPUT" ]; then
            echo ""
            echo "📂 Generated files:"
            ls -lh "$TEST_OUTPUT" | tail -n +2 | awk '{print "   " $9 " (" $5 ")"}'
        fi
    else
        echo "❌ CLI test failed"
        return 1
    fi
}

# Print usage help for all subcommands.
usage() {
    echo "PaddleOCR-VL vLLM Service Daemon"
    echo "================================="
    echo "Usage: $0 {start|stop|restart|status|logs|config|test|test-client}"
    echo ""
    echo "Commands:"
    echo "  start       - Start the PaddleOCR-VL vLLM service"
    echo "  stop        - Stop the PaddleOCR-VL vLLM service"
    echo "  restart     - Restart the PaddleOCR-VL vLLM service"
    echo "  status      - Show service status and resource usage"
    echo "  logs        - Show service logs (follow mode)"
    echo "  config      - Show current configuration"
    echo "  test        - Test API endpoints"
    echo "  test-client - Test PaddleX client with vLLM server"
    echo ""
    echo "Configuration (edit script to modify):"
    echo "  Host: $HOST"
    echo "  Port: $PORT"
    echo "  Model: $MODEL_NAME"
    echo "  Backend: $BACKEND"
    echo "  GPU Memory: $GPU_MEMORY_UTILIZATION"
    echo "  CUDA Devices: $CUDA_VISIBLE_DEVICES"
    echo ""
    echo "Examples:"
    echo "  ./paddle_vllm_daemon.sh start"
    echo "  ./paddle_vllm_daemon.sh status"
    echo "  ./paddle_vllm_daemon.sh logs"
    echo "  ./paddle_vllm_daemon.sh test"
    echo "  ./paddle_vllm_daemon.sh test-client"
}

# --- Subcommand dispatch ----------------------------------------------------
case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    restart)
        stop
        sleep 3
        start
        ;;
    status)
        status
        ;;
    logs)
        logs
        ;;
    config)
        config
        ;;
    test)
        test_api
        ;;
    test-client)
        test_client
        ;;
    *)
        usage
        exit 1
        ;;
esac