浏览代码

feat: 添加PaddleOCR-VL vLLM服务守护进程脚本,支持启动、停止、状态检查和日志查看功能

zhch158_admin 1 月之前
父节点
当前提交
f29c59f8fd
共有 1 个文件被更改,包括 382 次插入和 0 次删除
  1. 382 0
      zhch/paddle_vllm_daemon.sh

+ 382 - 0
zhch/paddle_vllm_daemon.sh

@@ -0,0 +1,382 @@
#!/bin/bash
# filepath: /home/ubuntu/zhch/PaddleX/zhch/paddle_vllm_daemon.sh
#
# PaddleOCR-VL vLLM service daemon script: start/stop/restart/status/logs
# helpers for a paddlex_genai_server instance serving the PaddleOCR-VL model.

# Runtime state locations (PID file + service log).
LOGDIR="/home/ubuntu/zhch/logs"
mkdir -p "$LOGDIR"
PIDFILE="$LOGDIR/paddleocr_vl_vllm.pid"
LOGFILE="$LOGDIR/paddleocr_vl_vllm.log"

# Service configuration
CONDA_ENV="paddle"  # adjust to your environment
PORT="8110"
HOST="0.0.0.0"
MODEL_NAME="PaddleOCR-VL-0.9B"
BACKEND="vllm"

# GPU configuration
GPU_MEMORY_UTILIZATION="0.3"
CUDA_VISIBLE_DEVICES="3"  # use GPU #3
# NOTE(review): the three limits below are shown by `config` but are NOT
# currently passed to the server in start(); confirm the backend_config key
# names before wiring them in.
MAX_MODEL_LEN="16384"
MAX_NUM_BATCHED_TOKENS="8192"
MAX_NUM_SEQS="8"

# PaddleX environment variables
export PADDLE_PDX_MODEL_SOURCE="bos"
export PYTHONWARNINGS="ignore::UserWarning"

# Initialize and activate the conda environment; fall back to prepending the
# env's bin directory when conda's shell hook is not available.
if [ -f "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" ]; then
    source /home/ubuntu/anaconda3/etc/profile.d/conda.sh
    conda activate "$CONDA_ENV"
elif [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then
    source /opt/conda/etc/profile.d/conda.sh
    conda activate "$CONDA_ENV"
else
    echo "Warning: Using direct conda path activation"
    export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
fi
+
#######################################
# Start the vLLM server as a detached background daemon.
# Globals:  PIDFILE, LOGFILE, LOGDIR, HOST, PORT, MODEL_NAME, BACKEND,
#           GPU_MEMORY_UTILIZATION, CUDA_VISIBLE_DEVICES (read)
# Returns:  1 if already running or prerequisites missing, else 0
#######################################
start() {
    # Refuse to start when the recorded PID still points at a live process.
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "PaddleOCR-VL vLLM is already running"
        return 1
    fi

    echo "Starting PaddleOCR-VL vLLM daemon..."
    echo "Host: $HOST, Port: $PORT"
    echo "Model: $MODEL_NAME, Backend: $BACKEND"
    echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
    echo "CUDA devices: $CUDA_VISIBLE_DEVICES"

    # Sanity check: the conda env must provide python.
    if ! command -v python >/dev/null 2>&1; then
        echo "❌ Python not found. Check conda environment activation."
        return 1
    fi

    # Sanity check: the vLLM server plugin must be installed.
    if ! command -v paddlex_genai_server >/dev/null 2>&1; then
        echo "❌ paddlex_genai_server not found. Please install vllm-server plugin:"
        echo "   paddlex --install genai-vllm-server"
        return 1
    fi

    echo "🔧 Using Python: $(which python)"
    echo "🔧 Using paddlex_genai_server: $(which paddlex_genai_server)"

    # Show the state of the target GPU before launching.
    echo "📊 GPU 状态检查:"
    if command -v nvidia-smi >/dev/null 2>&1; then
        nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits | \
        grep "^$CUDA_VISIBLE_DEVICES," | \
        awk -F',' '{printf "  GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
    else
        echo "⚠️  nvidia-smi not available"
    fi

    # Write the backend config to a real file instead of a process
    # substitution (<(...)): the /dev/fd descriptor created by a process
    # substitution can already be gone by the time the nohup'd daemon gets
    # around to opening its config file.
    # TODO(review): MAX_MODEL_LEN / MAX_NUM_BATCHED_TOKENS / MAX_NUM_SEQS are
    # defined in the config section but not passed here — confirm the exact
    # backend_config key names before adding them.
    local backend_config="$LOGDIR/paddleocr_vl_vllm_backend.yaml"
    cat > "$backend_config" <<EOF
gpu-memory-utilization: $GPU_MEMORY_UTILIZATION
EOF

    # Launch detached from the terminal; all server output goes to LOGFILE.
    CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" nohup paddlex_genai_server \
        --model_name "$MODEL_NAME" \
        --backend "$BACKEND" \
        --host "$HOST" \
        --port "$PORT" \
        --backend_config "$backend_config" > "$LOGFILE" 2>&1 &

    echo $! > "$PIDFILE"
    echo "✅ PaddleOCR-VL vLLM started with PID: $(cat "$PIDFILE")"
    echo "📋 Log file: $LOGFILE"
    echo "🌐 Service URL: http://$HOST:$PORT"
    echo "📖 API Documentation: http://localhost:$PORT/docs"
    echo ""
    echo "Waiting for service to start..."
    sleep 5
    status
}
+
#######################################
# Stop the running vLLM server: SIGTERM first, SIGKILL after a 10s grace
# period, then remove the PID file.
# Globals:  PIDFILE (read, file removed)
# Returns:  1 when no PID file exists (service considered not running)
#######################################
stop() {
    if [ ! -f "$PIDFILE" ]; then
        echo "PaddleOCR-VL vLLM is not running"
        return 1
    fi

    local pid
    pid=$(cat "$PIDFILE")
    echo "Stopping PaddleOCR-VL vLLM (PID: $pid)..."

    # Graceful stop first (SIGTERM).
    kill "$pid"

    # Give the process up to 10 seconds to exit on its own.
    local i
    for i in {1..10}; do
        if ! kill -0 "$pid" 2>/dev/null; then
            break
        fi
        echo "Waiting for process to stop... ($i/10)"
        sleep 1
    done

    # Escalate to SIGKILL only if it is still alive.
    if kill -0 "$pid" 2>/dev/null; then
        echo "Force killing process..."
        kill -9 "$pid"
    fi

    rm -f "$PIDFILE"
    echo "✅ PaddleOCR-VL vLLM stopped"
}
+
#######################################
# Report service status: liveness, port listen state, API probe, GPU usage
# and the tail of the log. Removes a stale PID file when the process is gone.
# Globals:  PIDFILE, LOGFILE, HOST, PORT, CUDA_VISIBLE_DEVICES (read)
#######################################
status() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        local pid
        pid=$(cat "$PIDFILE")
        echo "✅ PaddleOCR-VL vLLM is running (PID: $pid)"
        echo "🌐 Service URL: http://$HOST:$PORT"
        echo "📋 Log file: $LOGFILE"

        # Port-listen check: prefer ss, fall back to netstat; silently skip
        # when neither tool is available (same as the original behavior).
        local sockets=""
        if command -v ss >/dev/null 2>&1; then
            sockets=$(ss -tuln)
        elif command -v netstat >/dev/null 2>&1; then
            sockets=$(netstat -tuln)
        fi
        if [ -n "$sockets" ]; then
            if printf '%s\n' "$sockets" | grep -q ":$PORT "; then
                echo "🔗 Port $PORT is being listened"
            else
                echo "⚠️  Port $PORT is not being listened (service may be starting up)"
            fi
        fi

        # Quick API liveness probe via the models endpoint.
        if command -v curl >/dev/null 2>&1; then
            if curl -s --connect-timeout 2 "http://127.0.0.1:$PORT/v1/models" > /dev/null 2>&1; then
                echo "🎯 API 响应正常"
            else
                echo "⚠️  API 无响应 (service may be starting up)"
            fi
        fi

        # Utilization of the configured GPU only.
        if command -v nvidia-smi >/dev/null 2>&1; then
            echo "📊 GPU 使用情况:"
            nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits | \
            grep "^$CUDA_VISIBLE_DEVICES," | \
            awk -F',' '{printf "  GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
        fi

        # Tail of the service log for quick context.
        if [ -f "$LOGFILE" ]; then
            echo "📄 Latest logs (last 3 lines):"
            tail -3 "$LOGFILE" | sed 's/^/  /'
        fi
    else
        echo "❌ PaddleOCR-VL vLLM is not running"
        if [ -f "$PIDFILE" ]; then
            echo "Removing stale PID file..."
            rm -f "$PIDFILE"
        fi
    fi
}
+
#######################################
# Follow the service log (blocks in tail -f until interrupted).
# Globals:  LOGFILE (read)
#######################################
logs() {
    # Guard clause: nothing to stream if the log file was never created.
    if [ ! -f "$LOGFILE" ]; then
        echo "❌ Log file not found: $LOGFILE"
        return
    fi
    echo "📄 PaddleOCR-VL vLLM logs:"
    echo "=================="
    tail -f "$LOGFILE"
}
+
#######################################
# Print the static configuration plus detected tool/GPU environment.
# Globals:  all configuration variables from the header (read)
#######################################
config() {
    echo "📋 Current configuration:"
    echo "  Conda Environment: $CONDA_ENV"
    echo "  Host: $HOST"
    echo "  Port: $PORT"
    echo "  Model Name: $MODEL_NAME"
    echo "  Backend: $BACKEND"
    echo "  GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
    echo "  CUDA Visible Devices: $CUDA_VISIBLE_DEVICES"
    echo "  Max Model Length: $MAX_MODEL_LEN"
    # MAX_NUM_BATCHED_TOKENS is a declared tunable; it was missing from this
    # report while its siblings were shown.
    echo "  Max Num Batched Tokens: $MAX_NUM_BATCHED_TOKENS"
    echo "  Max Num Seqs: $MAX_NUM_SEQS"
    echo "  PID File: $PIDFILE"
    echo "  Log File: $LOGFILE"
    echo ""
    echo "  Model Source: ${PADDLE_PDX_MODEL_SOURCE:-default}"

    # Tool locations — useful when debugging conda activation problems.
    echo ""
    echo "🔧 Environment:"
    echo "  Python: $(command -v python 2>/dev/null || echo 'Not found')"
    echo "  paddlex_genai_server: $(command -v paddlex_genai_server 2>/dev/null || echo 'Not found')"
    echo "  Conda: $(command -v conda 2>/dev/null || echo 'Not found')"
    echo "  CUDA: $(command -v nvcc 2>/dev/null || echo 'Not found')"

    # Static facts about the configured GPU.
    if command -v nvidia-smi >/dev/null 2>&1; then
        echo ""
        echo "🔥 GPU Information:"
        nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits | \
        grep "^$CUDA_VISIBLE_DEVICES," | \
        awk -F',' '{printf "  GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
    fi
}
+
#######################################
# Smoke-test the HTTP API of a running service instance.
# Globals:  PIDFILE, PORT (read)
# Returns:  1 when the service is not running or curl is unavailable
#######################################
test_api() {
    echo "🧪 Testing PaddleOCR-VL vLLM API..."

    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "❌ PaddleOCR-VL vLLM service is not running"
        return 1
    fi

    if ! command -v curl >/dev/null 2>&1; then
        echo "❌ curl command not found"
        return 1
    fi

    echo "📡 Testing /v1/models endpoint..."
    # Check curl's exit status directly rather than via a trailing $? test.
    local response
    if response=$(curl -s --connect-timeout 5 "http://127.0.0.1:$PORT/v1/models"); then
        echo "✅ Models endpoint accessible"
        # Pretty-print the JSON when possible; fall back to raw output.
        echo "$response" | python -m json.tool 2>/dev/null || echo "$response"
    else
        echo "❌ Models endpoint not accessible"
    fi

    echo ""
    echo "📡 Testing health endpoint..."
    local health_response
    if health_response=$(curl -s --connect-timeout 5 "http://127.0.0.1:$PORT/health"); then
        echo "✅ Health endpoint accessible"
        echo "$health_response"
    else
        echo "❌ Health endpoint not accessible"
    fi
}
+
#######################################
# End-to-end check: run the PaddleX OCR pipeline against the running server.
# Globals:  PIDFILE (read); TEST_IMAGE / TEST_OUTPUT / PIPELINE_CONFIG may be
#           set in the environment to override the built-in fixture paths.
# Returns:  1 when the service is down, fixtures are missing, or the CLI fails
#######################################
test_client() {
    echo "🧪 Testing PaddleOCR-VL client with vLLM server..."

    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "❌ PaddleOCR-VL vLLM service is not running. Start it first with: $0 start"
        return 1
    fi

    # Test fixture locations — overridable from the environment so other
    # machines don't have to edit the script; defaults match the original.
    local test_image="${TEST_IMAGE:-/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/2023年度报告母公司.img/2023年度报告母公司_page_006.png}"
    local test_output="${TEST_OUTPUT:-/tmp/paddleocr_vl_vllm_test_output}"
    local pipeline_config="${PIPELINE_CONFIG:-/home/ubuntu/zhch/PaddleX/zhch/my_config/PaddleOCR-VL-Client.yaml}"

    if [ ! -f "$test_image" ]; then
        echo "⚠️  Test image not found: $test_image"
        echo "Please provide a test image or update the TEST_IMAGE path in the script"
        return 1
    fi

    if [ ! -f "$pipeline_config" ]; then
        echo "⚠️  Pipeline config not found: $pipeline_config"
        echo "Please update the PIPELINE_CONFIG path in the script"
        return 1
    fi

    echo "📄 Testing with image: $test_image"
    echo "⚙️  Using pipeline config: $pipeline_config"
    echo "📁 Output directory: $test_output"
    echo ""

    # Run the pipeline through the paddlex CLI (recommended path).
    echo "🔧 Using paddlex CLI..."
    mkdir -p "$test_output"

    # Check the CLI's exit status directly instead of via a trailing $? test.
    if paddlex --pipeline "$pipeline_config" \
               --input "$test_image" \
               --save_path "$test_output" \
               --use_doc_orientation_classify False \
               --use_doc_unwarping False; then
        echo "✅ CLI test completed successfully"
        echo "📁 Results saved to: $test_output"

        # List what the pipeline produced.
        if [ -d "$test_output" ]; then
            echo ""
            echo "📂 Generated files:"
            ls -lh "$test_output" | tail -n +2 | awk '{print "  " $9 " (" $5 ")"}'
        fi
    else
        echo "❌ CLI test failed"
        return 1
    fi
}
+
# Show usage help
usage() {
    # One here-doc instead of thirty echo calls; output text is unchanged.
    cat <<USAGE_EOF
PaddleOCR-VL vLLM Service Daemon
=================================
Usage: $0 {start|stop|restart|status|logs|config|test|test-client}

Commands:
  start       - Start the PaddleOCR-VL vLLM service
  stop        - Stop the PaddleOCR-VL vLLM service
  restart     - Restart the PaddleOCR-VL vLLM service
  status      - Show service status and resource usage
  logs        - Show service logs (follow mode)
  config      - Show current configuration
  test        - Test API endpoints
  test-client - Test PaddleX client with vLLM server

Configuration (edit script to modify):
  Host: $HOST
  Port: $PORT
  Model: $MODEL_NAME
  Backend: $BACKEND
  GPU Memory: $GPU_MEMORY_UTILIZATION
  CUDA Devices: $CUDA_VISIBLE_DEVICES

Examples:
  ./paddle_vllm_daemon.sh start
  ./paddle_vllm_daemon.sh status
  ./paddle_vllm_daemon.sh logs
  ./paddle_vllm_daemon.sh test
  ./paddle_vllm_daemon.sh test-client
USAGE_EOF
}
+
# Command dispatcher: route the first CLI argument to its handler; anything
# unrecognized (including no argument) prints usage and exits non-zero.
case "${1:-}" in
    start)       start ;;
    stop)        stop ;;
    restart)
        # Stop, give the port/GPU a moment to free up, then start again.
        stop
        sleep 3
        start
        ;;
    status)      status ;;
    logs)        logs ;;
    config)      config ;;
    test)        test_api ;;
    test-client) test_client ;;
    *)
        usage
        exit 1
        ;;
esac