#!/bin/bash
# Repository: zhengchun/MinerU
# filepath: /home/ubuntu/zhch/MinerU/zhch/mineru_vllm_daemon.sh

# MinerU vLLM 服务守护进程脚本

LOGDIR="/home/ubuntu/zhch/logs"
mkdir -p $LOGDIR
PIDFILE="$LOGDIR/mineru_vllm.pid"
LOGFILE="$LOGDIR/mineru_vllm.log"

# 配置参数
CONDA_ENV="mineru2"
PORT="8121"
HOST="0.0.0.0"
MODEL_PATH="/home/ubuntu/models/modelscope_cache/models/OpenDataLab/MinerU2___5-2509-1___2B"
MODEL_NAME="MinerU2.5"

# GPU 配置
GPU_MEMORY_UTILIZATION="0.3"
CUDA_VISIBLE_DEVICES="4"
MAX_MODEL_LEN="16384"
MAX_NUM_BATCHED_TOKENS="8192"
MAX_NUM_SEQS="8"

# MinerU 配置
export MINERU_TOOLS_CONFIG_JSON="/home/ubuntu/zhch/MinerU/mineru.json"
export MODELSCOPE_CACHE="/home/ubuntu/models/modelscope_cache"
export USE_MODELSCOPE_HUB=1
# export CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES"
# export NLTK_DATA="/home/ubuntu/nltk_data"
# export HF_HOME="/home/ubuntu/models/hf_home"
# export HF_ENDPOINT="https://hf-mirror.com"
# export TORCH_HOME="/home/ubuntu/models/torch/"

# 正确初始化和激活conda环境
if [ -f "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" ]; then
    source /home/ubuntu/anaconda3/etc/profile.d/conda.sh
    conda activate $CONDA_ENV
elif [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then
    source /opt/conda/etc/profile.d/conda.sh
    conda activate $CONDA_ENV
else
    # 方法2：直接使用conda可执行文件路径
    echo "Warning: Using direct conda path activation"
    export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
fi

# 设置环境变量
# export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
# export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

start() {
    if [ -f $PIDFILE ] && kill -0 $(cat $PIDFILE) 2>/dev/null; then
        echo "MinerU vLLM is already running"
        return 1
    fi
    
    echo "Starting MinerU vLLM daemon..."
    echo "Host: $HOST, Port: $PORT"
    echo "Model path: $MODEL_PATH"
    echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
    echo "CUDA devices: $CUDA_VISIBLE_DEVICES"
    
    # 检查模型文件是否存在
    if [ ! -d "$MODEL_PATH" ]; then
        echo "❌ Model path not found: $MODEL_PATH"
        echo "Please download the model first:"
        echo "python -m mineru.cli.models_download"
        return 1
    fi
    
    # 检查conda环境
    if ! command -v python >/dev/null 2>&1; then
        echo "❌ Python not found. Check conda environment activation."
        return 1
    fi
    
    # 检查mineru-vllm-server命令
    if ! command -v mineru-vllm-server >/dev/null 2>&1; then
        echo "❌ mineru-vllm-server not found. Check installation and environment."
        return 1
    fi
    
    echo "🔧 Using Python: $(which python)"
    echo "🔧 Using mineru-vllm-server: $(which mineru-vllm-server)"
    
    # 显示GPU状态
    echo "📊 GPU 状态检查:"
    if command -v nvidia-smi >/dev/null 2>&1; then
        nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits | \
        awk -F',' '{printf "  GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
    else
        echo "⚠️  nvidia-smi not available"
    fi
    
    # 启动MinerU vLLM服务
    nohup $CUDA_VISIBLE_DEVICES; mineru-vllm-server \
            --host $HOST \
            --port $PORT \
            --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
             --served-model-name $MODEL_NAME \
    > $LOGFILE 2>&1 &
    
    echo $! > $PIDFILE
    echo "✅ MinerU vLLM started with PID: $(cat $PIDFILE)"
    echo "📋 Log file: $LOGFILE"
    echo "🌐 Service URL: http://$HOST:$PORT"
    echo "📖 API Documentation: http://localhost:$PORT/docs"
    echo ""
    echo "Waiting for service to start..."
    sleep 5
    status
}

stop() {
    if [ ! -f $PIDFILE ]; then
        echo "MinerU vLLM is not running"
        return 1
    fi
    
    PID=$(cat $PIDFILE)
    echo "Stopping MinerU vLLM (PID: $PID)..."
    
    # 优雅停止
    kill $PID
    
    # 等待进程结束
    for i in {1..10}; do
        if ! kill -0 $PID 2>/dev/null; then
            break
        fi
        echo "Waiting for process to stop... ($i/10)"
        sleep 1
    done
    
    # 如果进程仍在运行，强制结束
    if kill -0 $PID 2>/dev/null; then
        echo "Force killing process..."
        kill -9 $PID
    fi
    
    rm -f $PIDFILE
    echo "✅ MinerU vLLM stopped"
}

status() {
    if [ -f $PIDFILE ] && kill -0 $(cat $PIDFILE) 2>/dev/null; then
        PID=$(cat $PIDFILE)
        echo "✅ MinerU vLLM is running (PID: $PID)"
        echo "🌐 Service URL: http://$HOST:$PORT"
        echo "📋 Log file: $LOGFILE"
        
        # 检查端口是否被监听
        if command -v ss >/dev/null 2>&1; then
            if ss -tuln | grep -q ":$PORT "; then
                echo "🔗 Port $PORT is being listened"
            else
                echo "⚠️  Port $PORT is not being listened (service may be starting up)"
            fi
        elif command -v netstat >/dev/null 2>&1; then
            if netstat -tuln | grep -q ":$PORT "; then
                echo "🔗 Port $PORT is being listened"
            else
                echo "⚠️  Port $PORT is not being listened (service may be starting up)"
            fi
        fi
        
        # 检查API响应
        if command -v curl >/dev/null 2>&1; then
            if curl -s --connect-timeout 2 http://127.0.0.1:$PORT/v1/models > /dev/null 2>&1; then
                echo "🎯 API 响应正常"
            else
                echo "⚠️  API 无响应 (service may be starting up)"
            fi
        fi
        
        # 显示GPU使用情况
        if command -v nvidia-smi >/dev/null 2>&1; then
            echo "📊 GPU 使用情况:"
            nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits | \
            awk -F',' '{printf "  GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
        fi
        
        # 显示最新日志
        if [ -f $LOGFILE ]; then
            echo "📄 Latest logs (last 3 lines):"
            tail -3 $LOGFILE | sed 's/^/  /'
        fi
    else
        echo "❌ MinerU vLLM is not running"
        if [ -f $PIDFILE ]; then
            echo "Removing stale PID file..."
            rm -f $PIDFILE
        fi
    fi
}

logs() {
    if [ -f $LOGFILE ]; then
        echo "📄 MinerU vLLM logs:"
        echo "=================="
        tail -f $LOGFILE
    else
        echo "❌ Log file not found: $LOGFILE"
    fi
}

config() {
    echo "📋 Current configuration:"
    echo "  Conda Environment: $CONDA_ENV"
    echo "  Host: $HOST"
    echo "  Port: $PORT"
    echo "  Model Path: $MODEL_PATH"
    echo "  Model Name: $MODEL_NAME"
    echo "  GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
    echo "  CUDA Visible Devices: $CUDA_VISIBLE_DEVICES"
    echo "  Max Model Length: $MAX_MODEL_LEN"
    echo "  Max Num Seqs: $MAX_NUM_SEQS"
    echo "  PID File: $PIDFILE"
    echo "  Log File: $LOGFILE"
    echo ""
    echo "  MinerU Config: $MINERU_TOOLS_CONFIG_JSON"
    echo "  ModelScope Cache: $MODELSCOPE_CACHE"
    
    if [ -d "$MODEL_PATH" ]; then
        echo "✅ Model path exists"
        echo "  Model files:"
        ls -la "$MODEL_PATH" | head -10 | sed 's/^/    /'
        if [ $(ls -1 "$MODEL_PATH" | wc -l) -gt 10 ]; then
            echo "    ... and more files"
        fi
    else
        echo "❌ Model path not found"
    fi
    
    # 检查MinerU配置文件
    if [ -f "$MINERU_TOOLS_CONFIG_JSON" ]; then
        echo "✅ MinerU config file exists"
    else
        echo "❌ MinerU config file not found: $MINERU_TOOLS_CONFIG_JSON"
    fi
    
    # 显示环境信息
    echo ""
    echo "🔧 Environment:"
    echo "  Python: $(which python 2>/dev/null || echo 'Not found')"
    echo "  mineru-vllm-server: $(which mineru-vllm-server 2>/dev/null || echo 'Not found')"
    echo "  Conda: $(which conda 2>/dev/null || echo 'Not found')"
    echo "  CUDA: $(which nvcc 2>/dev/null || echo 'Not found')"
    
    # 显示GPU信息
    if command -v nvidia-smi >/dev/null 2>&1; then
        echo ""
        echo "🔥 GPU Information:"
        nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits | \
        awk -F',' '{printf "  GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
    fi
}

test_api() {
    echo "🧪 Testing MinerU vLLM API..."
    
    if [ ! -f $PIDFILE ] || ! kill -0 $(cat $PIDFILE) 2>/dev/null; then
        echo "❌ MinerU vLLM service is not running"
        return 1
    fi
    
    if ! command -v curl >/dev/null 2>&1; then
        echo "❌ curl command not found"
        return 1
    fi
    
    echo "📡 Testing /v1/models endpoint..."
    response=$(curl -s --connect-timeout 5 http://127.0.0.1:$PORT/v1/models)
    if [ $? -eq 0 ]; then
        echo "✅ Models endpoint accessible"
        echo "$response" | python -m json.tool 2>/dev/null || echo "$response"
    else
        echo "❌ Models endpoint not accessible"
    fi
    
    echo ""
    echo "📡 Testing health endpoint..."
    health_response=$(curl -s --connect-timeout 5 http://127.0.0.1:$PORT/health)
    if [ $? -eq 0 ]; then
        echo "✅ Health endpoint accessible"
        echo "$health_response"
    else
        echo "❌ Health endpoint not accessible"
    fi
}

test_client() {
    echo "🧪 Testing MinerU client with vLLM server..."
    
    if [ ! -f $PIDFILE ] || ! kill -0 $(cat $PIDFILE) 2>/dev/null; then
        echo "❌ MinerU vLLM service is not running. Start it first with: $0 start"
        return 1
    fi
    
    # 测试用例文件路径（需要根据实际情况调整）
    TEST_IMAGE="/home/ubuntu/zhch/data/test/sample.png"
    TEST_OUTPUT="/tmp/mineru_vllm_test_output"
    
    if [ ! -f "$TEST_IMAGE" ]; then
        echo "⚠️  Test image not found: $TEST_IMAGE"
        echo "Please provide a test image or update the TEST_IMAGE path in the script"
        return 1
    fi
    
    echo "📄 Testing with image: $TEST_IMAGE"
    echo "📁 Output directory: $TEST_OUTPUT"
    
    # 使用HTTP客户端连接到vLLM服务器
    python -m mineru.cli.client \
        -p "$TEST_IMAGE" \
        -o "$TEST_OUTPUT" \
        --backend vlm-http-client \
        --server-url "http://127.0.0.1:$PORT"
    
    if [ $? -eq 0 ]; then
        echo "✅ Client test completed successfully"
        echo "📁 Check output in: $TEST_OUTPUT"
    else
        echo "❌ Client test failed"
    fi
}

# 显示使用帮助
usage() {
    echo "MinerU vLLM Service Daemon"
    echo "=========================="
    echo "Usage: $0 {start|stop|restart|status|logs|config|test|test-client}"
    echo ""
    echo "Commands:"
    echo "  start       - Start the MinerU vLLM service"
    echo "  stop        - Stop the MinerU vLLM service"
    echo "  restart     - Restart the MinerU vLLM service"
    echo "  status      - Show service status and resource usage"
    echo "  logs        - Show service logs (follow mode)"
    echo "  config      - Show current configuration"
    echo "  test        - Test API endpoints"
    echo "  test-client - Test MinerU client with vLLM server"
    echo ""
    echo "Configuration (edit script to modify):"
    echo "  Host: $HOST"
    echo "  Port: $PORT"
    echo "  Model: $MODEL_PATH"
    echo "  GPU Memory: $GPU_MEMORY_UTILIZATION"
    echo "  CUDA Devices: $CUDA_VISIBLE_DEVICES"
    echo ""
    echo "Examples:"
    echo "  ./mineru_vllm_daemon.sh start"
    echo "  ./mineru_vllm_daemon.sh status"
    echo "  ./mineru_vllm_daemon.sh logs"
    echo "  ./mineru_vllm_daemon.sh test"
    echo "  ./mineru_vllm_daemon.sh test-client"
}

case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    restart)
        stop
        sleep 3
        start
        ;;
    status)
        status
        ;;
    logs)
        logs
        ;;
    config)
        config
        ;;
    test)
        test_api
        ;;
    test-client)
        test_client
        ;;
    *)
        usage
        exit 1
        ;;
esac