#!/bin/bash
#
# MinerU FastAPI service daemon script.
#
# Starts, stops and inspects a background `python -m mineru.cli.fast_api`
# process, tracking it through a PID file and a log file under $LOGDIR.
#
# Usage: mineru_fast_api_daemon.sh {start|stop|restart|status|logs|config|test|test-client}
#
# NOTE: strict mode (`set -euo pipefail`) is intentionally not enabled here;
# the sourced conda activation scripts are outside this file's control.

LOGDIR="/home/ubuntu/zhch/logs"
mkdir -p "$LOGDIR"
PIDFILE="$LOGDIR/mineru_fastapi.pid"
LOGFILE="$LOGDIR/mineru_fastapi.log"

# Service configuration
CONDA_ENV="mineru2"
API_PORT="8120"
API_HOST="0.0.0.0"

# vLLM backend configuration (used with the vlm-vllm-engine backend)
VLLM_PORT="8121"
VLLM_HOST="127.0.0.1"
MODEL_PATH="/home/ubuntu/models/modelscope_cache/models/OpenDataLab/MinerU2___5-2509-1___2B"
MODEL_NAME="MinerU2.5"

# GPU configuration
GPU_MEMORY_UTILIZATION="0.3"
CUDA_VISIBLE_DEVICES="4"
MAX_MODEL_LEN="16384"
MAX_NUM_BATCHED_TOKENS="8192"
MAX_NUM_SEQS="8"

# MinerU configuration (exported so the Python process inherits it)
export MINERU_TOOLS_CONFIG_JSON="/home/ubuntu/zhch/MinerU/mineru.json"
export MODELSCOPE_CACHE="/home/ubuntu/models/modelscope_cache"
export USE_MODELSCOPE_HUB=1

# Initialise conda from the first install that provides conda.sh and activate
# the environment; otherwise fall back to prepending the env's bin dir to PATH.
conda_activated=0
for conda_sh in \
  "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" \
  "/opt/conda/etc/profile.d/conda.sh"; do
  if [[ -f "$conda_sh" ]]; then
    # shellcheck disable=SC1090
    source "$conda_sh"
    conda activate "$CONDA_ENV"
    conda_activated=1
    break
  fi
done
if (( ! conda_activated )); then
  echo "Warning: Using direct conda path activation"
  export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
fi
unset conda_sh conda_activated

#######################################
# Report whether the PID recorded in $PIDFILE refers to a live process.
# Globals:   PIDFILE (read)
# Returns:   0 if the PID file holds a positive integer PID that can be
#            signalled, non-zero otherwise.
#######################################
is_running() {
  local pid
  [[ -f "$PIDFILE" ]] || return 1
  pid=$(cat "$PIDFILE" 2>/dev/null) || return 1
  # Reject empty/garbage content and 0 (kill -0 0 targets our own group).
  [[ "$pid" =~ ^[1-9][0-9]*$ ]] || return 1
  kill -0 "$pid" 2>/dev/null
}

#######################################
# Start the FastAPI service in the background and record its PID.
# Globals:   PIDFILE, LOGFILE, API_*, VLLM_*, MODEL_*, GPU_*, MAX_*,
#            CUDA_VISIBLE_DEVICES (read)
# Returns:   1 if already running or python is missing; otherwise the
#            status of the trailing `status` call.
#######################################
start() {
  if is_running; then
    echo "MinerU FastAPI is already running"
    return 1
  fi

  echo "Starting MinerU FastAPI daemon..."
  echo "API Host: $API_HOST, Port: $API_PORT"
  echo "Backend vLLM Host: $VLLM_HOST, Port: $VLLM_PORT"
  echo "Model path: $MODEL_PATH"
  echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
  echo "CUDA devices: $CUDA_VISIBLE_DEVICES"

  # Check that the local model directory exists (only matters for local models)
  if [[ ! -d "$MODEL_PATH" ]]; then
    echo "⚠️ Model path not found: $MODEL_PATH"
    echo "Will try to download model automatically or use HTTP client mode"
  fi

  # Check that the conda environment put python on PATH
  if ! command -v python >/dev/null 2>&1; then
    echo "❌ Python not found. Check conda environment activation."
    return 1
  fi

  echo "🔧 Using Python: $(type -P python)"

  # Show GPU state before launching
  echo "📊 GPU 状态检查:"
  if command -v nvidia-smi >/dev/null 2>&1; then
    nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits \
      | awk -F',' '{printf " GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
  else
    echo "⚠️ nvidia-smi not available"
  fi

  # Launch the MinerU FastAPI service.
  local -a server_args=(
    --host "$API_HOST"
    --port "$API_PORT"
    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION"
    --max-model-len "$MAX_MODEL_LEN"
    --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS"
    --max-num-seqs "$MAX_NUM_SEQS"
    --served-model-name "$MODEL_NAME"
    --server-url "http://$VLLM_HOST:$VLLM_PORT"
    --source modelscope
  )

  # The GPU selection must be passed as an environment assignment on the
  # command itself: CUDA_VISIBLE_DEVICES is a plain (unexported) shell
  # variable, and the previous `nohup $CUDA_VISIBLE_DEVICES; python ...`
  # tried to execute "4" under nohup and then ran python without nohup and
  # without the GPU restriction.
  CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" nohup python -m mineru.cli.fast_api \
    "${server_args[@]}" > "$LOGFILE" 2>&1 &
  local pid=$!

  echo "$pid" > "$PIDFILE"
  echo "✅ MinerU FastAPI started with PID: $pid"
  echo "📋 Log file: $LOGFILE"
  echo "🌐 FastAPI Service URL: http://$API_HOST:$API_PORT"
  echo "📖 API Documentation: http://localhost:$API_PORT/docs"
  echo "📖 ReDoc Documentation: http://localhost:$API_PORT/redoc"
  echo ""
  echo "Waiting for service to start..."
  sleep 5
  status
}

#######################################
# Stop the service: SIGTERM first, SIGKILL if still alive after ~10 seconds.
# Globals:   PIDFILE (read, removed)
# Returns:   1 if no PID file exists or it is stale; 0 after stopping.
#######################################
stop() {
  if [[ ! -f "$PIDFILE" ]]; then
    echo "MinerU FastAPI is not running"
    return 1
  fi

  # A PID file whose process is gone (or whose content is not a PID) must not
  # be signalled; clean it up and report that nothing was running.
  if ! is_running; then
    echo "MinerU FastAPI is not running"
    echo "Removing stale PID file..."
    rm -f "$PIDFILE"
    return 1
  fi

  local pid
  pid=$(cat "$PIDFILE")
  echo "Stopping MinerU FastAPI (PID: $pid)..."

  # Graceful stop
  kill "$pid"

  # Wait for the process to exit
  local i
  for i in {1..10}; do
    if ! kill -0 "$pid" 2>/dev/null; then
      break
    fi
    echo "Waiting for process to stop... ($i/10)"
    sleep 1
  done

  # Still running: force kill
  if kill -0 "$pid" 2>/dev/null; then
    echo "Force killing process..."
    kill -9 "$pid"
  fi

  rm -f "$PIDFILE"
  echo "✅ MinerU FastAPI stopped"
}

#######################################
# Print process, port, HTTP, GPU and recent-log status for the service.
# Removes the PID file when it is stale.
# Globals:   PIDFILE, LOGFILE, API_HOST, API_PORT (read)
#######################################
status() {
  if ! is_running; then
    echo "❌ MinerU FastAPI is not running"
    if [[ -f "$PIDFILE" ]]; then
      echo "Removing stale PID file..."
      rm -f "$PIDFILE"
    fi
    return 0
  fi

  local pid
  pid=$(cat "$PIDFILE")
  echo "✅ MinerU FastAPI is running (PID: $pid)"
  echo "🌐 FastAPI Service URL: http://$API_HOST:$API_PORT"
  echo "📋 Log file: $LOGFILE"

  # Check whether the API port is being listened on (ss preferred, then netstat)
  local probe=""
  if command -v ss >/dev/null 2>&1; then
    probe="ss"
  elif command -v netstat >/dev/null 2>&1; then
    probe="netstat"
  fi
  if [[ -n "$probe" ]]; then
    if "$probe" -tuln | grep -q ":$API_PORT "; then
      echo "🔗 Port $API_PORT is being listened"
    else
      echo "⚠️ Port $API_PORT is not being listened (service may be starting up)"
    fi
  fi

  # Check that the API answers HTTP requests
  if command -v curl >/dev/null 2>&1; then
    if curl -s --connect-timeout 2 "http://127.0.0.1:$API_PORT/docs" >/dev/null 2>&1; then
      echo "🎯 FastAPI 响应正常"
    else
      echo "⚠️ FastAPI 无响应 (service may be starting up)"
    fi
  fi

  # Show GPU usage
  if command -v nvidia-smi >/dev/null 2>&1; then
    echo "📊 GPU 使用情况:"
    nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits \
      | awk -F',' '{printf " GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
  fi

  # Show the most recent log lines
  if [[ -f "$LOGFILE" ]]; then
    echo "📄 Latest logs (last 3 lines):"
    tail -3 "$LOGFILE" | sed 's/^/ /'
  fi
}

#######################################
# Follow the service log file (blocks until interrupted).
# Globals:   LOGFILE (read)
#######################################
logs() {
  if [[ -f "$LOGFILE" ]]; then
    echo "📄 MinerU FastAPI logs:"
    echo "======================"
    tail -f "$LOGFILE"
  else
    echo "❌ Log file not found: $LOGFILE"
  fi
}

#######################################
# Print the effective configuration plus environment and GPU information.
#######################################
config() {
  echo "📋 Current configuration:"
  echo " Conda Environment: $CONDA_ENV"
  echo " FastAPI Host: $API_HOST"
  echo " FastAPI Port: $API_PORT"
  echo " vLLM Backend Host: $VLLM_HOST"
  echo " vLLM Backend Port: $VLLM_PORT"
  echo " Model Path: $MODEL_PATH"
  echo " Model Name: $MODEL_NAME"
  echo " GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
  echo " CUDA Visible Devices: $CUDA_VISIBLE_DEVICES"
  echo " Max Model Length: $MAX_MODEL_LEN"
  echo " Max Num Batched Tokens: $MAX_NUM_BATCHED_TOKENS"
  echo " Max Num Seqs: $MAX_NUM_SEQS"
  echo " PID File: $PIDFILE"
  echo " Log File: $LOGFILE"
  echo ""
  echo " MinerU Config: $MINERU_TOOLS_CONFIG_JSON"
  echo " ModelScope Cache: $MODELSCOPE_CACHE"

  if [[ -d "$MODEL_PATH" ]]; then
    echo "✅ Model path exists"
    echo " Model files:"
    ls -la "$MODEL_PATH" | head -5 | sed 's/^/ /'
    if (( $(ls -1 "$MODEL_PATH" | wc -l) > 5 )); then
      echo " ... and more files"
    fi
  else
    echo "⚠️ Model path not found (will use HTTP client mode or auto-download)"
  fi

  # Check the MinerU config file
  if [[ -f "$MINERU_TOOLS_CONFIG_JSON" ]]; then
    echo "✅ MinerU config file exists"
  else
    echo "❌ MinerU config file not found: $MINERU_TOOLS_CONFIG_JSON"
  fi

  # Show environment information. `type -P` reports the on-disk executable
  # even when the name is also a shell function (as conda is after sourcing
  # conda.sh).
  echo ""
  echo "🔧 Environment:"
  echo " Python: $(type -P python 2>/dev/null || echo 'Not found')"
  echo " FastAPI module: $(python -c 'import fastapi; print(fastapi.__version__)' 2>/dev/null || echo 'Not found')"
  echo " MinerU module: $(python -c 'import mineru; print(mineru.__version__)' 2>/dev/null || echo 'Not found')"
  echo " Conda: $(type -P conda 2>/dev/null || echo 'Not found')"
  echo " CUDA: $(type -P nvcc 2>/dev/null || echo 'Not found')"

  # Show GPU information
  if command -v nvidia-smi >/dev/null 2>&1; then
    echo ""
    echo "🔥 GPU Information:"
    nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits \
      | awk -F',' '{printf " GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
  fi
}

#######################################
# Smoke-test the running service: fetch /docs and, when the demo PDF is
# present, POST it to /file_parse.
# Returns:   1 if the service is not running or curl is missing.
#######################################
test_api() {
  echo "🧪 Testing MinerU FastAPI..."

  if ! is_running; then
    echo "❌ MinerU FastAPI service is not running"
    return 1
  fi

  if ! command -v curl >/dev/null 2>&1; then
    echo "❌ curl command not found"
    return 1
  fi

  echo "📡 Testing FastAPI health..."
  if curl -s --connect-timeout 5 "http://127.0.0.1:$API_PORT/docs" >/dev/null; then
    echo "✅ FastAPI docs endpoint accessible"
  else
    echo "❌ FastAPI docs endpoint not accessible"
  fi

  # Test the file upload API (requires a test file)
  local test_file="/home/ubuntu/zhch/MinerU/demo/pdfs/small_ocr.pdf"
  if [[ -f "$test_file" ]]; then
    echo ""
    echo "📡 Testing /file_parse endpoint..."
    # --connect-timeout only bounds the TCP connect; --max-time bounds the
    # whole request, which is what a 300 second budget is for.
    local response
    response=$(curl -s -X POST "http://127.0.0.1:$API_PORT/file_parse" \
      -F "files=@$test_file" \
      -F "backend=vlm-http-client" \
      -F "server_url=http://$VLLM_HOST:$VLLM_PORT" \
      -F "return_md=true" \
      -F "response_format_zip=false" \
      --connect-timeout 5 \
      --max-time 300)

    if [[ "$response" == *"results"* ]]; then
      echo "✅ File parse endpoint working"
      echo "Response preview: $(echo "$response" | head -c 200)..."
    else
      echo "⚠️ File parse endpoint response unexpected"
      echo "Response: $response"
    fi
  else
    echo "⚠️ Test file not found: $test_file"
    echo "Please provide a test file to test the upload functionality"
  fi
}

#######################################
# POST the demo PDF to /file_parse and save the response as JSON under
# /tmp/mineru_fastapi_test_output.
# Returns:   1 if the service is not running or the test file is missing.
#######################################
test_client() {
  echo "🧪 Testing MinerU client with FastAPI..."

  if ! is_running; then
    echo "❌ MinerU FastAPI service is not running. Start it first with: $0 start"
    return 1
  fi

  # Test fixture and output locations
  local test_image="/home/ubuntu/zhch/MinerU/demo/pdfs/small_ocr.pdf"
  local test_output="/tmp/mineru_fastapi_test_output"

  if [[ ! -f "$test_image" ]]; then
    echo "⚠️ Test image not found: $test_image"
    echo "Please provide a test image or update the TEST_IMAGE path in the script"
    return 1
  fi

  echo "📄 Testing with image: $test_image"
  echo "📁 Output directory: $test_output"

  # curl -o does not create missing parent directories, so make sure the
  # output directory exists before writing the result into it.
  mkdir -p "$test_output"

  # Exercise FastAPI directly through curl
  echo "Testing via curl..."
  if curl -X POST "http://127.0.0.1:$API_PORT/file_parse" \
    -F "files=@$test_image" \
    -F "backend=vlm-http-client" \
    -F "server_url=http://$VLLM_HOST:$VLLM_PORT" \
    -F "return_md=true" \
    -F "output_dir=$test_output" \
    -o "$test_output/fastapi_result.json"; then
    echo "✅ FastAPI test completed successfully"
    echo "📁 Check output in: $test_output"
    if [[ -f "$test_output/fastapi_result.json" ]]; then
      echo "📄 Result file size: $(du -h "$test_output/fastapi_result.json" | cut -f1)"
    fi
  else
    echo "❌ FastAPI test failed"
  fi
}

#######################################
# Print usage help, the current configuration summary and API endpoints.
#######################################
usage() {
  echo "MinerU FastAPI Service Daemon"
  echo "============================="
  echo "Usage: $0 {start|stop|restart|status|logs|config|test|test-client}"
  echo ""
  echo "Commands:"
  echo " start - Start the MinerU FastAPI service"
  echo " stop - Stop the MinerU FastAPI service"
  echo " restart - Restart the MinerU FastAPI service"
  echo " status - Show service status and resource usage"
  echo " logs - Show service logs (follow mode)"
  echo " config - Show current configuration"
  echo " test - Test API endpoints"
  echo " test-client - Test FastAPI with sample file"
  echo ""
  echo "Configuration (edit script to modify):"
  echo " FastAPI Host: $API_HOST"
  echo " FastAPI Port: $API_PORT"
  echo " vLLM Backend: $VLLM_HOST:$VLLM_PORT"
  echo " Model: $MODEL_PATH"
  echo " GPU Memory: $GPU_MEMORY_UTILIZATION"
  echo " CUDA Devices: $CUDA_VISIBLE_DEVICES"
  echo ""
  echo "API Endpoints:"
  echo " Swagger UI: http://localhost:$API_PORT/docs"
  echo " ReDoc: http://localhost:$API_PORT/redoc"
  echo " File Parse: POST http://localhost:$API_PORT/file_parse"
  echo ""
  echo "Examples:"
  echo " ./mineru_fast_api_daemon.sh start"
  echo " ./mineru_fast_api_daemon.sh status"
  echo " ./mineru_fast_api_daemon.sh logs"
  echo " ./mineru_fast_api_daemon.sh test"
}

# Dispatch on the first command-line argument; anything unrecognised
# (including no argument) prints usage and exits with status 1.
case "$1" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  restart)
    stop
    sleep 3
    start
    ;;
  status)
    status
    ;;
  logs)
    logs
    ;;
  config)
    config
    ;;
  test)
    test_api
    ;;
  test-client)
    test_client
    ;;
  *)
    usage
    exit 1
    ;;
esac