| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411 |
#!/bin/bash
# MinerU FastAPI service daemon script.
#
# This section defines the settings read by the functions further down and
# exports the MinerU / ModelScope variables inherited by child processes.

# --- Runtime files: log directory, PID file and log file ---
LOGDIR="/home/ubuntu/zhch/logs"
PIDFILE="${LOGDIR}/mineru_fastapi.pid"
LOGFILE="${LOGDIR}/mineru_fastapi.log"
mkdir -p "${LOGDIR}"

# --- Service parameters ---
CONDA_ENV="mineru2"
API_HOST="0.0.0.0"
API_PORT="8120"

# --- vLLM backend settings (for the vlm-vllm-engine backend) ---
VLLM_HOST="127.0.0.1"
VLLM_PORT="8121"
MODEL_NAME="MinerU2.5"
MODEL_PATH="/home/ubuntu/models/modelscope_cache/models/OpenDataLab/MinerU2___5-2509-1___2B"

# --- GPU settings ---
CUDA_VISIBLE_DEVICES="4"
GPU_MEMORY_UTILIZATION="0.3"
MAX_MODEL_LEN="16384"
MAX_NUM_BATCHED_TOKENS="8192"
MAX_NUM_SEQS="8"

# --- MinerU settings (exported) ---
export USE_MODELSCOPE_HUB=1
export MODELSCOPE_CACHE="/home/ubuntu/models/modelscope_cache"
export MINERU_TOOLS_CONFIG_JSON="/home/ubuntu/zhch/MinerU/mineru.json"
# Initialise conda and activate the configured environment.
# The first conda.sh profile script found is sourced and used to activate
# $CONDA_ENV; when neither location exists, the environment's bin directory
# is prepended to PATH instead.
_mineru_conda_profile=""
for _mineru_candidate in \
  "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" \
  "/opt/conda/etc/profile.d/conda.sh"; do
  if [[ -f "$_mineru_candidate" ]]; then
    _mineru_conda_profile="$_mineru_candidate"
    break
  fi
done

if [[ -n "$_mineru_conda_profile" ]]; then
  source "$_mineru_conda_profile"
  conda activate "$CONDA_ENV"
else
  echo "Warning: Using direct conda path activation"
  export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
fi
# Drop the scratch variables so they do not linger in the script's scope.
unset _mineru_conda_profile _mineru_candidate
#######################################
# Launch the MinerU FastAPI server as a background process.
# Globals (read): PIDFILE, LOGFILE, API_HOST, API_PORT, VLLM_HOST, VLLM_PORT,
#   MODEL_PATH, MODEL_NAME, GPU_MEMORY_UTILIZATION, CUDA_VISIBLE_DEVICES,
#   MAX_MODEL_LEN, MAX_NUM_BATCHED_TOKENS, MAX_NUM_SEQS
# Outputs: progress messages on stdout; the server's stdout/stderr are
#   redirected to LOGFILE and its PID is written to PIDFILE.
# Returns: 1 if the service is already running or python is not on PATH;
#   otherwise the exit status of status().
#######################################
start() {
  if [[ -f "$PIDFILE" ]] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
    echo "MinerU FastAPI is already running"
    return 1
  fi

  echo "Starting MinerU FastAPI daemon..."
  echo "API Host: $API_HOST, Port: $API_PORT"
  echo "Backend vLLM Host: $VLLM_HOST, Port: $VLLM_PORT"
  echo "Model path: $MODEL_PATH"
  echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
  echo "CUDA devices: $CUDA_VISIBLE_DEVICES"

  # A missing local model directory is only a warning, not an error.
  if [[ ! -d "$MODEL_PATH" ]]; then
    echo "⚠️ Model path not found: $MODEL_PATH"
    echo "Will try to download model automatically or use HTTP client mode"
  fi

  # Without a python interpreter on PATH the environment was not activated.
  if ! command -v python >/dev/null 2>&1; then
    echo "❌ Python not found. Check conda environment activation."
    return 1
  fi

  echo "🔧 Using Python: $(command -v python)"

  # Show per-GPU memory usage before launching.
  echo "📊 GPU 状态检查:"
  if command -v nvidia-smi >/dev/null 2>&1; then
    nvidia-smi --query-gpu=index,name,memory.used,memory.total \
      --format=csv,noheader,nounits \
      | awk -F',' '{printf " GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
  else
    echo "⚠️ nvidia-smi not available"
  fi

  # Launch the server. The prefix assignment places CUDA_VISIBLE_DEVICES in
  # the server's environment even when the shell variable is not exported,
  # and nohup wraps the python process itself so it survives a hangup.
  CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" nohup python -m mineru.cli.fast_api \
    --host "$API_HOST" \
    --port "$API_PORT" \
    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
    --max-model-len "$MAX_MODEL_LEN" \
    --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS" \
    --max-num-seqs "$MAX_NUM_SEQS" \
    --served-model-name "$MODEL_NAME" \
    --server-url "http://$VLLM_HOST:$VLLM_PORT" \
    --source modelscope \
    > "$LOGFILE" 2>&1 &

  echo $! > "$PIDFILE"
  echo "✅ MinerU FastAPI started with PID: $(cat "$PIDFILE")"
  echo "📋 Log file: $LOGFILE"
  echo "🌐 FastAPI Service URL: http://$API_HOST:$API_PORT"
  echo "📖 API Documentation: http://localhost:$API_PORT/docs"
  echo "📖 ReDoc Documentation: http://localhost:$API_PORT/redoc"
  echo ""
  echo "Waiting for service to start..."
  sleep 5
  status
}
#######################################
# Stop the MinerU FastAPI server recorded in PIDFILE.
# Sends SIGTERM, polls up to 10 times one second apart for the process to
# exit, then falls back to SIGKILL.
# Globals (read): PIDFILE
# Outputs: progress messages on stdout.
# Returns: 1 if there is no PID file, or if the PID file does not refer to
#   a live process (the stale file is removed); 0 after stopping the server.
#######################################
stop() {
  local pid attempt

  if [[ ! -f "$PIDFILE" ]]; then
    echo "MinerU FastAPI is not running"
    return 1
  fi

  pid=$(<"$PIDFILE")
  pid=${pid//[[:space:]]/}

  # Only a positive integer is accepted: an empty value would make kill
  # print a usage error, and "0" would signal this script's process group.
  if [[ ! "$pid" =~ ^[1-9][0-9]*$ ]] || ! kill -0 "$pid" 2>/dev/null; then
    echo "MinerU FastAPI is not running"
    echo "Removing stale PID file..."
    rm -f -- "$PIDFILE"
    return 1
  fi

  echo "Stopping MinerU FastAPI (PID: $pid)..."

  # Graceful stop first.
  kill "$pid" 2>/dev/null

  # Wait for the process to exit.
  for attempt in {1..10}; do
    if ! kill -0 "$pid" 2>/dev/null; then
      break
    fi
    echo "Waiting for process to stop... ($attempt/10)"
    sleep 1
  done

  # Still alive after the grace period: force it.
  if kill -0 "$pid" 2>/dev/null; then
    echo "Force killing process..."
    kill -9 "$pid"
  fi

  rm -f -- "$PIDFILE"
  echo "✅ MinerU FastAPI stopped"
}
#######################################
# Report whether the MinerU FastAPI server is running, plus port, HTTP,
# GPU and recent-log details when it is.
# Globals (read): PIDFILE, LOGFILE, API_HOST, API_PORT
# Globals (written): PID (assigned without `local`, so it stays visible to
#   the caller)
# Outputs: status report on stdout; removes PIDFILE when it is stale.
#######################################
status() {
  local port_tool=""

  # Not running: report, clean up a leftover PID file, and finish.
  if [[ ! -f "$PIDFILE" ]] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
    echo "❌ MinerU FastAPI is not running"
    if [[ -f "$PIDFILE" ]]; then
      echo "Removing stale PID file..."
      rm -f "$PIDFILE"
    fi
    return
  fi

  PID=$(cat "$PIDFILE")
  echo "✅ MinerU FastAPI is running (PID: $PID)"
  echo "🌐 FastAPI Service URL: http://$API_HOST:$API_PORT"
  echo "📋 Log file: $LOGFILE"

  # Check whether the API port is being listened on, preferring ss and
  # falling back to netstat; skipped when neither tool is available.
  if command -v ss >/dev/null 2>&1; then
    port_tool="ss"
  elif command -v netstat >/dev/null 2>&1; then
    port_tool="netstat"
  fi
  if [[ -n "$port_tool" ]]; then
    if "$port_tool" -tuln | grep -q ":$API_PORT "; then
      echo "🔗 Port $API_PORT is being listened"
    else
      echo "⚠️ Port $API_PORT is not being listened (service may be starting up)"
    fi
  fi

  # Check that the API answers on the docs endpoint.
  if command -v curl >/dev/null 2>&1; then
    if curl -s --connect-timeout 2 "http://127.0.0.1:$API_PORT/docs" >/dev/null 2>&1; then
      echo "🎯 FastAPI 响应正常"
    else
      echo "⚠️ FastAPI 无响应 (service may be starting up)"
    fi
  fi

  # Show GPU utilisation.
  if command -v nvidia-smi >/dev/null 2>&1; then
    echo "📊 GPU 使用情况:"
    nvidia-smi \
      --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total \
      --format=csv,noheader,nounits \
      | awk -F',' '{printf " GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
  fi

  # Show the tail of the log.
  if [[ -f "$LOGFILE" ]]; then
    echo "📄 Latest logs (last 3 lines):"
    tail -n 3 "$LOGFILE" | sed 's/^/ /'
  fi
}
#######################################
# Follow the service log file with `tail -f`.
# Globals (read): LOGFILE
# Outputs: a header followed by the log stream, or an error line when the
#   log file does not exist.
#######################################
logs() {
  if [[ ! -f "$LOGFILE" ]]; then
    echo "❌ Log file not found: $LOGFILE"
    return
  fi

  echo "📄 MinerU FastAPI logs:"
  echo "======================"
  tail -f "$LOGFILE"
}
#######################################
# Print the current configuration, followed by checks of the model path and
# MinerU config file, tool locations/versions, and GPU inventory.
# Globals (read): CONDA_ENV, API_HOST, API_PORT, VLLM_HOST, VLLM_PORT,
#   MODEL_PATH, MODEL_NAME, GPU_MEMORY_UTILIZATION, CUDA_VISIBLE_DEVICES,
#   MAX_MODEL_LEN, MAX_NUM_SEQS, PIDFILE, LOGFILE,
#   MINERU_TOOLS_CONFIG_JSON, MODELSCOPE_CACHE
# Outputs: the report on stdout.
#######################################
config() {
  local python_path fastapi_version mineru_version conda_path nvcc_path

  printf '%s\n' \
    "📋 Current configuration:" \
    " Conda Environment: $CONDA_ENV" \
    " FastAPI Host: $API_HOST" \
    " FastAPI Port: $API_PORT" \
    " vLLM Backend Host: $VLLM_HOST" \
    " vLLM Backend Port: $VLLM_PORT" \
    " Model Path: $MODEL_PATH" \
    " Model Name: $MODEL_NAME" \
    " GPU Memory Utilization: $GPU_MEMORY_UTILIZATION" \
    " CUDA Visible Devices: $CUDA_VISIBLE_DEVICES" \
    " Max Model Length: $MAX_MODEL_LEN" \
    " Max Num Seqs: $MAX_NUM_SEQS" \
    " PID File: $PIDFILE" \
    " Log File: $LOGFILE" \
    "" \
    " MinerU Config: $MINERU_TOOLS_CONFIG_JSON" \
    " ModelScope Cache: $MODELSCOPE_CACHE"

  # Model directory: list the first few entries when it exists.
  if [[ ! -d "$MODEL_PATH" ]]; then
    echo "⚠️ Model path not found (will use HTTP client mode or auto-download)"
  else
    echo "✅ Model path exists"
    echo " Model files:"
    ls -la "$MODEL_PATH" | head -5 | sed 's/^/ /'
    if (( $(ls -1 "$MODEL_PATH" | wc -l) > 5 )); then
      echo " ... and more files"
    fi
  fi

  # MinerU configuration file.
  if [[ -f "$MINERU_TOOLS_CONFIG_JSON" ]]; then
    echo "✅ MinerU config file exists"
  else
    echo "❌ MinerU config file not found: $MINERU_TOOLS_CONFIG_JSON"
  fi

  # Environment information: each probe falls back to 'Not found'.
  python_path=$(which python 2>/dev/null || echo 'Not found')
  fastapi_version=$(python -c 'import fastapi; print(fastapi.__version__)' 2>/dev/null || echo 'Not found')
  mineru_version=$(python -c 'import mineru; print(mineru.__version__)' 2>/dev/null || echo 'Not found')
  conda_path=$(which conda 2>/dev/null || echo 'Not found')
  nvcc_path=$(which nvcc 2>/dev/null || echo 'Not found')
  printf '%s\n' \
    "" \
    "🔧 Environment:" \
    " Python: $python_path" \
    " FastAPI module: $fastapi_version" \
    " MinerU module: $mineru_version" \
    " Conda: $conda_path" \
    " CUDA: $nvcc_path"

  # GPU inventory.
  if command -v nvidia-smi >/dev/null 2>&1; then
    echo ""
    echo "🔥 GPU Information:"
    nvidia-smi --query-gpu=index,name,driver_version,memory.total \
      --format=csv,noheader,nounits \
      | awk -F',' '{printf " GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
  fi
}
#######################################
# Smoke-test the running FastAPI service: the /docs endpoint, then a
# /file_parse upload when a sample file is available.
# Globals (read): PIDFILE, API_PORT, VLLM_HOST, VLLM_PORT
# Environment: MINERU_TEST_FILE optionally overrides the sample file path.
# Outputs: test results on stdout.
# Returns: 1 if the service is not running or curl is missing.
#######################################
test_api() {
  local response test_file
  test_file="${MINERU_TEST_FILE:-/home/ubuntu/zhch/MinerU/demo/pdfs/small_ocr.pdf}"

  echo "🧪 Testing MinerU FastAPI..."

  if [[ ! -f "$PIDFILE" ]] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
    echo "❌ MinerU FastAPI service is not running"
    return 1
  fi

  if ! command -v curl >/dev/null 2>&1; then
    echo "❌ curl command not found"
    return 1
  fi

  # --fail makes curl exit non-zero on HTTP 4xx/5xx responses, so an error
  # page is not reported as an accessible endpoint.
  echo "📡 Testing FastAPI health..."
  if curl --silent --fail --connect-timeout 5 \
    "http://127.0.0.1:$API_PORT/docs" >/dev/null; then
    echo "✅ FastAPI docs endpoint accessible"
  else
    echo "❌ FastAPI docs endpoint not accessible"
  fi

  # Exercise the upload endpoint; this needs a sample file on disk.
  if [[ -f "$test_file" ]]; then
    echo ""
    echo "📡 Testing /file_parse endpoint..."
    response=$(curl -s -X POST "http://127.0.0.1:$API_PORT/file_parse" \
      -F "files=@$test_file" \
      -F "backend=vlm-http-client" \
      -F "server_url=http://$VLLM_HOST:$VLLM_PORT" \
      -F "return_md=true" \
      -F "response_format_zip=false" \
      --connect-timeout 300)

    # The response is only checked for containing the text "results".
    if [[ "$response" == *"results"* ]]; then
      echo "✅ File parse endpoint working"
      echo "Response preview: $(printf '%s\n' "$response" | head -c 200)..."
    else
      echo "⚠️ File parse endpoint response unexpected"
      echo "Response: $response"
    fi
  else
    echo "⚠️ Test file not found: $test_file"
    echo "Please provide a test file to test the upload functionality"
  fi
}
#######################################
# Upload a sample file to /file_parse with curl and save the response as
# fastapi_result.json in the output directory.
# Globals (read): PIDFILE, API_PORT, VLLM_HOST, VLLM_PORT
# Environment: MINERU_TEST_FILE optionally overrides the sample file path;
#   MINERU_TEST_OUTPUT optionally overrides the output directory.
# Outputs: progress and result messages on stdout.
# Returns: 1 if the service is not running, curl is missing, the sample
#   file is missing, or the output directory cannot be created.
#######################################
test_client() {
  local sample_file output_dir
  sample_file="${MINERU_TEST_FILE:-/home/ubuntu/zhch/MinerU/demo/pdfs/small_ocr.pdf}"
  output_dir="${MINERU_TEST_OUTPUT:-/tmp/mineru_fastapi_test_output}"

  echo "🧪 Testing MinerU client with FastAPI..."

  if [[ ! -f "$PIDFILE" ]] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
    echo "❌ MinerU FastAPI service is not running. Start it first with: $0 start"
    return 1
  fi

  if ! command -v curl >/dev/null 2>&1; then
    echo "❌ curl command not found"
    return 1
  fi

  if [[ ! -f "$sample_file" ]]; then
    echo "⚠️ Test image not found: $sample_file"
    echo "Please provide a test image or update the TEST_IMAGE path in the script"
    return 1
  fi

  echo "📄 Testing with image: $sample_file"
  echo "📁 Output directory: $output_dir"

  # curl -o does not create missing parent directories, so create the
  # output directory before asking curl to write into it.
  if ! mkdir -p -- "$output_dir"; then
    echo "❌ Cannot create output directory: $output_dir"
    return 1
  fi

  # Call the FastAPI endpoint directly with curl.
  echo "Testing via curl..."
  if curl -X POST "http://127.0.0.1:$API_PORT/file_parse" \
    -F "files=@$sample_file" \
    -F "backend=vlm-http-client" \
    -F "server_url=http://$VLLM_HOST:$VLLM_PORT" \
    -F "return_md=true" \
    -F "output_dir=$output_dir" \
    -o "$output_dir/fastapi_result.json"; then
    echo "✅ FastAPI test completed successfully"
    echo "📁 Check output in: $output_dir"
    if [[ -f "$output_dir/fastapi_result.json" ]]; then
      echo "📄 Result file size: $(du -h "$output_dir/fastapi_result.json" | cut -f1)"
    fi
  else
    echo "❌ FastAPI test failed"
  fi
}
#######################################
# Print the usage/help text, including the current configuration values
# and API endpoint URLs.
# Globals (read): API_HOST, API_PORT, VLLM_HOST, VLLM_PORT, MODEL_PATH,
#   GPU_MEMORY_UTILIZATION, CUDA_VISIBLE_DEVICES
# Outputs: help text on stdout.
#######################################
usage() {
  cat <<EOF
MinerU FastAPI Service Daemon
=============================
Usage: $0 {start|stop|restart|status|logs|config|test|test-client}

Commands:
 start - Start the MinerU FastAPI service
 stop - Stop the MinerU FastAPI service
 restart - Restart the MinerU FastAPI service
 status - Show service status and resource usage
 logs - Show service logs (follow mode)
 config - Show current configuration
 test - Test API endpoints
 test-client - Test FastAPI with sample file

Configuration (edit script to modify):
 FastAPI Host: $API_HOST
 FastAPI Port: $API_PORT
 vLLM Backend: $VLLM_HOST:$VLLM_PORT
 Model: $MODEL_PATH
 GPU Memory: $GPU_MEMORY_UTILIZATION
 CUDA Devices: $CUDA_VISIBLE_DEVICES

API Endpoints:
 Swagger UI: http://localhost:$API_PORT/docs
 ReDoc: http://localhost:$API_PORT/redoc
 File Parse: POST http://localhost:$API_PORT/file_parse

Examples:
 ./mineru_fast_api_daemon.sh start
 ./mineru_fast_api_daemon.sh status
 ./mineru_fast_api_daemon.sh logs
 ./mineru_fast_api_daemon.sh test
EOF
}
#######################################
# Entry point: run the sub-command named by the first argument.
# Arguments: $1 - one of start, stop, restart, status, logs, config, test,
#   test-client. Anything else prints the usage text and exits with 1.
#######################################
main() {
  local action="$1"

  case "$action" in
    start) start ;;
    stop) stop ;;
    restart)
      # Stop, pause for three seconds, then start again.
      stop
      sleep 3
      start
      ;;
    status) status ;;
    logs) logs ;;
    config) config ;;
    test) test_api ;;
    test-client) test_client ;;
    *)
      usage
      exit 1
      ;;
  esac
}

main "$@"
|