# mineru_vllm_daemon.sh (≈12 KB)
  1. #!/bin/bash
  2. # filepath: /home/ubuntu/zhch/MinerU/zhch/mineru_vllm_daemon.sh
  3. # MinerU vLLM 服务守护进程脚本
  4. LOGDIR="/home/ubuntu/zhch/logs"
  5. mkdir -p $LOGDIR
  6. PIDFILE="$LOGDIR/mineru_vllm.pid"
  7. LOGFILE="$LOGDIR/mineru_vllm.log"
  8. # 配置参数
  9. CONDA_ENV="mineru2"
  10. PORT="8121"
  11. HOST="0.0.0.0"
  12. MODEL_PATH="/home/ubuntu/models/modelscope_cache/models/OpenDataLab/MinerU2___5-2509-1___2B"
  13. MODEL_NAME="MinerU2.5"
  14. # GPU 配置
  15. GPU_MEMORY_UTILIZATION="0.3"
  16. CUDA_VISIBLE_DEVICES="0"
  17. MAX_MODEL_LEN="16384"
  18. MAX_NUM_BATCHED_TOKENS="8192"
  19. MAX_NUM_SEQS="8"
  20. # MinerU 配置
  21. export MINERU_TOOLS_CONFIG_JSON="/home/ubuntu/zhch/MinerU/mineru.json"
  22. export MODELSCOPE_CACHE="/home/ubuntu/models/modelscope_cache"
  23. export USE_MODELSCOPE_HUB=1
  24. # export NLTK_DATA="/home/ubuntu/nltk_data"
  25. # export HF_HOME="/home/ubuntu/models/hf_home"
  26. # export HF_ENDPOINT="https://hf-mirror.com"
  27. # export TORCH_HOME="/home/ubuntu/models/torch/"
  28. # 正确初始化和激活conda环境
  29. if [ -f "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" ]; then
  30. source /home/ubuntu/anaconda3/etc/profile.d/conda.sh
  31. conda activate $CONDA_ENV
  32. elif [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then
  33. source /opt/conda/etc/profile.d/conda.sh
  34. conda activate $CONDA_ENV
  35. else
  36. # 方法2:直接使用conda可执行文件路径
  37. echo "Warning: Using direct conda path activation"
  38. export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
  39. fi
  40. # 设置环境变量
  41. export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
  42. export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
  43. start() {
  44. if [ -f $PIDFILE ] && kill -0 $(cat $PIDFILE) 2>/dev/null; then
  45. echo "MinerU vLLM is already running"
  46. return 1
  47. fi
  48. echo "Starting MinerU vLLM daemon..."
  49. echo "Host: $HOST, Port: $PORT"
  50. echo "Model path: $MODEL_PATH"
  51. echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
  52. echo "CUDA devices: $CUDA_VISIBLE_DEVICES"
  53. # 检查模型文件是否存在
  54. if [ ! -d "$MODEL_PATH" ]; then
  55. echo "❌ Model path not found: $MODEL_PATH"
  56. echo "Please download the model first:"
  57. echo "python -m mineru.cli.models_download"
  58. return 1
  59. fi
  60. # 检查conda环境
  61. if ! command -v python >/dev/null 2>&1; then
  62. echo "❌ Python not found. Check conda environment activation."
  63. return 1
  64. fi
  65. # 检查mineru-vllm-server命令
  66. if ! command -v mineru-vllm-server >/dev/null 2>&1; then
  67. echo "❌ mineru-vllm-server not found. Check installation and environment."
  68. return 1
  69. fi
  70. echo "🔧 Using Python: $(which python)"
  71. echo "🔧 Using mineru-vllm-server: $(which mineru-vllm-server)"
  72. # 显示GPU状态
  73. echo "📊 GPU 状态检查:"
  74. if command -v nvidia-smi >/dev/null 2>&1; then
  75. nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits | \
  76. awk -F',' '{printf " GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
  77. else
  78. echo "⚠️ nvidia-smi not available"
  79. fi
  80. # 启动MinerU vLLM服务
  81. nohup bash -c "
  82. export CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES
  83. export MINERU_TOOLS_CONFIG_JSON='$MINERU_TOOLS_CONFIG_JSON'
  84. # export NLTK_DATA='$NLTK_DATA'
  85. # export HF_HOME='$HF_HOME'
  86. # export HF_ENDPOINT='$HF_ENDPOINT'
  87. # export TORCH_HOME='$TORCH_HOME'
  88. export MODELSCOPE_CACHE='$MODELSCOPE_CACHE'
  89. export USE_MODELSCOPE_HUB=1
  90. mineru-vllm-server \
  91. --host $HOST \
  92. --port $PORT \
  93. --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
  94. # --max-model-len $MAX_MODEL_LEN \
  95. # --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
  96. # --max-num-seqs $MAX_NUM_SEQS \
  97. # --trust-remote-code \
  98. # --enable-prefix-caching \
  99. # --served-model-name $MODEL_NAME \
  100. --source modelscope
  101. " > $LOGFILE 2>&1 &
  102. echo $! > $PIDFILE
  103. echo "✅ MinerU vLLM started with PID: $(cat $PIDFILE)"
  104. echo "📋 Log file: $LOGFILE"
  105. echo "🌐 Service URL: http://$HOST:$PORT"
  106. echo "📖 API Documentation: http://localhost:$PORT/docs"
  107. echo ""
  108. echo "Waiting for service to start..."
  109. sleep 5
  110. status
  111. }
  112. stop() {
  113. if [ ! -f $PIDFILE ]; then
  114. echo "MinerU vLLM is not running"
  115. return 1
  116. fi
  117. PID=$(cat $PIDFILE)
  118. echo "Stopping MinerU vLLM (PID: $PID)..."
  119. # 优雅停止
  120. kill $PID
  121. # 等待进程结束
  122. for i in {1..10}; do
  123. if ! kill -0 $PID 2>/dev/null; then
  124. break
  125. fi
  126. echo "Waiting for process to stop... ($i/10)"
  127. sleep 1
  128. done
  129. # 如果进程仍在运行,强制结束
  130. if kill -0 $PID 2>/dev/null; then
  131. echo "Force killing process..."
  132. kill -9 $PID
  133. fi
  134. rm -f $PIDFILE
  135. echo "✅ MinerU vLLM stopped"
  136. }
  137. status() {
  138. if [ -f $PIDFILE ] && kill -0 $(cat $PIDFILE) 2>/dev/null; then
  139. PID=$(cat $PIDFILE)
  140. echo "✅ MinerU vLLM is running (PID: $PID)"
  141. echo "🌐 Service URL: http://$HOST:$PORT"
  142. echo "📋 Log file: $LOGFILE"
  143. # 检查端口是否被监听
  144. if command -v ss >/dev/null 2>&1; then
  145. if ss -tuln | grep -q ":$PORT "; then
  146. echo "🔗 Port $PORT is being listened"
  147. else
  148. echo "⚠️ Port $PORT is not being listened (service may be starting up)"
  149. fi
  150. elif command -v netstat >/dev/null 2>&1; then
  151. if netstat -tuln | grep -q ":$PORT "; then
  152. echo "🔗 Port $PORT is being listened"
  153. else
  154. echo "⚠️ Port $PORT is not being listened (service may be starting up)"
  155. fi
  156. fi
  157. # 检查API响应
  158. if command -v curl >/dev/null 2>&1; then
  159. if curl -s --connect-timeout 2 http://127.0.0.1:$PORT/v1/models > /dev/null 2>&1; then
  160. echo "🎯 API 响应正常"
  161. else
  162. echo "⚠️ API 无响应 (service may be starting up)"
  163. fi
  164. fi
  165. # 显示GPU使用情况
  166. if command -v nvidia-smi >/dev/null 2>&1; then
  167. echo "📊 GPU 使用情况:"
  168. nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits | \
  169. awk -F',' '{printf " GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
  170. fi
  171. # 显示最新日志
  172. if [ -f $LOGFILE ]; then
  173. echo "📄 Latest logs (last 3 lines):"
  174. tail -3 $LOGFILE | sed 's/^/ /'
  175. fi
  176. else
  177. echo "❌ MinerU vLLM is not running"
  178. if [ -f $PIDFILE ]; then
  179. echo "Removing stale PID file..."
  180. rm -f $PIDFILE
  181. fi
  182. fi
  183. }
  184. logs() {
  185. if [ -f $LOGFILE ]; then
  186. echo "📄 MinerU vLLM logs:"
  187. echo "=================="
  188. tail -f $LOGFILE
  189. else
  190. echo "❌ Log file not found: $LOGFILE"
  191. fi
  192. }
  193. config() {
  194. echo "📋 Current configuration:"
  195. echo " Conda Environment: $CONDA_ENV"
  196. echo " Host: $HOST"
  197. echo " Port: $PORT"
  198. echo " Model Path: $MODEL_PATH"
  199. echo " Model Name: $MODEL_NAME"
  200. echo " GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
  201. echo " CUDA Visible Devices: $CUDA_VISIBLE_DEVICES"
  202. echo " Max Model Length: $MAX_MODEL_LEN"
  203. echo " Max Num Seqs: $MAX_NUM_SEQS"
  204. echo " PID File: $PIDFILE"
  205. echo " Log File: $LOGFILE"
  206. echo ""
  207. echo " MinerU Config: $MINERU_TOOLS_CONFIG_JSON"
  208. echo " ModelScope Cache: $MODELSCOPE_CACHE"
  209. if [ -d "$MODEL_PATH" ]; then
  210. echo "✅ Model path exists"
  211. echo " Model files:"
  212. ls -la "$MODEL_PATH" | head -10 | sed 's/^/ /'
  213. if [ $(ls -1 "$MODEL_PATH" | wc -l) -gt 10 ]; then
  214. echo " ... and more files"
  215. fi
  216. else
  217. echo "❌ Model path not found"
  218. fi
  219. # 检查MinerU配置文件
  220. if [ -f "$MINERU_TOOLS_CONFIG_JSON" ]; then
  221. echo "✅ MinerU config file exists"
  222. else
  223. echo "❌ MinerU config file not found: $MINERU_TOOLS_CONFIG_JSON"
  224. fi
  225. # 显示环境信息
  226. echo ""
  227. echo "🔧 Environment:"
  228. echo " Python: $(which python 2>/dev/null || echo 'Not found')"
  229. echo " mineru-vllm-server: $(which mineru-vllm-server 2>/dev/null || echo 'Not found')"
  230. echo " Conda: $(which conda 2>/dev/null || echo 'Not found')"
  231. echo " CUDA: $(which nvcc 2>/dev/null || echo 'Not found')"
  232. # 显示GPU信息
  233. if command -v nvidia-smi >/dev/null 2>&1; then
  234. echo ""
  235. echo "🔥 GPU Information:"
  236. nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits | \
  237. awk -F',' '{printf " GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
  238. fi
  239. }
  240. test_api() {
  241. echo "🧪 Testing MinerU vLLM API..."
  242. if [ ! -f $PIDFILE ] || ! kill -0 $(cat $PIDFILE) 2>/dev/null; then
  243. echo "❌ MinerU vLLM service is not running"
  244. return 1
  245. fi
  246. if ! command -v curl >/dev/null 2>&1; then
  247. echo "❌ curl command not found"
  248. return 1
  249. fi
  250. echo "📡 Testing /v1/models endpoint..."
  251. response=$(curl -s --connect-timeout 5 http://127.0.0.1:$PORT/v1/models)
  252. if [ $? -eq 0 ]; then
  253. echo "✅ Models endpoint accessible"
  254. echo "$response" | python -m json.tool 2>/dev/null || echo "$response"
  255. else
  256. echo "❌ Models endpoint not accessible"
  257. fi
  258. echo ""
  259. echo "📡 Testing health endpoint..."
  260. health_response=$(curl -s --connect-timeout 5 http://127.0.0.1:$PORT/health)
  261. if [ $? -eq 0 ]; then
  262. echo "✅ Health endpoint accessible"
  263. echo "$health_response"
  264. else
  265. echo "❌ Health endpoint not accessible"
  266. fi
  267. }
  268. test_client() {
  269. echo "🧪 Testing MinerU client with vLLM server..."
  270. if [ ! -f $PIDFILE ] || ! kill -0 $(cat $PIDFILE) 2>/dev/null; then
  271. echo "❌ MinerU vLLM service is not running. Start it first with: $0 start"
  272. return 1
  273. fi
  274. # 测试用例文件路径(需要根据实际情况调整)
  275. TEST_IMAGE="/home/ubuntu/zhch/data/test/sample.png"
  276. TEST_OUTPUT="/tmp/mineru_vllm_test_output"
  277. if [ ! -f "$TEST_IMAGE" ]; then
  278. echo "⚠️ Test image not found: $TEST_IMAGE"
  279. echo "Please provide a test image or update the TEST_IMAGE path in the script"
  280. return 1
  281. fi
  282. echo "📄 Testing with image: $TEST_IMAGE"
  283. echo "📁 Output directory: $TEST_OUTPUT"
  284. # 使用HTTP客户端连接到vLLM服务器
  285. python -m mineru.cli.client \
  286. -p "$TEST_IMAGE" \
  287. -o "$TEST_OUTPUT" \
  288. --backend vlm-http-client \
  289. --server-url "http://127.0.0.1:$PORT"
  290. if [ $? -eq 0 ]; then
  291. echo "✅ Client test completed successfully"
  292. echo "📁 Check output in: $TEST_OUTPUT"
  293. else
  294. echo "❌ Client test failed"
  295. fi
  296. }
  297. # 显示使用帮助
  298. usage() {
  299. echo "MinerU vLLM Service Daemon"
  300. echo "=========================="
  301. echo "Usage: $0 {start|stop|restart|status|logs|config|test|test-client}"
  302. echo ""
  303. echo "Commands:"
  304. echo " start - Start the MinerU vLLM service"
  305. echo " stop - Stop the MinerU vLLM service"
  306. echo " restart - Restart the MinerU vLLM service"
  307. echo " status - Show service status and resource usage"
  308. echo " logs - Show service logs (follow mode)"
  309. echo " config - Show current configuration"
  310. echo " test - Test API endpoints"
  311. echo " test-client - Test MinerU client with vLLM server"
  312. echo ""
  313. echo "Configuration (edit script to modify):"
  314. echo " Host: $HOST"
  315. echo " Port: $PORT"
  316. echo " Model: $MODEL_PATH"
  317. echo " GPU Memory: $GPU_MEMORY_UTILIZATION"
  318. echo " CUDA Devices: $CUDA_VISIBLE_DEVICES"
  319. echo ""
  320. echo "Examples:"
  321. echo " ./mineru_vllm_daemon.sh start"
  322. echo " ./mineru_vllm_daemon.sh status"
  323. echo " ./mineru_vllm_daemon.sh logs"
  324. echo " ./mineru_vllm_daemon.sh test"
  325. echo " ./mineru_vllm_daemon.sh test-client"
  326. }
  327. case "$1" in
  328. start)
  329. start
  330. ;;
  331. stop)
  332. stop
  333. ;;
  334. restart)
  335. stop
  336. sleep 3
  337. start
  338. ;;
  339. status)
  340. status
  341. ;;
  342. logs)
  343. logs
  344. ;;
  345. config)
  346. config
  347. ;;
  348. test)
  349. test_api
  350. ;;
  351. test-client)
  352. test_client
  353. ;;
  354. *)
  355. usage
  356. exit 1
  357. ;;
  358. esac