  1. #!/bin/bash
  2. # filepath: ocr_platform/ocr_tools/daemons/mineru_vllm_daemon.sh
  3. # 对应客户端工具: ocr_tools/mineru_vl_tool/main.py
  4. # MinerU vLLM 服务守护进程脚本
  5. LOGDIR="/home/ubuntu/zhch/logs"
  6. mkdir -p $LOGDIR
  7. PIDFILE="$LOGDIR/mineru_vllm.pid"
  8. LOGFILE="$LOGDIR/mineru_vllm.log"
  9. # 配置参数
  10. CONDA_ENV="mineru2"
  11. PORT="8121"
  12. HOST="0.0.0.0"
  13. MODEL_PATH="/home/ubuntu/models/modelscope_cache/models/OpenDataLab/MinerU2___5-2509-1___2B"
  14. MODEL_NAME="MinerU2.5"
  15. # GPU 配置
  16. GPU_MEMORY_UTILIZATION="0.3"
  17. CUDA_VISIBLE_DEVICES="4"
  18. MAX_MODEL_LEN="16384"
  19. MAX_NUM_BATCHED_TOKENS="8192"
  20. MAX_NUM_SEQS="8"
  21. # MinerU 配置
  22. export MINERU_TOOLS_CONFIG_JSON="/home/ubuntu/zhch/MinerU/mineru.json"
  23. export MODELSCOPE_CACHE="/home/ubuntu/models/modelscope_cache"
  24. export USE_MODELSCOPE_HUB=1
  25. # export CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES"
  26. # export NLTK_DATA="/home/ubuntu/nltk_data"
  27. # export HF_HOME="/home/ubuntu/models/hf_home"
  28. # export HF_ENDPOINT="https://hf-mirror.com"
  29. # export TORCH_HOME="/home/ubuntu/models/torch/"
  30. # 正确初始化和激活conda环境
  31. if [ -f "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" ]; then
  32. source /home/ubuntu/anaconda3/etc/profile.d/conda.sh
  33. conda activate $CONDA_ENV
  34. elif [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then
  35. source /opt/conda/etc/profile.d/conda.sh
  36. conda activate $CONDA_ENV
  37. else
  38. # 方法2:直接使用conda可执行文件路径
  39. echo "Warning: Using direct conda path activation"
  40. export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
  41. fi
  42. # 设置环境变量
  43. # export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
  44. # export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
  45. start() {
  46. if [ -f $PIDFILE ] && kill -0 $(cat $PIDFILE) 2>/dev/null; then
  47. echo "MinerU vLLM is already running"
  48. return 1
  49. fi
  50. echo "Starting MinerU vLLM daemon..."
  51. echo "Host: $HOST, Port: $PORT"
  52. echo "Model path: $MODEL_PATH"
  53. echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
  54. echo "CUDA devices: $CUDA_VISIBLE_DEVICES"
  55. # 检查模型文件是否存在
  56. if [ ! -d "$MODEL_PATH" ]; then
  57. echo "❌ Model path not found: $MODEL_PATH"
  58. echo "Please download the model first:"
  59. echo "python -m mineru.cli.models_download"
  60. return 1
  61. fi
  62. # 检查conda环境
  63. if ! command -v python >/dev/null 2>&1; then
  64. echo "❌ Python not found. Check conda environment activation."
  65. return 1
  66. fi
  67. # 检查mineru-vllm-server命令
  68. if ! command -v mineru-vllm-server >/dev/null 2>&1; then
  69. echo "❌ mineru-vllm-server not found. Check installation and environment."
  70. return 1
  71. fi
  72. echo "🔧 Using Python: $(which python)"
  73. echo "🔧 Using mineru-vllm-server: $(which mineru-vllm-server)"
  74. # 显示GPU状态
  75. echo "📊 GPU 状态检查:"
  76. if command -v nvidia-smi >/dev/null 2>&1; then
  77. nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits | \
  78. awk -F',' '{printf " GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
  79. else
  80. echo "⚠️ nvidia-smi not available"
  81. fi
  82. # 启动MinerU vLLM服务
  83. CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES nohup mineru-vllm-server \
  84. --host $HOST \
  85. --port $PORT \
  86. --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
  87. --served-model-name $MODEL_NAME \
  88. > $LOGFILE 2>&1 &
  89. echo $! > $PIDFILE
  90. echo "✅ MinerU vLLM started with PID: $(cat $PIDFILE)"
  91. echo "📋 Log file: $LOGFILE"
  92. echo "🌐 Service URL: http://$HOST:$PORT"
  93. echo "📖 API Documentation: http://localhost:$PORT/docs"
  94. echo ""
  95. echo "Waiting for service to start..."
  96. sleep 5
  97. status
  98. }
  99. stop() {
  100. if [ ! -f $PIDFILE ]; then
  101. echo "MinerU vLLM is not running"
  102. return 1
  103. fi
  104. PID=$(cat $PIDFILE)
  105. echo "Stopping MinerU vLLM (PID: $PID)..."
  106. # 优雅停止
  107. kill $PID
  108. # 等待进程结束
  109. for i in {1..10}; do
  110. if ! kill -0 $PID 2>/dev/null; then
  111. break
  112. fi
  113. echo "Waiting for process to stop... ($i/10)"
  114. sleep 1
  115. done
  116. # 如果进程仍在运行,强制结束
  117. if kill -0 $PID 2>/dev/null; then
  118. echo "Force killing process..."
  119. kill -9 $PID
  120. fi
  121. rm -f $PIDFILE
  122. echo "✅ MinerU vLLM stopped"
  123. }
  124. status() {
  125. if [ -f $PIDFILE ] && kill -0 $(cat $PIDFILE) 2>/dev/null; then
  126. PID=$(cat $PIDFILE)
  127. echo "✅ MinerU vLLM is running (PID: $PID)"
  128. echo "🌐 Service URL: http://$HOST:$PORT"
  129. echo "📋 Log file: $LOGFILE"
  130. # 检查端口是否被监听
  131. if command -v ss >/dev/null 2>&1; then
  132. if ss -tuln | grep -q ":$PORT "; then
  133. echo "🔗 Port $PORT is being listened"
  134. else
  135. echo "⚠️ Port $PORT is not being listened (service may be starting up)"
  136. fi
  137. elif command -v netstat >/dev/null 2>&1; then
  138. if netstat -tuln | grep -q ":$PORT "; then
  139. echo "🔗 Port $PORT is being listened"
  140. else
  141. echo "⚠️ Port $PORT is not being listened (service may be starting up)"
  142. fi
  143. fi
  144. # 检查API响应
  145. if command -v curl >/dev/null 2>&1; then
  146. if curl -s --connect-timeout 2 http://127.0.0.1:$PORT/v1/models > /dev/null 2>&1; then
  147. echo "🎯 API 响应正常"
  148. else
  149. echo "⚠️ API 无响应 (service may be starting up)"
  150. fi
  151. fi
  152. # 显示GPU使用情况
  153. if command -v nvidia-smi >/dev/null 2>&1; then
  154. echo "📊 GPU 使用情况:"
  155. nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits | \
  156. awk -F',' '{printf " GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
  157. fi
  158. # 显示最新日志
  159. if [ -f $LOGFILE ]; then
  160. echo "📄 Latest logs (last 3 lines):"
  161. tail -3 $LOGFILE | sed 's/^/ /'
  162. fi
  163. else
  164. echo "❌ MinerU vLLM is not running"
  165. if [ -f $PIDFILE ]; then
  166. echo "Removing stale PID file..."
  167. rm -f $PIDFILE
  168. fi
  169. fi
  170. }
  171. logs() {
  172. if [ -f $LOGFILE ]; then
  173. echo "📄 MinerU vLLM logs:"
  174. echo "=================="
  175. tail -f $LOGFILE
  176. else
  177. echo "❌ Log file not found: $LOGFILE"
  178. fi
  179. }
  180. config() {
  181. echo "📋 Current configuration:"
  182. echo " Conda Environment: $CONDA_ENV"
  183. echo " Host: $HOST"
  184. echo " Port: $PORT"
  185. echo " Model Path: $MODEL_PATH"
  186. echo " Model Name: $MODEL_NAME"
  187. echo " GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
  188. echo " CUDA Visible Devices: $CUDA_VISIBLE_DEVICES"
  189. echo " Max Model Length: $MAX_MODEL_LEN"
  190. echo " Max Num Seqs: $MAX_NUM_SEQS"
  191. echo " PID File: $PIDFILE"
  192. echo " Log File: $LOGFILE"
  193. echo ""
  194. echo " MinerU Config: $MINERU_TOOLS_CONFIG_JSON"
  195. echo " ModelScope Cache: $MODELSCOPE_CACHE"
  196. if [ -d "$MODEL_PATH" ]; then
  197. echo "✅ Model path exists"
  198. echo " Model files:"
  199. ls -la "$MODEL_PATH" | head -10 | sed 's/^/ /'
  200. if [ $(ls -1 "$MODEL_PATH" | wc -l) -gt 10 ]; then
  201. echo " ... and more files"
  202. fi
  203. else
  204. echo "❌ Model path not found"
  205. fi
  206. # 检查MinerU配置文件
  207. if [ -f "$MINERU_TOOLS_CONFIG_JSON" ]; then
  208. echo "✅ MinerU config file exists"
  209. else
  210. echo "❌ MinerU config file not found: $MINERU_TOOLS_CONFIG_JSON"
  211. fi
  212. # 显示环境信息
  213. echo ""
  214. echo "🔧 Environment:"
  215. echo " Python: $(which python 2>/dev/null || echo 'Not found')"
  216. echo " mineru-vllm-server: $(which mineru-vllm-server 2>/dev/null || echo 'Not found')"
  217. echo " Conda: $(which conda 2>/dev/null || echo 'Not found')"
  218. echo " CUDA: $(which nvcc 2>/dev/null || echo 'Not found')"
  219. # 显示GPU信息
  220. if command -v nvidia-smi >/dev/null 2>&1; then
  221. echo ""
  222. echo "🔥 GPU Information:"
  223. nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits | \
  224. awk -F',' '{printf " GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
  225. fi
  226. }
  227. test_api() {
  228. echo "🧪 Testing MinerU vLLM API..."
  229. if [ ! -f $PIDFILE ] || ! kill -0 $(cat $PIDFILE) 2>/dev/null; then
  230. echo "❌ MinerU vLLM service is not running"
  231. return 1
  232. fi
  233. if ! command -v curl >/dev/null 2>&1; then
  234. echo "❌ curl command not found"
  235. return 1
  236. fi
  237. echo "📡 Testing /v1/models endpoint..."
  238. response=$(curl -s --connect-timeout 5 http://127.0.0.1:$PORT/v1/models)
  239. if [ $? -eq 0 ]; then
  240. echo "✅ Models endpoint accessible"
  241. echo "$response" | python -m json.tool 2>/dev/null || echo "$response"
  242. else
  243. echo "❌ Models endpoint not accessible"
  244. fi
  245. echo ""
  246. echo "📡 Testing health endpoint..."
  247. health_response=$(curl -s --connect-timeout 5 http://127.0.0.1:$PORT/health)
  248. if [ $? -eq 0 ]; then
  249. echo "✅ Health endpoint accessible"
  250. echo "$health_response"
  251. else
  252. echo "❌ Health endpoint not accessible"
  253. fi
  254. }
  255. test_client() {
  256. echo "🧪 Testing MinerU client with vLLM server..."
  257. if [ ! -f $PIDFILE ] || ! kill -0 $(cat $PIDFILE) 2>/dev/null; then
  258. echo "❌ MinerU vLLM service is not running. Start it first with: $0 start"
  259. return 1
  260. fi
  261. # 测试用例文件路径(需要根据实际情况调整)
  262. TEST_IMAGE="/home/ubuntu/zhch/data/test/sample.png"
  263. TEST_OUTPUT="/tmp/mineru_vllm_test_output"
  264. if [ ! -f "$TEST_IMAGE" ]; then
  265. echo "⚠️ Test image not found: $TEST_IMAGE"
  266. echo "Please provide a test image or update the TEST_IMAGE path in the script"
  267. return 1
  268. fi
  269. echo "📄 Testing with image: $TEST_IMAGE"
  270. echo "📁 Output directory: $TEST_OUTPUT"
  271. # 使用HTTP客户端连接到vLLM服务器
  272. python -m mineru.cli.client \
  273. -p "$TEST_IMAGE" \
  274. -o "$TEST_OUTPUT" \
  275. --backend vlm-http-client \
  276. --server-url "http://127.0.0.1:$PORT"
  277. if [ $? -eq 0 ]; then
  278. echo "✅ Client test completed successfully"
  279. echo "📁 Check output in: $TEST_OUTPUT"
  280. else
  281. echo "❌ Client test failed"
  282. fi
  283. }
  284. # 显示使用帮助
  285. usage() {
  286. echo "MinerU vLLM Service Daemon"
  287. echo "=========================="
  288. echo "Usage: $0 {start|stop|restart|status|logs|config|test|test-client}"
  289. echo ""
  290. echo "Commands:"
  291. echo " start - Start the MinerU vLLM service"
  292. echo " stop - Stop the MinerU vLLM service"
  293. echo " restart - Restart the MinerU vLLM service"
  294. echo " status - Show service status and resource usage"
  295. echo " logs - Show service logs (follow mode)"
  296. echo " config - Show current configuration"
  297. echo " test - Test API endpoints"
  298. echo " test-client - Test MinerU client with vLLM server"
  299. echo ""
  300. echo "Configuration (edit script to modify):"
  301. echo " Host: $HOST"
  302. echo " Port: $PORT"
  303. echo " Model: $MODEL_PATH"
  304. echo " GPU Memory: $GPU_MEMORY_UTILIZATION"
  305. echo " CUDA Devices: $CUDA_VISIBLE_DEVICES"
  306. echo ""
  307. echo "Examples:"
  308. echo " ./mineru_vllm_daemon.sh start"
  309. echo " ./mineru_vllm_daemon.sh status"
  310. echo " ./mineru_vllm_daemon.sh logs"
  311. echo " ./mineru_vllm_daemon.sh test"
  312. echo " ./mineru_vllm_daemon.sh test-client"
  313. }
  314. case "$1" in
  315. start)
  316. start
  317. ;;
  318. stop)
  319. stop
  320. ;;
  321. restart)
  322. stop
  323. sleep 3
  324. start
  325. ;;
  326. status)
  327. status
  328. ;;
  329. logs)
  330. logs
  331. ;;
  332. config)
  333. config
  334. ;;
  335. test)
  336. test_api
  337. ;;
  338. test-client)
  339. test_client
  340. ;;
  341. *)
  342. usage
  343. exit 1
  344. ;;
  345. esac