#!/bin/bash
# filepath: ocr_platform/ocr_tools/daemons/glmocr_vllm_daemon.sh
# Purpose: self-deployed vLLM service for GLM-OCR, used by the SDK in self-hosted mode
#          (config: maas.enabled=false, ocr_api pointing to this service).
# Keep transformers compatible with vllm; order matters: reinstall transformers after installing vllm:
#   uv pip install -U vllm --torch-backend=auto --extra-index-url https://wheels.vllm.ai/nightly
#   uv pip install -U transformers
# GLM-OCR vLLM service daemon script

LOGDIR="/home/ubuntu/zhch/logs"
mkdir -p "$LOGDIR"
PIDFILE="$LOGDIR/glmocr_vllm.pid"
LOGFILE="$LOGDIR/glmocr_vllm.log"

# Configuration
CONDA_ENV="mineru_2_7_1"
# Must match ocr_api.api_port in config-zhch.yaml (self-hosted mode)
PORT="20036"
HOST="0.0.0.0"
# Local model directory; a HuggingFace model id also works, e.g. zai-org/GLM-OCR
MODEL_PATH="/home/ubuntu/models/modelscope_cache/models/ZhipuAI/GLM-OCR"
SERVED_MODEL_NAME="glm-ocr"
ALLOWED_LOCAL_MEDIA_PATH="/"

# GPU configuration
GPU_MEMORY_UTILIZATION="0.7"
CUDA_VISIBLE_DEVICES="7"

# Optional: enable MTP speculative decoding for faster inference
ENABLE_MTP="0"
MTP_NUM_SPECULATIVE_TOKENS="1"

# Environment variables (uncomment as needed)
# export HF_HOME="/home/ubuntu/models/hf_home"
# export HF_ENDPOINT="https://hf-mirror.com"
# export MODELSCOPE_CACHE="/home/ubuntu/models/modelscope_cache"
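
# Illustrative config-zhch.yaml snippet for self-hosted mode (a sketch; only maas.enabled and
# ocr_api.api_port are referenced elsewhere in this script, the exact layout may differ):
#   maas:
#     enabled: false
#   ocr_api:
#     api_port: 20036   # must match PORT above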

# Initialize and activate the conda environment
if [ -f "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" ]; then
    source /home/ubuntu/anaconda3/etc/profile.d/conda.sh
    conda activate "$CONDA_ENV"
elif [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then
    source /opt/conda/etc/profile.d/conda.sh
    conda activate "$CONDA_ENV"
else
    echo "Warning: conda.sh not found, falling back to direct PATH activation"
    export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
fi
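
# Sanity check (a sketch, warning only): confirm the interpreter really comes from the target
# environment, since the PATH fallback above does not fail if the env directory is missing.
if ! command -v python >/dev/null 2>&1 || [[ "$(command -v python)" != *"$CONDA_ENV"* ]]; then
    echo "⚠️ python does not appear to come from the '$CONDA_ENV' environment: $(command -v python 2>/dev/null || echo 'not found')"
fi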

start() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "GLM-OCR vLLM is already running"
        return 1
    fi
    echo "Starting GLM-OCR vLLM daemon..."
    echo "Host: $HOST, Port: $PORT"
    echo "Model path: $MODEL_PATH"
    echo "Served model name: $SERVED_MODEL_NAME"
    echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
    echo "CUDA devices: $CUDA_VISIBLE_DEVICES"
    # Model check: a local (absolute) path must exist as a directory;
    # a HuggingFace id (contains "/" but is not an absolute path) is not checked.
    if [[ "$MODEL_PATH" == /* ]]; then
        if [ ! -d "$MODEL_PATH" ]; then
            echo "❌ Model path not found: $MODEL_PATH"
            echo "Use a local path or a HuggingFace id (e.g. zai-org/GLM-OCR). Edit MODEL_PATH in this script."
            return 1
        fi
    fi
    # Check Python / vLLM
    if ! command -v python >/dev/null 2>&1; then
        echo "❌ Python not found. Check conda environment activation."
        return 1
    fi
    if ! python -c "import vllm" 2>/dev/null; then
        echo "❌ vllm not found. Install: uv pip install -U vllm --torch-backend=auto --extra-index-url https://wheels.vllm.ai/nightly"
        return 1
    fi
    echo "🔧 Using Python: $(which python)"
    echo "🔧 vLLM: $(python -c 'import vllm; print(vllm.__file__)' 2>/dev/null || true)"
    echo "📊 GPU status check:"
    if command -v nvidia-smi >/dev/null 2>&1; then
        nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits | \
            awk -F',' '{printf "  GPU %s: %s - memory: %sMB/%sMB\n", $1, $2, $3, $4}'
    else
        echo "⚠️ nvidia-smi not available"
    fi
    # Build vllm serve arguments and launch
    if [ "$ENABLE_MTP" = "1" ]; then
        CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES TRANSFORMERS_USE_FAST=false nohup vllm serve "$MODEL_PATH" \
            --host "$HOST" \
            --port "$PORT" \
            --allowed-local-media-path "$ALLOWED_LOCAL_MEDIA_PATH" \
            --served-model-name "$SERVED_MODEL_NAME" \
            --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
            --speculative-config "{\"method\": \"mtp\", \"num_speculative_tokens\": $MTP_NUM_SPECULATIVE_TOKENS}" \
            > "$LOGFILE" 2>&1 &
    else
        CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES TRANSFORMERS_USE_FAST=false nohup vllm serve "$MODEL_PATH" \
            --host "$HOST" \
            --port "$PORT" \
            --allowed-local-media-path "$ALLOWED_LOCAL_MEDIA_PATH" \
            --served-model-name "$SERVED_MODEL_NAME" \
            --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
            > "$LOGFILE" 2>&1 &
    fi
    echo $! > "$PIDFILE"
    echo "✅ GLM-OCR vLLM started with PID: $(cat "$PIDFILE")"
    echo "📋 Log file: $LOGFILE"
    echo "🌐 Service URL: http://$HOST:$PORT"
    echo "📖 OpenAI-compatible API: http://localhost:$PORT/v1 (chat/completions, models)"
    echo ""
    echo "Waiting for service to start..."
    sleep 5
    status
}
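
# Optional helper (a sketch, not wired into start() above): poll the OpenAI-compatible
# /v1/models endpoint until the server answers instead of using a fixed sleep. The 120s
# budget and 3s interval are assumptions; model loading can take longer on a cold start.
wait_ready() {
    local deadline=$((SECONDS + 120))
    until curl -s --connect-timeout 2 "http://127.0.0.1:$PORT/v1/models" >/dev/null 2>&1; do
        if [ "$SECONDS" -ge "$deadline" ]; then
            echo "⚠️ Service not ready after 120s; check $LOGFILE"
            return 1
        fi
        sleep 3
    done
    echo "✅ /v1/models is responding"
}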

stop() {
    if [ ! -f "$PIDFILE" ]; then
        echo "GLM-OCR vLLM is not running"
        return 1
    fi
    PID=$(cat "$PIDFILE")
    echo "Stopping GLM-OCR vLLM (PID: $PID)..."
    kill "$PID"
    for i in {1..30}; do
        if ! kill -0 "$PID" 2>/dev/null; then
            break
        fi
        echo "Waiting for process to stop... ($i/30)"
        sleep 1
    done
    if kill -0 "$PID" 2>/dev/null; then
        echo "Force killing process..."
        kill -9 "$PID"
    fi
    rm -f "$PIDFILE"
    echo "✅ GLM-OCR vLLM stopped"
}

status() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        PID=$(cat "$PIDFILE")
        echo "✅ GLM-OCR vLLM is running (PID: $PID)"
        echo "🌐 Service URL: http://$HOST:$PORT"
        echo "📋 Log file: $LOGFILE"
        if command -v ss >/dev/null 2>&1; then
            if ss -tuln | grep -q ":$PORT "; then
                echo "🔗 Port $PORT is listening"
            else
                echo "⚠️ Port $PORT is not listening yet (service may still be starting up)"
            fi
        elif command -v netstat >/dev/null 2>&1; then
            if netstat -tuln | grep -q ":$PORT "; then
                echo "🔗 Port $PORT is listening"
            else
                echo "⚠️ Port $PORT is not listening yet (service may still be starting up)"
            fi
        fi
        if command -v curl >/dev/null 2>&1; then
            if curl -s --connect-timeout 2 "http://127.0.0.1:$PORT/v1/models" > /dev/null 2>&1; then
                echo "🎯 API is responding"
            else
                echo "⚠️ API not responding (service may still be starting up)"
            fi
        fi
        if command -v nvidia-smi >/dev/null 2>&1; then
            echo "📊 GPU usage:"
            nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits | \
                awk -F',' '{printf "  GPU %s: GPU util %s%%, memory util %s%%, VRAM %sMB/%sMB\n", $1, $2, $3, $4, $5}'
        fi
        if [ -f "$LOGFILE" ]; then
            echo "📄 Latest logs (last 3 lines):"
            tail -3 "$LOGFILE" | sed 's/^/  /'
        fi
    else
        echo "❌ GLM-OCR vLLM is not running"
        if [ -f "$PIDFILE" ]; then
            echo "Removing stale PID file..."
            rm -f "$PIDFILE"
        fi
    fi
}

logs() {
    if [ -f "$LOGFILE" ]; then
        echo "📄 GLM-OCR vLLM logs:"
        echo "====================="
        tail -f "$LOGFILE"
    else
        echo "❌ Log file not found: $LOGFILE"
    fi
}

config() {
    echo "📋 Current configuration:"
    echo "  Conda Environment: $CONDA_ENV"
    echo "  Host: $HOST"
    echo "  Port: $PORT"
    echo "  Model Path: $MODEL_PATH"
    echo "  Served Model Name: $SERVED_MODEL_NAME"
    echo "  Allowed Local Media Path: $ALLOWED_LOCAL_MEDIA_PATH"
    echo "  GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
    echo "  CUDA Visible Devices: $CUDA_VISIBLE_DEVICES"
    echo "  Enable MTP: $ENABLE_MTP"
    echo "  PID File: $PIDFILE"
    echo "  Log File: $LOGFILE"
    if [ -d "$MODEL_PATH" ]; then
        echo "✅ Model path exists"
        echo "  Model files:"
        ls -la "$MODEL_PATH" | head -10 | sed 's/^/  /'
        if [ "$(ls -1 "$MODEL_PATH" 2>/dev/null | wc -l)" -gt 10 ]; then
            echo "  ... and more files"
        fi
    else
        echo "❌ Model path not found (or MODEL_PATH is set to a HuggingFace id like zai-org/GLM-OCR)"
    fi
    echo ""
    echo "🔧 Environment:"
    echo "  Python: $(which python 2>/dev/null || echo 'Not found')"
    echo "  vLLM: $(python -c 'import vllm; print(vllm.__file__)' 2>/dev/null || echo 'Not found')"
    echo "  Conda: $(which conda 2>/dev/null || echo 'Not found')"
    echo "  CUDA: $(which nvcc 2>/dev/null || echo 'Not found')"
    if command -v nvidia-smi >/dev/null 2>&1; then
        echo ""
        echo "🔥 GPU Information:"
        nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits | \
            awk -F',' '{printf "  GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
    fi
}

test_api() {
    echo "🧪 Testing GLM-OCR vLLM API..."
    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "❌ GLM-OCR vLLM service is not running"
        return 1
    fi
    if ! command -v curl >/dev/null 2>&1; then
        echo "❌ curl command not found"
        return 1
    fi
    echo "📡 Testing /v1/models endpoint..."
    response=$(curl -s --connect-timeout 10 "http://127.0.0.1:$PORT/v1/models")
    if [ $? -eq 0 ]; then
        echo "✅ Models endpoint accessible"
        echo "$response" | python -m json.tool 2>/dev/null || echo "$response"
    else
        echo "❌ Models endpoint not accessible"
    fi
}
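
# Hedged follow-up example (commented out, not called by any command below): send one local image
# to the OpenAI-compatible /v1/chat/completions endpoint. file:// URLs should be accepted here
# because the server is started with --allowed-local-media-path "/"; the image path and prompt
# text are placeholders.
#   curl -s "http://127.0.0.1:20036/v1/chat/completions" \
#     -H 'Content-Type: application/json' \
#     -d '{
#           "model": "glm-ocr",
#           "messages": [{
#             "role": "user",
#             "content": [
#               {"type": "image_url", "image_url": {"url": "file:///home/ubuntu/zhch/data/test/sample.png"}},
#               {"type": "text", "text": "Extract all text from this image."}
#             ]
#           }]
#         }'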

test_client() {
    echo "🧪 Testing GLM-OCR SDK with vLLM server (self-hosted mode)..."
    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "❌ GLM-OCR vLLM service is not running. Start it first: $0 start"
        return 1
    fi
    TEST_IMAGE="/home/ubuntu/zhch/data/test/sample.png"
    if [ ! -f "$TEST_IMAGE" ]; then
        echo "⚠️ Test image not found: $TEST_IMAGE"
        echo "Update TEST_IMAGE in this script or create the file."
        return 1
    fi
    echo "📄 Test image: $TEST_IMAGE"
    echo "Run GLM-OCR with a config that has maas.enabled=false and ocr_api pointing to 127.0.0.1:$PORT"
    echo "Example: glmocr parse $TEST_IMAGE --config /path/to/config.yaml"
    echo ""
    echo "Or start the GLM-OCR Flask server (layout + OCR) that uses this vLLM backend:"
    echo "  glmocr server --config /path/to/config-zhch.yaml  # with maas.enabled=false, ocr_api.api_port=$PORT"
    echo "Then: curl -X POST http://localhost:5002/glmocr/parse -H 'Content-Type: application/json' -d '{\"images\": [\"file://$TEST_IMAGE\"]}'"
}

usage() {
    echo "GLM-OCR vLLM Service Daemon"
    echo "==========================="
    echo "Usage: $0 {start|stop|restart|status|logs|config|test|test-client}"
    echo ""
    echo "Commands:"
    echo "  start       - Start the GLM-OCR vLLM service"
    echo "  stop        - Stop the GLM-OCR vLLM service"
    echo "  restart     - Restart the GLM-OCR vLLM service"
    echo "  status      - Show service status and resource usage"
    echo "  logs        - Show service logs (follow mode)"
    echo "  config      - Show current configuration"
    echo "  test        - Test the /v1/models API endpoint"
    echo "  test-client - Show how to test the SDK/Flask server against this vLLM backend"
    echo ""
    echo "Configuration (edit script to modify):"
    echo "  Host: $HOST"
    echo "  Port: $PORT"
    echo "  Model Path: $MODEL_PATH"
    echo "  Served Model Name: $SERVED_MODEL_NAME"
    echo "  GPU Memory: $GPU_MEMORY_UTILIZATION"
    echo "  CUDA Devices: $CUDA_VISIBLE_DEVICES"
    echo "  Enable MTP: $ENABLE_MTP"
    echo ""
    echo "Examples:"
    echo "  ./glmocr_vllm_daemon.sh start"
    echo "  ./glmocr_vllm_daemon.sh status"
    echo "  ./glmocr_vllm_daemon.sh logs"
    echo "  ./glmocr_vllm_daemon.sh test"
}
  287. case "$1" in
  288. start)
  289. start
  290. ;;
  291. stop)
  292. stop
  293. ;;
  294. restart)
  295. stop
  296. sleep 3
  297. start
  298. ;;
  299. status)
  300. status
  301. ;;
  302. logs)
  303. logs
  304. ;;
  305. config)
  306. config
  307. ;;
  308. test)
  309. test_api
  310. ;;
  311. test-client)
  312. test_client
  313. ;;
  314. *)
  315. usage
  316. exit 1
  317. ;;
  318. esac