vllm_daemon.sh 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. #!/bin/bash
  2. # filepath: /home/ubuntu/zhch/dots.ocr/zhch/vllm_daemon.sh
  3. # DotsOCR vLLM 服务守护进程脚本
  # Runtime artifacts: the PID file and the service log both live in LOGDIR.
  LOGDIR="/home/ubuntu/zhch/logs"
  # Best effort only: sub-commands such as `config` are still usable when the
  # directory cannot be created, so warn instead of aborting.
  mkdir -p -- "$LOGDIR" || echo "Warning: cannot create log directory: $LOGDIR" >&2
  PIDFILE="$LOGDIR/vllm.pid"
  LOGFILE="$LOGDIR/vllm.log"

  # Service configuration
  CONDA_ENV="dots.ocr"
  PORT="8101"
  HOST="0.0.0.0"
  HF_MODEL_PATH="/home/ubuntu/zhch/dots.ocr/weights/DotsOCR"
  MODEL_NAME="DotsOCR"

  # GPU configuration
  GPU_MEMORY_UTILIZATION="0.70"
  DATA_PARALLEL_SIZE="1" # lowered from 3 to 1 to avoid running out of memory
  MAX_MODEL_LEN="32768"
  MAX_NUM_BATCHED_TOKENS="32768"
  MAX_NUM_SEQS="16"
  # Initialise conda and activate the CONDA_ENV environment.
  # The first conda.sh found among the known install locations is sourced; when
  # none exists, the environment's bin directory is put at the front of PATH.
  conda_profile_found=0
  for conda_profile in \
    "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" \
    "/opt/conda/etc/profile.d/conda.sh"; do
    if [ -f "$conda_profile" ]; then
      # shellcheck disable=SC1090  # path is chosen at run time
      source "$conda_profile"
      # Only warn on failure: start() checks for python/vllm before launching,
      # and stop/status/logs do not need the environment at all.
      conda activate "$CONDA_ENV" ||
        echo "Warning: failed to activate conda environment: $CONDA_ENV" >&2
      conda_profile_found=1
      break
    fi
  done
  if [ "$conda_profile_found" -eq 0 ]; then
    # Fallback: use the environment's executables directly via PATH.
    echo "Warning: Using direct conda path activation"
    export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
  fi
  unset conda_profile conda_profile_found
  # Environment variables for the vLLM process.
  # The model's parent directory is put on PYTHONPATH so the model directory can
  # be imported as a package (register_model adds `from DotsOCR import ...`).
  # `${VAR:+:$VAR}` appends the previous value only when it is non-empty, so no
  # trailing ':' is left behind; the dynamic loader treats an empty
  # LD_LIBRARY_PATH entry as the current working directory.
  PYTHONPATH="$(dirname "$HF_MODEL_PATH")${PYTHONPATH:+:$PYTHONPATH}"
  export PYTHONPATH
  export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
  export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
  #######################################
  # Register the DotsOCR model with vLLM.
  # Patches the `vllm` launcher found on PATH in place: the line
  # `from DotsOCR import modeling_dots_ocr_vllm` is inserted right after the
  # launcher's `from vllm.entrypoints.cli.main import main` line, unless the
  # import is already present.
  # Outputs: progress messages on stdout.
  # Returns: 0 when the import is present afterwards, 1 when vllm is not on
  #          PATH or the launcher could not be patched.
  #######################################
  register_model() {
    local -r import_line="from DotsOCR import modeling_dots_ocr_vllm"
    local vllm_path

    echo "🔧 注册 DotsOCR 模型到 vLLM..."
    # type -P forces a PATH lookup, so the result is always a file sed can edit.
    vllm_path=$(type -P vllm)
    if [ -z "$vllm_path" ]; then
      echo "❌ vLLM 未找到,请检查安装和环境激活"
      return 1
    fi

    if grep -qF -- "$import_line" "$vllm_path"; then
      echo "✅ DotsOCR 模型已经注册过了"
      return 0
    fi

    sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a from DotsOCR import modeling_dots_ocr_vllm' "$vllm_path"

    # sed exits 0 even when the anchor line is absent and nothing was inserted,
    # so confirm the import really landed before reporting success.
    if ! grep -qF -- "$import_line" "$vllm_path"; then
      echo "❌ Failed to register DotsOCR model in $vllm_path"
      return 1
    fi
    echo "✅ DotsOCR 模型已注册到 vLLM"
  }
  #######################################
  # Start the vLLM DotsOCR server as a background process.
  # Globals (read): PIDFILE, LOGFILE, HOST, PORT, HF_MODEL_PATH, MODEL_NAME,
  #   GPU_MEMORY_UTILIZATION, DATA_PARALLEL_SIZE, MAX_MODEL_LEN,
  #   MAX_NUM_BATCHED_TOKENS, MAX_NUM_SEQS
  # Outputs: progress on stdout; server output goes to LOGFILE and the server
  #   PID is written to PIDFILE.
  # Returns: 1 when the service is already running or a precondition
  #   (model directory, python, vllm, model registration) is not met.
  #######################################
  start() {
    local pid
    local -a serve_cmd

    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
      echo "vLLM DotsOCR is already running"
      return 1
    fi

    echo "Starting vLLM DotsOCR daemon..."
    echo "Host: $HOST, Port: $PORT"
    echo "Model path: $HF_MODEL_PATH"
    echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
    echo "Data parallel size: $DATA_PARALLEL_SIZE"

    # The model directory must exist.
    if [ ! -d "$HF_MODEL_PATH" ]; then
      echo "❌ Model path not found: $HF_MODEL_PATH"
      return 1
    fi

    # python on PATH is the sign that the conda environment is active.
    if ! command -v python >/dev/null 2>&1; then
      echo "❌ Python not found. Check conda environment activation."
      return 1
    fi

    # The vllm launcher must be on PATH as well.
    if ! command -v vllm >/dev/null 2>&1; then
      echo "❌ vLLM not found. Check installation and environment."
      return 1
    fi

    echo "🔧 Using Python: $(type -P python)"
    echo "🔧 Using vLLM: $(type -P vllm)"

    # Make sure the launcher imports the DotsOCR model code.
    if ! register_model; then
      echo "❌ Model registration failed"
      return 1
    fi

    # Show the GPU state before launching.
    echo "📊 GPU 状态检查:"
    if command -v nvidia-smi >/dev/null 2>&1; then
      nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits |
        awk -F',' '{printf " GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
    else
      echo "⚠️ nvidia-smi not available"
    fi

    # Build the command as an array so every value stays a single argument,
    # even if it contains whitespace.
    serve_cmd=(
      vllm serve "$HF_MODEL_PATH"
      --host "$HOST"
      --port "$PORT"
      --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION"
      --max-log-len 1000
      --trust-remote-code
      --max-model-len "$MAX_MODEL_LEN"
      --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS"
      --uvicorn-log-level info
      --limit-mm-per-prompt '{"image": 1}'
      --chat-template-content-format string
      --data-parallel-size "$DATA_PARALLEL_SIZE"
      --max-num-seqs "$MAX_NUM_SEQS"
      --enable-prefix-caching
      --served-model-name "$MODEL_NAME"
    )

    # Launch detached from the terminal; all server output goes to LOGFILE.
    nohup "${serve_cmd[@]}" >"$LOGFILE" 2>&1 &
    pid=$!
    echo "$pid" >"$PIDFILE"

    echo "✅ vLLM DotsOCR started with PID: $pid"
    echo "📋 Log file: $LOGFILE"
    echo "🌐 Service URL: http://$HOST:$PORT"
    echo "📖 API Documentation: http://localhost:$PORT/docs"
  }
  #######################################
  # Stop the vLLM DotsOCR server recorded in PIDFILE.
  # Sends SIGTERM, waits up to 10 seconds for the process to exit, then falls
  # back to SIGKILL. PIDFILE is removed afterwards.
  # Globals (read): PIDFILE
  # Outputs: progress messages on stdout.
  # Returns: 1 when there is no PID file or it does not refer to a live
  #   process (a stale PID file is removed in that case).
  #######################################
  stop() {
    local pid attempt

    if [ ! -f "$PIDFILE" ]; then
      echo "vLLM DotsOCR is not running"
      return 1
    fi

    pid=$(cat "$PIDFILE")
    # Reject anything that is not a positive integer before handing it to
    # kill: an empty value makes kill fail, and 0 would signal this script's
    # own process group. A PID that no longer exists means the file is stale.
    if [[ ! "$pid" =~ ^[1-9][0-9]*$ ]] || ! kill -0 "$pid" 2>/dev/null; then
      echo "vLLM DotsOCR is not running"
      echo "Removing stale PID file..."
      rm -f "$PIDFILE"
      return 1
    fi

    echo "Stopping vLLM DotsOCR (PID: $pid)..."
    # Graceful stop first.
    kill "$pid"

    # Give the process up to 10 seconds to exit.
    for attempt in {1..10}; do
      kill -0 "$pid" 2>/dev/null || break
      echo "Waiting for process to stop... ($attempt/10)"
      sleep 1
    done

    # Still alive: force it.
    if kill -0 "$pid" 2>/dev/null; then
      echo "Force killing process..."
      kill -9 "$pid"
    fi

    rm -f "$PIDFILE"
    echo "✅ vLLM DotsOCR stopped"
  }
  #######################################
  # Report whether the vLLM DotsOCR server is running.
  # When it is, also reports whether PORT is being listened on, whether the
  # /v1/models endpoint answers, GPU usage and the last 3 log lines.
  # When it is not, a leftover PID file is removed.
  # Globals (read): PIDFILE, LOGFILE, HOST, PORT
  # Outputs: the report on stdout.
  #######################################
  status() {
    local pid port_tool

    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
      echo "❌ vLLM DotsOCR is not running"
      if [ -f "$PIDFILE" ]; then
        echo "Removing stale PID file..."
        rm -f "$PIDFILE"
      fi
      return 0
    fi

    pid=$(cat "$PIDFILE")
    echo "✅ vLLM DotsOCR is running (PID: $pid)"
    echo "🌐 Service URL: http://$HOST:$PORT"
    echo "📋 Log file: $LOGFILE"

    # Check whether the port is being listened on; prefer ss, fall back to
    # netstat, skip the check when neither is installed.
    port_tool=""
    if command -v ss >/dev/null 2>&1; then
      port_tool="ss"
    elif command -v netstat >/dev/null 2>&1; then
      port_tool="netstat"
    fi
    if [ -n "$port_tool" ]; then
      if "$port_tool" -tuln | grep -q ":$PORT "; then
        echo "🔗 Port $PORT is being listened"
      else
        echo "⚠️ Port $PORT is not being listened (service may be starting up)"
      fi
    fi

    # Check whether the API answers.
    if command -v curl >/dev/null 2>&1; then
      if curl -s --connect-timeout 2 "http://127.0.0.1:$PORT/v1/models" >/dev/null 2>&1; then
        echo "🎯 API 响应正常"
      else
        echo "⚠️ API 无响应 (service may be starting up)"
      fi
    fi

    # Show GPU usage.
    if command -v nvidia-smi >/dev/null 2>&1; then
      echo "📊 GPU 使用情况:"
      nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits |
        awk -F',' '{printf " GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
    fi

    # Show the most recent log lines.
    if [ -f "$LOGFILE" ]; then
      echo "📄 Latest logs (last 3 lines):"
      tail -n 3 "$LOGFILE" | sed 's/^/ /'
    fi
  }
  #######################################
  # Follow the service log.
  # Globals (read): LOGFILE
  # Outputs: a banner followed by `tail -f` output, or an error message when
  #   LOGFILE does not exist. Blocks until interrupted while following.
  #######################################
  logs() {
    if [ ! -f "$LOGFILE" ]; then
      echo "❌ Log file not found: $LOGFILE"
      return 0
    fi

    echo "📄 vLLM DotsOCR logs:"
    echo "=================="
    tail -f "$LOGFILE"
  }
  #######################################
  # Print the current configuration, the model directory listing, the tool
  # paths found on PATH and basic GPU information.
  # Globals (read): CONDA_ENV, HOST, PORT, HF_MODEL_PATH, MODEL_NAME,
  #   GPU_MEMORY_UTILIZATION, DATA_PARALLEL_SIZE, MAX_MODEL_LEN, MAX_NUM_SEQS,
  #   PIDFILE, LOGFILE
  # Outputs: the report on stdout.
  #######################################
  config() {
    local -a listing

    echo "📋 Current configuration:"
    echo " Conda Environment: $CONDA_ENV"
    echo " Host: $HOST"
    echo " Port: $PORT"
    echo " Model Path: $HF_MODEL_PATH"
    echo " Model Name: $MODEL_NAME"
    echo " GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
    echo " Data Parallel Size: $DATA_PARALLEL_SIZE"
    echo " Max Model Length: $MAX_MODEL_LEN"
    echo " Max Num Seqs: $MAX_NUM_SEQS"
    echo " PID File: $PIDFILE"
    echo " Log File: $LOGFILE"

    if [ -d "$HF_MODEL_PATH" ]; then
      echo "✅ Model path exists"
      echo " Model files:"
      # Capture the listing once and decide about truncation from the very
      # same lines that are displayed, so the "more files" hint appears
      # exactly when the listing was cut off.
      mapfile -t listing < <(ls -la "$HF_MODEL_PATH")
      if ((${#listing[@]} > 0)); then
        printf ' %s\n' "${listing[@]:0:10}"
      fi
      if ((${#listing[@]} > 10)); then
        echo " ... and more files"
      fi
    else
      echo "❌ Model path not found"
    fi

    # Tool locations. type -P reports the executable found on PATH and prints
    # nothing when there is none.
    echo ""
    echo "🔧 Environment:"
    echo " Python: $(type -P python || echo 'Not found')"
    echo " vLLM: $(type -P vllm || echo 'Not found')"
    echo " Conda: $(type -P conda || echo 'Not found')"
    echo " CUDA: $(type -P nvcc || echo 'Not found')"

    # GPU information.
    if command -v nvidia-smi >/dev/null 2>&1; then
      echo ""
      echo "🔥 GPU Information:"
      nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits |
        awk -F',' '{printf " GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
    fi
  }
  #######################################
  # Smoke-test the running service by querying its /v1/models endpoint.
  # Globals (read): PIDFILE, PORT
  # Outputs: progress on stdout; the endpoint response, pretty-printed with
  #   `python -m json.tool` when that works, raw otherwise.
  # Returns: 1 when the service is not running, curl is unavailable or the
  #   endpoint cannot be reached; 0 otherwise.
  #######################################
  test_api() {
    local response

    echo "🧪 Testing vLLM DotsOCR API..."

    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
      echo "❌ vLLM service is not running"
      return 1
    fi

    if ! command -v curl >/dev/null 2>&1; then
      echo "❌ curl command not found"
      return 1
    fi

    echo "📡 Testing /v1/models endpoint..."
    # --connect-timeout only bounds the connection phase; --max-time bounds
    # the whole request so a server that accepts but never answers cannot hang
    # this command.
    if ! response=$(curl -s --connect-timeout 5 --max-time 15 "http://127.0.0.1:$PORT/v1/models"); then
      echo "❌ Models endpoint not accessible"
      return 1
    fi

    echo "✅ Models endpoint accessible"
    # Pretty-print when python can parse the response, otherwise show it raw.
    printf '%s\n' "$response" | python -m json.tool 2>/dev/null || printf '%s\n' "$response"
  }
  #######################################
  # Print the help text: available commands, the current configuration
  # values and a few example invocations.
  # Globals (read): HOST, PORT, HF_MODEL_PATH, GPU_MEMORY_UTILIZATION,
  #   DATA_PARALLEL_SIZE
  # Outputs: the help text on stdout.
  #######################################
  usage() {
    # One argument per output line.
    printf '%s\n' \
      "vLLM DotsOCR Service Daemon" \
      "============================" \
      "Usage: $0 {start|stop|restart|status|logs|config|test}" \
      "" \
      "Commands:" \
      " start - Start the vLLM DotsOCR service" \
      " stop - Stop the vLLM DotsOCR service" \
      " restart - Restart the vLLM DotsOCR service" \
      " status - Show service status and resource usage" \
      " logs - Show service logs (follow mode)" \
      " config - Show current configuration" \
      " test - Test API endpoints" \
      "" \
      "Configuration (edit script to modify):" \
      " Host: $HOST" \
      " Port: $PORT" \
      " Model: $HF_MODEL_PATH" \
      " GPU Memory: $GPU_MEMORY_UTILIZATION" \
      " Parallel Size: $DATA_PARALLEL_SIZE" \
      "" \
      "Examples:" \
      " ./vllm_daemon.sh start" \
      " ./vllm_daemon.sh status" \
      " ./vllm_daemon.sh logs" \
      " ./vllm_daemon.sh test"
  }
  # Command dispatch on the first argument. Commands whose name equals their
  # handler function are invoked directly; anything unrecognised prints the
  # help text and exits with status 1.
  case "$1" in
    start | stop | status | logs | config)
      "$1"
      ;;
    restart)
      stop
      sleep 3
      start
      ;;
    test)
      test_api
      ;;
    *)
      usage
      exit 1
      ;;
  esac