mineru_fast_api_daemon.sh 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  #!/bin/bash
  # MinerU FastAPI service daemon script.
  #
  # This section defines the service configuration, exports the environment
  # variables the service reads, and activates the conda environment before
  # any sub-command (start/stop/status/...) runs.

  # Runtime files (PID file and service log) live under LOGDIR.
  LOGDIR="/home/ubuntu/zhch/logs"
  mkdir -p -- "$LOGDIR" || echo "Warning: cannot create log directory: $LOGDIR" >&2
  PIDFILE="$LOGDIR/mineru_fastapi.pid"
  LOGFILE="$LOGDIR/mineru_fastapi.log"

  # Service settings
  CONDA_ENV="mineru2"
  API_PORT="8120"
  API_HOST="0.0.0.0"

  # vLLM backend settings (used with the vlm-vllm-engine backend)
  VLLM_PORT="8121"
  VLLM_HOST="127.0.0.1"
  MODEL_PATH="/home/ubuntu/models/modelscope_cache/models/OpenDataLab/MinerU2___5-2509-1___2B"
  MODEL_NAME="MinerU2.5"

  # GPU settings
  GPU_MEMORY_UTILIZATION="0.3"
  # Exported on purpose: a plain assignment stays private to this shell, so
  # the Python service (a child process) would never see the GPU selection.
  export CUDA_VISIBLE_DEVICES="4"
  MAX_MODEL_LEN="16384"
  MAX_NUM_BATCHED_TOKENS="8192"
  MAX_NUM_SEQS="8"

  # MinerU settings
  export MINERU_TOOLS_CONFIG_JSON="/home/ubuntu/zhch/MinerU/mineru.json"
  export MODELSCOPE_CACHE="/home/ubuntu/models/modelscope_cache"
  export USE_MODELSCOPE_HUB=1

  # Initialise conda from the first installation found and activate the
  # environment; if no conda.sh exists, fall back to prepending the
  # environment's bin directory to PATH.
  conda_activated=0
  for conda_sh in \
    "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" \
    "/opt/conda/etc/profile.d/conda.sh"; do
    if [ -f "$conda_sh" ]; then
      # shellcheck disable=SC1090
      source "$conda_sh"
      conda activate "$CONDA_ENV"
      conda_activated=1
      break
    fi
  done
  if [ "$conda_activated" -eq 0 ]; then
    echo "Warning: Using direct conda path activation"
    export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
  fi
  unset conda_sh conda_activated
  #######################################
  # Start the MinerU FastAPI service as a background process.
  # Globals (read):
  #   PIDFILE, LOGFILE, API_HOST, API_PORT, VLLM_HOST, VLLM_PORT,
  #   MODEL_PATH, MODEL_NAME, GPU_MEMORY_UTILIZATION, CUDA_VISIBLE_DEVICES,
  #   MAX_MODEL_LEN, MAX_NUM_BATCHED_TOKENS, MAX_NUM_SEQS
  # Outputs:
  #   Progress messages on stdout; the service's own output goes to LOGFILE;
  #   the service PID is written to PIDFILE.
  # Returns:
  #   1 if the service is already running or python is not on PATH;
  #   otherwise the exit status of the final `status` call.
  #######################################
  start() {
    local running_pid=""
    if [ -f "$PIDFILE" ]; then
      running_pid="$(cat "$PIDFILE" 2>/dev/null)"
    fi
    if [ -n "$running_pid" ] && kill -0 "$running_pid" 2>/dev/null; then
      echo "MinerU FastAPI is already running"
      return 1
    fi

    echo "Starting MinerU FastAPI daemon..."
    echo "API Host: $API_HOST, Port: $API_PORT"
    echo "Backend vLLM Host: $VLLM_HOST, Port: $VLLM_PORT"
    echo "Model path: $MODEL_PATH"
    echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
    echo "CUDA devices: $CUDA_VISIBLE_DEVICES"

    # A missing local model directory is only a warning; startup continues.
    if [ ! -d "$MODEL_PATH" ]; then
      echo "⚠️ Model path not found: $MODEL_PATH"
      echo "Will try to download model automatically or use HTTP client mode"
    fi

    # Without a python interpreter on PATH the conda environment is not active.
    if ! command -v python >/dev/null 2>&1; then
      echo "❌ Python not found. Check conda environment activation."
      return 1
    fi
    echo "🔧 Using Python: $(command -v python)"

    # Show the current GPU state when nvidia-smi is available.
    echo "📊 GPU 状态检查:"
    if command -v nvidia-smi >/dev/null 2>&1; then
      nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits \
        | awk -F',' '{printf " GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
    else
      echo "⚠️ nvidia-smi not available"
    fi

    # Arguments are kept in an array so every value is passed as one word.
    local -a service_args=(
      --host "$API_HOST"
      --port "$API_PORT"
      --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION"
      --max-model-len "$MAX_MODEL_LEN"
      --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS"
      --max-num-seqs "$MAX_NUM_SEQS"
      --served-model-name "$MODEL_NAME"
      --server-url "http://$VLLM_HOST:$VLLM_PORT"
      --source modelscope
    )

    # The GPU selection is passed as an environment assignment on the command
    # itself so the service receives it even if the variable is not exported.
    # nohup wraps python directly, so $! identifies the launched command.
    CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" nohup python -m mineru.cli.fast_api \
      "${service_args[@]}" > "$LOGFILE" 2>&1 &
    echo $! > "$PIDFILE"

    echo "✅ MinerU FastAPI started with PID: $(cat "$PIDFILE")"
    echo "📋 Log file: $LOGFILE"
    echo "🌐 FastAPI Service URL: http://$API_HOST:$API_PORT"
    echo "📖 API Documentation: http://localhost:$API_PORT/docs"
    echo "📖 ReDoc Documentation: http://localhost:$API_PORT/redoc"
    echo ""
    echo "Waiting for service to start..."
    # Give the service a moment before probing it.
    sleep 5
    status
  }
  #######################################
  # Stop the MinerU FastAPI service recorded in PIDFILE.
  # Sends SIGTERM, waits up to 10 seconds for the process to exit, then
  # falls back to SIGKILL.
  # Globals (read): PIDFILE
  # Outputs: progress messages on stdout.
  # Returns:
  #   1 if there is no PID file, or the PID file is stale (empty, not a
  #   number, or the process cannot be signalled); 0 after stopping.
  #######################################
  stop() {
    if [ ! -f "$PIDFILE" ]; then
      echo "MinerU FastAPI is not running"
      return 1
    fi

    local pid
    pid="$(cat "$PIDFILE" 2>/dev/null)"

    # A PID file that does not name a signallable process is stale: clean it
    # up instead of calling kill with a bad argument and reporting "stopped".
    if ! [[ "$pid" =~ ^[0-9]+$ ]] || ! kill -0 "$pid" 2>/dev/null; then
      echo "MinerU FastAPI is not running (removing stale PID file)"
      rm -f -- "$PIDFILE"
      return 1
    fi

    echo "Stopping MinerU FastAPI (PID: $pid)..."
    # Graceful stop first.
    kill "$pid" 2>/dev/null

    # Poll once per second until the process is gone (at most 10 times).
    local attempt
    for attempt in {1..10}; do
      if ! kill -0 "$pid" 2>/dev/null; then
        break
      fi
      echo "Waiting for process to stop... ($attempt/10)"
      sleep 1
    done

    # Still alive after the grace period: force it.
    if kill -0 "$pid" 2>/dev/null; then
      echo "Force killing process..."
      kill -9 "$pid"
    fi

    rm -f -- "$PIDFILE"
    echo "✅ MinerU FastAPI stopped"
  }
  #######################################
  # Report whether the service is alive and, if it is, probe it.
  # Globals (read): PIDFILE, LOGFILE, API_HOST, API_PORT
  # Outputs: a status report on stdout.
  # Side effects: removes PIDFILE when it exists but the service is not
  #   running.
  #######################################
  status() {
    # Guard: no PID file, or the recorded PID cannot be signalled.
    if ! { [ -f "$PIDFILE" ] && kill -0 $(cat "$PIDFILE") 2>/dev/null; }; then
      echo "❌ MinerU FastAPI is not running"
      if [ -f "$PIDFILE" ]; then
        echo "Removing stale PID file..."
        rm -f "$PIDFILE"
      fi
      return
    fi

    local service_pid
    service_pid=$(cat "$PIDFILE")
    echo "✅ MinerU FastAPI is running (PID: $service_pid)"
    echo "🌐 FastAPI Service URL: http://$API_HOST:$API_PORT"
    echo "📋 Log file: $LOGFILE"

    # Pick whichever socket-listing tool is installed (ss preferred), then
    # check that something is listening on the API port.
    local listener_tool=""
    if command -v ss >/dev/null 2>&1; then
      listener_tool="ss"
    elif command -v netstat >/dev/null 2>&1; then
      listener_tool="netstat"
    fi
    if [ -n "$listener_tool" ]; then
      if "$listener_tool" -tuln | grep -q ":$API_PORT "; then
        echo "🔗 Port $API_PORT is being listened"
      else
        echo "⚠️ Port $API_PORT is not being listened (service may be starting up)"
      fi
    fi

    # Probe the API over HTTP when curl is available.
    if command -v curl >/dev/null 2>&1; then
      if curl -s --connect-timeout 2 "http://127.0.0.1:$API_PORT/docs" > /dev/null 2>&1; then
        echo "🎯 FastAPI 响应正常"
      else
        echo "⚠️ FastAPI 无响应 (service may be starting up)"
      fi
    fi

    # GPU utilisation summary.
    if command -v nvidia-smi >/dev/null 2>&1; then
      echo "📊 GPU 使用情况:"
      nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits \
        | awk -F',' '{printf " GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
    fi

    # Tail of the service log.
    if [ -f "$LOGFILE" ]; then
      echo "📄 Latest logs (last 3 lines):"
      tail -3 "$LOGFILE" | sed 's/^/ /'
    fi
  }
  #######################################
  # Follow the service log on the terminal.
  # Globals (read): LOGFILE
  # Outputs: a header followed by `tail -f` output, or an error message when
  #   the log file does not exist.
  #######################################
  logs() {
    # Nothing to follow when the log has not been created.
    if [ ! -f "$LOGFILE" ]; then
      echo "❌ Log file not found: $LOGFILE"
      return
    fi

    echo "📄 MinerU FastAPI logs:"
    echo "======================"
    tail -f "$LOGFILE"
  }
  #######################################
  # Print the current configuration and a summary of the environment.
  # Globals (read):
  #   CONDA_ENV, API_HOST, API_PORT, VLLM_HOST, VLLM_PORT, MODEL_PATH,
  #   MODEL_NAME, GPU_MEMORY_UTILIZATION, CUDA_VISIBLE_DEVICES,
  #   MAX_MODEL_LEN, MAX_NUM_SEQS, PIDFILE, LOGFILE,
  #   MINERU_TOOLS_CONFIG_JSON, MODELSCOPE_CACHE
  # Outputs: the report on stdout.
  #######################################
  config() {
    printf '%s\n' \
      "📋 Current configuration:" \
      " Conda Environment: $CONDA_ENV" \
      " FastAPI Host: $API_HOST" \
      " FastAPI Port: $API_PORT" \
      " vLLM Backend Host: $VLLM_HOST" \
      " vLLM Backend Port: $VLLM_PORT" \
      " Model Path: $MODEL_PATH" \
      " Model Name: $MODEL_NAME" \
      " GPU Memory Utilization: $GPU_MEMORY_UTILIZATION" \
      " CUDA Visible Devices: $CUDA_VISIBLE_DEVICES" \
      " Max Model Length: $MAX_MODEL_LEN" \
      " Max Num Seqs: $MAX_NUM_SEQS" \
      " PID File: $PIDFILE" \
      " Log File: $LOGFILE" \
      "" \
      " MinerU Config: $MINERU_TOOLS_CONFIG_JSON" \
      " ModelScope Cache: $MODELSCOPE_CACHE"

    # Local model directory: list the first entries when it is present.
    if [ ! -d "$MODEL_PATH" ]; then
      echo "⚠️ Model path not found (will use HTTP client mode or auto-download)"
    else
      echo "✅ Model path exists"
      echo " Model files:"
      ls -la "$MODEL_PATH" | head -5 | sed 's/^/ /'
      local entry_total
      entry_total=$(ls -1 "$MODEL_PATH" | wc -l)
      if [ $entry_total -gt 5 ]; then
        echo " ... and more files"
      fi
    fi

    # MinerU configuration file.
    if [ ! -f "$MINERU_TOOLS_CONFIG_JSON" ]; then
      echo "❌ MinerU config file not found: $MINERU_TOOLS_CONFIG_JSON"
    else
      echo "✅ MinerU config file exists"
    fi

    # Tool and module lookup; anything that cannot be resolved is reported
    # as 'Not found'.
    local python_bin fastapi_ver mineru_ver conda_bin nvcc_bin
    python_bin=$(which python 2>/dev/null || echo 'Not found')
    fastapi_ver=$(python -c 'import fastapi; print(fastapi.__version__)' 2>/dev/null || echo 'Not found')
    mineru_ver=$(python -c 'import mineru; print(mineru.__version__)' 2>/dev/null || echo 'Not found')
    conda_bin=$(which conda 2>/dev/null || echo 'Not found')
    nvcc_bin=$(which nvcc 2>/dev/null || echo 'Not found')
    printf '%s\n' \
      "" \
      "🔧 Environment:" \
      " Python: $python_bin" \
      " FastAPI module: $fastapi_ver" \
      " MinerU module: $mineru_ver" \
      " Conda: $conda_bin" \
      " CUDA: $nvcc_bin"

    # GPU inventory.
    if command -v nvidia-smi >/dev/null 2>&1; then
      printf '%s\n' "" "🔥 GPU Information:"
      nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits \
        | awk -F',' '{printf " GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
    fi
  }
  #######################################
  # Smoke-test the running service over HTTP.
  # Checks the /docs endpoint, then (when a sample file exists) posts it to
  # /file_parse and looks for "results" in the response.
  # Globals (read): PIDFILE, API_PORT, VLLM_HOST, VLLM_PORT
  # Arguments:
  #   $1 - optional sample file to upload
  #        (default: /home/ubuntu/zhch/MinerU/demo/pdfs/small_ocr.pdf)
  # Outputs: test progress and results on stdout.
  # Returns:
  #   1 if the service is not running, curl is missing, or any endpoint
  #   check fails; 0 otherwise (a missing sample file is only a warning).
  #######################################
  test_api() {
    local test_file="${1:-/home/ubuntu/zhch/MinerU/demo/pdfs/small_ocr.pdf}"
    local base_url="http://127.0.0.1:$API_PORT"
    local service_pid="" response="" rc=0

    echo "🧪 Testing MinerU FastAPI..."

    if [ -f "$PIDFILE" ]; then
      service_pid="$(cat "$PIDFILE" 2>/dev/null)"
    fi
    if [ -z "$service_pid" ] || ! kill -0 "$service_pid" 2>/dev/null; then
      echo "❌ MinerU FastAPI service is not running"
      return 1
    fi

    if ! command -v curl >/dev/null 2>&1; then
      echo "❌ curl command not found"
      return 1
    fi

    echo "📡 Testing FastAPI health..."
    # --fail makes curl exit non-zero on HTTP 4xx/5xx; without it any HTTP
    # response, including an error page, would count as "accessible".
    if curl --silent --fail --connect-timeout 5 --output /dev/null "$base_url/docs"; then
      echo "✅ FastAPI docs endpoint accessible"
    else
      echo "❌ FastAPI docs endpoint not accessible"
      rc=1
    fi

    # Upload test (needs a sample file).
    if [ -f "$test_file" ]; then
      echo ""
      echo "📡 Testing /file_parse endpoint..."
      # --connect-timeout only limits the connection phase; --max-time
      # bounds the whole request so a hung parse cannot block forever.
      response="$(curl --silent -X POST "$base_url/file_parse" \
        -F "files=@$test_file" \
        -F "backend=vlm-http-client" \
        -F "server_url=http://$VLLM_HOST:$VLLM_PORT" \
        -F "return_md=true" \
        -F "response_format_zip=false" \
        --connect-timeout 5 \
        --max-time 300)"
      if [[ "$response" == *"results"* ]]; then
        echo "✅ File parse endpoint working"
        echo "Response preview: $(printf '%s' "$response" | head -c 200)..."
      else
        echo "⚠️ File parse endpoint response unexpected"
        echo "Response: $response"
        rc=1
      fi
    else
      echo "⚠️ Test file not found: $test_file"
      echo "Please provide a test file to test the upload functionality"
    fi

    return "$rc"
  }
  #######################################
  # Post a sample file to /file_parse and save the response to disk.
  # Globals (read): PIDFILE, API_PORT, VLLM_HOST, VLLM_PORT
  # Arguments:
  #   $1 - optional sample file to upload
  #        (default: /home/ubuntu/zhch/MinerU/demo/pdfs/small_ocr.pdf)
  #   $2 - optional output directory
  #        (default: /tmp/mineru_fastapi_test_output)
  # Outputs: progress on stdout; the response body is written to
  #   <output dir>/fastapi_result.json.
  # Returns:
  #   1 if the service is not running, curl or the sample file is missing,
  #   the output directory cannot be created, or the request fails;
  #   0 on success.
  #######################################
  test_client() {
    local test_image="${1:-/home/ubuntu/zhch/MinerU/demo/pdfs/small_ocr.pdf}"
    local test_output="${2:-/tmp/mineru_fastapi_test_output}"
    local result_file="$test_output/fastapi_result.json"
    local service_pid=""

    echo "🧪 Testing MinerU client with FastAPI..."

    if [ -f "$PIDFILE" ]; then
      service_pid="$(cat "$PIDFILE" 2>/dev/null)"
    fi
    if [ -z "$service_pid" ] || ! kill -0 "$service_pid" 2>/dev/null; then
      echo "❌ MinerU FastAPI service is not running. Start it first with: $0 start"
      return 1
    fi

    if ! command -v curl >/dev/null 2>&1; then
      echo "❌ curl command not found"
      return 1
    fi

    if [ ! -f "$test_image" ]; then
      echo "⚠️ Test image not found: $test_image"
      echo "Please provide a test image or update the TEST_IMAGE path in the script"
      return 1
    fi

    echo "📄 Testing with image: $test_image"
    echo "📁 Output directory: $test_output"

    # curl -o does not create missing directories, so the output directory
    # has to exist before the response can be written.
    if ! mkdir -p -- "$test_output"; then
      echo "❌ Cannot create output directory: $test_output"
      return 1
    fi

    # Exercise the API directly with curl. --fail turns HTTP 4xx/5xx into a
    # non-zero exit status so an error response is not reported as success.
    echo "Testing via curl..."
    if curl --fail -X POST "http://127.0.0.1:$API_PORT/file_parse" \
      -F "files=@$test_image" \
      -F "backend=vlm-http-client" \
      -F "server_url=http://$VLLM_HOST:$VLLM_PORT" \
      -F "return_md=true" \
      -F "output_dir=$test_output" \
      -o "$result_file"; then
      echo "✅ FastAPI test completed successfully"
      echo "📁 Check output in: $test_output"
      if [ -f "$result_file" ]; then
        echo "📄 Result file size: $(du -h "$result_file" | cut -f1)"
      fi
    else
      echo "❌ FastAPI test failed"
      return 1
    fi
  }
  #######################################
  # Print the command-line help.
  # Globals (read):
  #   API_HOST, API_PORT, VLLM_HOST, VLLM_PORT, MODEL_PATH,
  #   GPU_MEMORY_UTILIZATION, CUDA_VISIBLE_DEVICES
  # Outputs: the help text on stdout.
  #######################################
  usage() {
    # One printf call emits the whole help text, one argument per line.
    printf '%s\n' \
      "MinerU FastAPI Service Daemon" \
      "=============================" \
      "Usage: $0 {start|stop|restart|status|logs|config|test|test-client}" \
      "" \
      "Commands:" \
      " start - Start the MinerU FastAPI service" \
      " stop - Stop the MinerU FastAPI service" \
      " restart - Restart the MinerU FastAPI service" \
      " status - Show service status and resource usage" \
      " logs - Show service logs (follow mode)" \
      " config - Show current configuration" \
      " test - Test API endpoints" \
      " test-client - Test FastAPI with sample file" \
      "" \
      "Configuration (edit script to modify):" \
      " FastAPI Host: $API_HOST" \
      " FastAPI Port: $API_PORT" \
      " vLLM Backend: $VLLM_HOST:$VLLM_PORT" \
      " Model: $MODEL_PATH" \
      " GPU Memory: $GPU_MEMORY_UTILIZATION" \
      " CUDA Devices: $CUDA_VISIBLE_DEVICES" \
      "" \
      "API Endpoints:" \
      " Swagger UI: http://localhost:$API_PORT/docs" \
      " ReDoc: http://localhost:$API_PORT/redoc" \
      " File Parse: POST http://localhost:$API_PORT/file_parse" \
      "" \
      "Examples:" \
      " ./mineru_fast_api_daemon.sh start" \
      " ./mineru_fast_api_daemon.sh status" \
      " ./mineru_fast_api_daemon.sh logs" \
      " ./mineru_fast_api_daemon.sh test"
  }
  # Entry point: dispatch on the first command-line argument. Anything that
  # is not a known sub-command prints the help text and exits with status 1.
  case "$1" in
    start)       start ;;
    stop)        stop ;;
    restart)     stop; sleep 3; start ;;
    status)      status ;;
    logs)        logs ;;
    config)      config ;;
    test)        test_api ;;
    test-client) test_client ;;
    *)           usage; exit 1 ;;
  esac