paddle_local_daemon.sh 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. #!/bin/bash
  2. # filepath: ocr_platform/ocr_tools/daemons/paddleocr_local_daemon.sh
  3. # 对应: PaddleOCR-VL 本地 llama-server 服务(macOS),使用 GGUF 格式模型
  4. # 适用于 Mac M4 Pro 48G,使用 Metal GPU 加速
  5. # 模型下载地址: https://huggingface.co/PaddlePaddle/PaddleOCR-VL-1.5-GGUF
  6. # unset https_proxy http_proxy HF_ENDPOINT
  7. # llama-server -hf PaddlePaddle/PaddleOCR-VL-1.5-GGUF
  8. # mv ~/Library/Caches/llama.cpp/PaddlePaddle_PaddleOCR-VL-1.5-GGUF_PaddleOCR-VL-1.5.gguf ~/models/paddleocr_vl
  9. # mv ~/Library/Caches/llama.cpp/PaddlePaddle_PaddleOCR-VL-1.5-GGUF_PaddleOCR-VL-1.5-mmproj.gguf ~/models/paddleocr_vl
  10. # curl -X POST http://localhost:8102/v1/chat/completions -d @payload.json
  # Directory that holds the daemon's PID file and log file. It is created on
  # every invocation of the script, whatever the sub-command.
  LOGDIR="$HOME/workspace/logs"
  # Quoted so that a $HOME containing spaces or glob characters is passed to
  # mkdir as a single argument.
  mkdir -p "$LOGDIR"
  PIDFILE="$LOGDIR/paddleocr_llamaserver.pid"
  LOGFILE="$LOGDIR/paddleocr_llamaserver.log"

  # Service configuration.
  CONDA_ENV="mineru2"
  PORT="8102"
  # NOTE(review): 0.0.0.0 makes the server listen on all interfaces - confirm
  # this exposure is intended.
  HOST="0.0.0.0"

  # Local GGUF model paths.
  MODEL_PATH="$HOME/models/paddleocr_vl/PaddlePaddle_PaddleOCR-VL-1.5-GGUF_PaddleOCR-VL-1.5.gguf"
  MMPROJ_PATH="$HOME/models/paddleocr_vl/PaddlePaddle_PaddleOCR-VL-1.5-GGUF_PaddleOCR-VL-1.5-mmproj.gguf"

  # Model alias (the model ID exposed to clients; corresponds to model_name in
  # the yaml configuration).
  MODEL_NAME="PaddleOCR-VL-1.5"

  # llama-server parameters.
  CONTEXT_SIZE="16384" # context length (must be >= max_tokens; 8192-16384 recommended)
  GPU_LAYERS="99"      # number of Metal GPU layers (99 means all)
  THREADS="8"          # CPU thread count (suggested value for M4 Pro)
  BATCH_SIZE="512"     # batch size
  UBATCH_SIZE="128"    # micro-batch size
  # Activate the conda environment named by CONDA_ENV.
  # The first conda.sh found among the candidate locations below (checked in
  # this order) is sourced and the environment is activated. When none exists,
  # a warning is printed and the environment's bin directory under
  # /opt/miniconda3 is prepended to PATH instead.
  _conda_init=""
  for _conda_candidate in \
    "$HOME/anaconda3/etc/profile.d/conda.sh" \
    "$HOME/miniconda3/etc/profile.d/conda.sh" \
    "/opt/miniconda3/etc/profile.d/conda.sh"; do
    if [ -f "$_conda_candidate" ]; then
      _conda_init="$_conda_candidate"
      break
    fi
  done

  if [ -n "$_conda_init" ]; then
    # shellcheck source=/dev/null
    source "$_conda_init"
    conda activate "$CONDA_ENV"
  else
    echo "Warning: conda initialization file not found, trying direct path"
    export PATH="/opt/miniconda3/envs/$CONDA_ENV/bin:$PATH"
  fi
  # The helper variables are only needed during start-up.
  unset _conda_init _conda_candidate
  #######################################
  # Start llama-server in the background and record its PID.
  # Globals:
  #   PIDFILE, LOGFILE (written); HOST, PORT, MODEL_PATH, MMPROJ_PATH,
  #   MODEL_NAME, CONTEXT_SIZE, GPU_LAYERS, THREADS, BATCH_SIZE, UBATCH_SIZE,
  #   HOME (read)
  # Outputs:
  #   Progress and diagnostic messages on stdout; the server's stdout and
  #   stderr are redirected to LOGFILE (truncated on each start).
  # Returns:
  #   1 if the PID file points at a live process, a model file is missing, or
  #   llama-server is not on PATH; otherwise the exit status of `status`.
  #######################################
  start() {
    local pid="" version mem_bytes mem_human="Unknown"

    if [ -f "$PIDFILE" ]; then
      pid=$(cat "$PIDFILE" 2>/dev/null)
    fi
    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
      echo "PaddleOCR-VL llama-server 已在运行"
      return 1
    fi

    echo "启动 PaddleOCR-VL llama-server 守护进程..."
    echo "Host: $HOST, Port: $PORT"
    echo "主模型: $MODEL_PATH"
    echo "多模态投影器: $MMPROJ_PATH"
    echo "上下文长度: $CONTEXT_SIZE"
    echo "GPU 层数: $GPU_LAYERS (Metal)"
    echo "线程数: $THREADS"

    # Both model files must exist before the server is launched.
    if [ ! -f "$MODEL_PATH" ]; then
      echo "❌ 主模型文件不存在: $MODEL_PATH"
      echo "请确认模型已下载到 llama.cpp 缓存目录"
      return 1
    fi
    if [ ! -f "$MMPROJ_PATH" ]; then
      echo "❌ 多模态投影器文件不存在: $MMPROJ_PATH"
      echo "请确认 mmproj 文件已下载"
      return 1
    fi

    # The llama-server executable must be reachable.
    if ! command -v llama-server >/dev/null 2>&1; then
      echo "❌ llama-server 未找到"
      echo "请安装: brew install llama.cpp"
      return 1
    fi

    echo "🔧 使用 llama-server: $(command -v llama-server)"
    # A pipeline's status is that of its last stage, so a trailing `|| echo`
    # never fires; fall back on an empty result instead.
    version=$(llama-server --version 2>&1 | head -n 1)
    echo "🔧 llama.cpp 版本: ${version:-Unknown}"

    echo "💻 系统信息:"
    echo " 架构: $(uname -m)"
    echo " 系统: $(uname -s)"
    # hw.memsize is only converted when sysctl printed a plain byte count.
    mem_bytes=$(sysctl -n hw.memsize 2>/dev/null)
    case "$mem_bytes" in
      '' | *[!0-9]*) ;;
      *)
        mem_human=$(awk -v b="$mem_bytes" \
          'BEGIN { printf "%.1f GB", b/1024/1024/1024 }')
        ;;
    esac
    echo " 内存: $mem_human"

    # Launch the server detached from the terminal.
    nohup llama-server \
      -m "$MODEL_PATH" \
      --mmproj "$MMPROJ_PATH" \
      --alias "$MODEL_NAME" \
      --host "$HOST" \
      --port "$PORT" \
      --media-path "$HOME/workspace" \
      -c "$CONTEXT_SIZE" \
      -ngl "$GPU_LAYERS" \
      -t "$THREADS" \
      -b "$BATCH_SIZE" \
      -ub "$UBATCH_SIZE" \
      --temp 0 \
      >"$LOGFILE" 2>&1 &
    pid=$!
    echo "$pid" >"$PIDFILE"

    echo "✅ PaddleOCR-VL llama-server 已启动,PID: $pid"
    echo "📋 日志文件: $LOGFILE"
    echo "🌐 服务 URL: http://$HOST:$PORT"
    echo "📖 OpenAI 兼容 API: http://localhost:$PORT/v1 (chat/completions, models)"
    echo ""
    echo "等待服务启动..."
    # Give the server a moment to come up before reporting its state.
    sleep 5
    status
  }
  #######################################
  # Stop the llama-server process recorded in PIDFILE.
  # Sends SIGTERM, polls once per second for up to 30 seconds, then falls
  # back to SIGKILL if the process is still alive.
  # Globals:
  #   PIDFILE (read, removed)
  # Outputs:
  #   Progress messages on stdout.
  # Returns:
  #   1 if there is no PID file, or if the PID file is stale (empty,
  #   non-numeric, or naming a process that cannot be signalled); the stale
  #   file is removed in that case. 0 after the process has been stopped.
  #######################################
  stop() {
    local pid attempt

    if [ ! -f "$PIDFILE" ]; then
      echo "PaddleOCR-VL llama-server 未在运行"
      return 1
    fi

    pid=$(cat "$PIDFILE" 2>/dev/null)
    # Anything other than a plain number cannot be a PID; treat it as stale
    # rather than handing it to kill.
    case "$pid" in
      '' | *[!0-9]*) pid="" ;;
    esac
    if [ -z "$pid" ] || ! kill -0 "$pid" 2>/dev/null; then
      echo "PaddleOCR-VL llama-server 未在运行"
      echo "删除过期的 PID 文件..."
      rm -f -- "$PIDFILE"
      return 1
    fi

    echo "停止 PaddleOCR-VL llama-server (PID: $pid)..."
    kill "$pid"

    for attempt in {1..30}; do
      if ! kill -0 "$pid" 2>/dev/null; then
        break
      fi
      echo "等待进程停止... ($attempt/30)"
      sleep 1
    done

    # Still alive after the grace period: force termination.
    if kill -0 "$pid" 2>/dev/null; then
      echo "强制终止进程..."
      kill -9 "$pid"
    fi

    rm -f -- "$PIDFILE"
    echo "✅ PaddleOCR-VL llama-server 已停止"
  }
  #######################################
  # Report whether the llama-server recorded in PIDFILE is alive.
  # For a live process it also reports whether PORT is listening (lsof),
  # whether /v1/models answers (curl, if installed), the resident memory of
  # the process (ps) and the last three lines of LOGFILE. Otherwise it
  # reports "not running" and removes a leftover PID file.
  # Globals:
  #   PIDFILE (read, possibly removed); LOGFILE, HOST, PORT (read)
  # Outputs:
  #   Status report on stdout.
  #######################################
  status() {
    local pid="" mem

    if [ -f "$PIDFILE" ]; then
      pid=$(cat "$PIDFILE" 2>/dev/null)
    fi

    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
      echo "✅ PaddleOCR-VL llama-server 正在运行 (PID: $pid)"
      echo "🌐 服务 URL: http://$HOST:$PORT"
      echo "📋 日志文件: $LOGFILE"

      # Is anything listening on the TCP port?
      if lsof -nP -iTCP:"$PORT" -sTCP:LISTEN >/dev/null 2>&1; then
        echo "🔗 端口 $PORT 正在监听"
      else
        echo "⚠️ 端口 $PORT 未在监听(服务可能正在启动)"
      fi

      # Does the API answer?
      if command -v curl >/dev/null 2>&1; then
        if curl -s --connect-timeout 2 "http://127.0.0.1:$PORT/v1/models" >/dev/null 2>&1; then
          echo "🎯 API 响应正常"
        else
          echo "⚠️ API 无响应(服务可能正在启动)"
        fi
      fi

      # Resident set size of the process; ps reports rss in KiB.
      if command -v ps >/dev/null 2>&1; then
        mem=$(ps -o rss= -p "$pid" 2>/dev/null | awk '{printf "%.2f GB", $1/1024/1024}')
        if [ -n "$mem" ]; then
          echo "💾 内存使用: $mem"
        fi
      fi

      if [ -f "$LOGFILE" ]; then
        echo "📄 最近日志(最后 3 行):"
        tail -n 3 "$LOGFILE" | sed 's/^/ /'
      fi
    else
      echo "❌ PaddleOCR-VL llama-server 未在运行"
      if [ -f "$PIDFILE" ]; then
        echo "删除过期的 PID 文件..."
        rm -f -- "$PIDFILE"
      fi
    fi
  }
  #######################################
  # Follow the server log (tail -f) until interrupted.
  # Globals:
  #   LOGFILE (read)
  # Outputs:
  #   A header followed by the live log on stdout, or a message naming the
  #   missing log file.
  #######################################
  logs() {
    # LOGFILE is quoted throughout so a path with spaces stays one argument.
    if [ -f "$LOGFILE" ]; then
      echo "📄 PaddleOCR-VL llama-server 日志:"
      echo "====================="
      tail -f "$LOGFILE"
    else
      echo "❌ 日志文件不存在: $LOGFILE"
    fi
  }
  #######################################
  # Print the effective configuration, model-file presence and sizes, tool
  # locations, and basic host information.
  # Globals:
  #   CONDA_ENV, HOST, PORT, MODEL_NAME, MODEL_PATH, MMPROJ_PATH,
  #   CONTEXT_SIZE, GPU_LAYERS, THREADS, BATCH_SIZE, UBATCH_SIZE, PIDFILE,
  #   LOGFILE (read)
  # Outputs:
  #   Report on stdout.
  #######################################
  config() {
    local size version mem_bytes total_mem="Unknown"

    echo "📋 当前配置:"
    echo " Conda 环境: $CONDA_ENV"
    echo " Host: $HOST"
    echo " Port: $PORT"
    echo " 模型别名: $MODEL_NAME"
    echo " 主模型路径: $MODEL_PATH"
    echo " 多模态投影器: $MMPROJ_PATH"
    echo " 上下文长度: $CONTEXT_SIZE"
    echo " GPU 层数: $GPU_LAYERS"
    echo " 线程数: $THREADS"
    echo " 批处理大小: $BATCH_SIZE"
    echo " 微批处理大小: $UBATCH_SIZE"
    echo " PID 文件: $PIDFILE"
    echo " 日志文件: $LOGFILE"
    echo ""

    echo "📦 模型文件检查:"
    if [ -f "$MODEL_PATH" ]; then
      size=$(du -h "$MODEL_PATH" | cut -f1)
      echo " ✅ 主模型存在 ($size)"
    else
      echo " ❌ 主模型不存在"
    fi
    if [ -f "$MMPROJ_PATH" ]; then
      size=$(du -h "$MMPROJ_PATH" | cut -f1)
      echo " ✅ 多模态投影器存在 ($size)"
    else
      echo " ❌ 多模态投影器不存在"
    fi
    echo ""

    echo "🔧 环境检查:"
    # type -P searches PATH only, so a shell function of the same name is
    # never reported in place of the executable's path.
    echo " llama-server: $(type -P llama-server || echo '未安装')"
    if command -v llama-server >/dev/null 2>&1; then
      # A pipeline's status is that of its last stage, so a trailing
      # `|| echo` never fires; fall back on an empty result instead.
      version=$(llama-server --version 2>&1 | head -n 1)
      echo " 版本: ${version:-Unknown}"
    fi
    echo " Conda: $(type -P conda || echo '未找到')"
    echo " 当前 Python: $(type -P python || echo '未找到')"
    echo ""

    echo "💻 系统信息:"
    echo " 架构: $(uname -m)"
    echo " 系统版本: $(sw_vers -productVersion 2>/dev/null || echo 'Unknown')"
    # hw.memsize is only converted when sysctl printed a plain byte count;
    # otherwise "Unknown" is shown.
    mem_bytes=$(sysctl -n hw.memsize 2>/dev/null)
    case "$mem_bytes" in
      '' | *[!0-9]*) ;;
      *)
        total_mem=$(awk -v b="$mem_bytes" \
          'BEGIN { printf "%.1f GB", b/1024/1024/1024 }')
        ;;
    esac
    echo " 总内存: $total_mem"
    echo " CPU 核心: $(sysctl -n hw.ncpu 2>/dev/null || echo 'Unknown')"
  }
  #######################################
  # Probe the running server's /v1/models and /health endpoints.
  # Globals:
  #   PIDFILE, PORT (read)
  # Outputs:
  #   Probe results on stdout; the /v1/models body is pretty-printed with
  #   `python -m json.tool` when that works, otherwise printed raw.
  # Returns:
  #   1 if the server is not running, curl is missing, or /v1/models cannot
  #   be fetched successfully; 0 otherwise.
  #######################################
  test_api() {
    local pid="" response health rc=0

    echo "🧪 测试 PaddleOCR-VL llama-server API..."

    if [ -f "$PIDFILE" ]; then
      pid=$(cat "$PIDFILE" 2>/dev/null)
    fi
    if [ -z "$pid" ] || ! kill -0 "$pid" 2>/dev/null; then
      echo "❌ PaddleOCR-VL llama-server 服务未在运行"
      return 1
    fi

    if ! command -v curl >/dev/null 2>&1; then
      echo "❌ curl 命令未找到"
      return 1
    fi

    echo "📡 测试 /v1/models 端点..."
    # -f makes curl fail on HTTP 4xx/5xx; with -s alone an error page would
    # be reported as a reachable endpoint.
    if response=$(curl -sf --connect-timeout 10 "http://127.0.0.1:$PORT/v1/models"); then
      echo "✅ Models 端点可访问"
      printf '%s\n' "$response" | python -m json.tool 2>/dev/null || echo "$response"
    else
      echo "❌ Models 端点不可访问"
      rc=1
    fi

    echo ""
    echo "📡 测试 /health 端点..."
    # /health is informational: the response body is shown as-is, so no -f
    # here, and a failure is only a warning.
    if health=$(curl -s --connect-timeout 5 "http://127.0.0.1:$PORT/health"); then
      echo "✅ Health 端点: $health"
    else
      echo "⚠️ Health 端点不可访问"
    fi

    return "$rc"
  }
  #######################################
  # Print instructions for exercising the running server: a parse.py
  # invocation using the project's yaml config, and an equivalent curl
  # request against /v1/chat/completions. Nothing is executed.
  # Globals:
  #   PIDFILE, PORT, MODEL_NAME, CONDA_ENV (read)
  # Outputs:
  #   Instructions on stdout.
  # Returns:
  #   1 if the server is not running, 0 otherwise.
  #######################################
  test_client() {
    local pid=""
    # NOTE(review): absolute, user-specific checkout path - adjust when the
    # repository lives elsewhere.
    local parser_dir="/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser"
    local config_file="$parser_dir/config/bank_statement_paddleocr_local.yaml"

    echo "🧪 测试 PaddleOCR-VL 与 llama-server 集成..."

    if [ -f "$PIDFILE" ]; then
      pid=$(cat "$PIDFILE" 2>/dev/null)
    fi
    if [ -z "$pid" ] || ! kill -0 "$pid" 2>/dev/null; then
      echo "❌ PaddleOCR-VL llama-server 服务未在运行,请先启动: $0 start"
      return 1
    fi

    printf '%s\n' \
      "📄 配置文件: $config_file" \
      "" \
      "确保配置文件中 vl_recognition.api_url 指向: http://localhost:$PORT/v1/chat/completions" \
      "" \
      "测试命令示例:" \
      " cd $parser_dir" \
      " conda activate $CONDA_ENV" \
      " python parse.py --input /path/to/test/image.png --config $config_file --debug" \
      ""

    # The sample request names the alias the server was started with
    # (MODEL_NAME) so it matches what /v1/models advertises.
    printf '%s\n' \
      "或者使用 curl 直接测试 API:" \
      " curl -X POST http://localhost:$PORT/v1/chat/completions \\" \
      " -H 'Content-Type: application/json' \\" \
      " -d '{" \
      " \"model\": \"$MODEL_NAME\"," \
      " \"messages\": [" \
      " {" \
      " \"role\": \"user\"," \
      " \"content\": [" \
      " {\"type\": \"text\", \"text\": \"Table Recognition:\"}," \
      " {\"type\": \"image_url\", \"image_url\": {\"url\": \"file:///path/to/image.png\"}}" \
      " ]" \
      " }" \
      " ]," \
      " \"max_tokens\": 4096" \
      " }'"
  }
  #######################################
  # Print the command reference, the current configuration, examples and
  # prerequisites.
  # Globals:
  #   HOST, PORT, MODEL_PATH, MMPROJ_PATH, CONTEXT_SIZE, GPU_LAYERS,
  #   CONDA_ENV (read)
  # Outputs:
  #   Help text on stdout.
  #######################################
  usage() {
    echo "PaddleOCR-VL llama-server 服务守护进程(macOS)"
    echo "==========================================="
    echo "用法: $0 {start|stop|restart|status|logs|config|test|test-client}"
    echo ""
    echo "命令:"
    echo " start - 启动 PaddleOCR-VL llama-server 服务"
    echo " stop - 停止 PaddleOCR-VL llama-server 服务"
    echo " restart - 重启 PaddleOCR-VL llama-server 服务"
    echo " status - 显示服务状态和资源使用"
    echo " logs - 显示服务日志(跟踪模式)"
    echo " config - 显示当前配置"
    echo " test - 测试 /v1/models API 端点"
    echo " test-client - 显示如何测试与配置文件集成"
    echo ""
    echo "配置(编辑脚本修改):"
    echo " Host: $HOST"
    echo " Port: $PORT"
    echo " 主模型: $MODEL_PATH"
    echo " 多模态投影器: $MMPROJ_PATH"
    echo " 上下文长度: $CONTEXT_SIZE"
    echo " GPU 层数: $GPU_LAYERS (Metal)"
    echo ""
    echo "示例:"
    # Examples use $0 so they stay correct however the script is named or
    # invoked.
    echo " $0 start"
    echo " $0 status"
    echo " $0 logs"
    echo " $0 test"
    echo ""
    echo "前置要求:"
    echo " 1. 安装 llama.cpp: brew install llama.cpp"
    # The directory is derived from MODEL_PATH, which is where start() looks
    # for the model files.
    echo " 2. 模型文件位于: ${MODEL_PATH%/*}/"
    echo " 3. conda 环境 $CONDA_ENV 已配置"
  }
  # Entry point: route the first command-line argument to its handler.
  # Sub-commands whose handler has the same name are invoked through "$1";
  # any other word (or no argument) prints the help text and exits with 1.
  case "$1" in
    start | stop | status | logs | config)
      "$1"
      ;;
    restart)
      # Stop, give the process a moment to release its resources, start again.
      stop
      sleep 3
      start
      ;;
    test)
      test_api
      ;;
    test-client)
      test_client
      ;;
    *)
      usage
      exit 1
      ;;
  esac