#!/bin/bash
# filepath: ocr_platform/ocr_tools/daemons/paddleocr_local_daemon.sh
# Purpose: local llama-server service for PaddleOCR-VL (macOS), using GGUF-format models
# Target: Mac M4 Pro 48 GB, with Metal GPU acceleration
# Model download: https://huggingface.co/PaddlePaddle/PaddleOCR-VL-1.5-GGUF
# Example request: curl -X POST http://localhost:8081/v1/chat/completions -d @payload.json
#                  (see the make_payload sketch below for building payload.json)

LOGDIR="$HOME/workspace/logs"
mkdir -p "$LOGDIR"
PIDFILE="$LOGDIR/paddleocr_llamaserver.pid"
LOGFILE="$LOGDIR/paddleocr_llamaserver.log"

# Configuration
CONDA_ENV="mineru2"
PORT="8081"
HOST="0.0.0.0"

# Local GGUF model paths
MODEL_PATH="$HOME/Library/Caches/llama.cpp/PaddlePaddle_PaddleOCR-VL-1.5-GGUF_PaddleOCR-VL-1.5.gguf"
MMPROJ_PATH="$HOME/Library/Caches/llama.cpp/PaddlePaddle_PaddleOCR-VL-1.5-GGUF_PaddleOCR-VL-1.5-mmproj.gguf"

# llama-server parameters
CONTEXT_SIZE="16384"   # context length (must be >= max_tokens; 8192-16384 recommended)
GPU_LAYERS="99"        # Metal GPU layers (99 = offload all)
THREADS="8"            # CPU threads (suggested value for M4 Pro)
BATCH_SIZE="512"       # batch size
UBATCH_SIZE="128"      # micro-batch size

# Activate the conda environment
if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then
    source "$HOME/anaconda3/etc/profile.d/conda.sh"
    conda activate "$CONDA_ENV"
elif [ -f "$HOME/miniconda3/etc/profile.d/conda.sh" ]; then
    source "$HOME/miniconda3/etc/profile.d/conda.sh"
    conda activate "$CONDA_ENV"
elif [ -f "/opt/miniconda3/etc/profile.d/conda.sh" ]; then
    source /opt/miniconda3/etc/profile.d/conda.sh
    conda activate "$CONDA_ENV"
else
    echo "Warning: conda initialization file not found, trying direct path"
    export PATH="/opt/miniconda3/envs/$CONDA_ENV/bin:$PATH"
fi

start() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "PaddleOCR-VL llama-server is already running"
        return 1
    fi

    echo "Starting PaddleOCR-VL llama-server daemon..."
    echo "Host: $HOST, Port: $PORT"
    echo "Main model: $MODEL_PATH"
    echo "Multimodal projector: $MMPROJ_PATH"
    echo "Context size: $CONTEXT_SIZE"
    echo "GPU layers: $GPU_LAYERS (Metal)"
    echo "Threads: $THREADS"

    # Verify that the model files exist
    if [ ! -f "$MODEL_PATH" ]; then
        echo "❌ Main model file not found: $MODEL_PATH"
        echo "Please confirm the model has been downloaded to the llama.cpp cache directory"
        return 1
    fi
    if [ ! -f "$MMPROJ_PATH" ]; then
        echo "❌ Multimodal projector file not found: $MMPROJ_PATH"
        echo "Please confirm the mmproj file has been downloaded"
        return 1
    fi

    # Verify that the llama-server binary is available
    if ! command -v llama-server >/dev/null 2>&1; then
        echo "❌ llama-server not found"
        echo "Install it with: brew install llama.cpp"
        return 1
    fi

    echo "🔧 Using llama-server: $(which llama-server)"
    echo "🔧 llama.cpp version: $(llama-server --version 2>&1 | head -1 || echo 'Unknown')"
    echo "💻 System info:"
    echo "   Architecture: $(uname -m)"
    echo "   OS: $(uname -s)"
    echo "   Memory: $(sysctl -n hw.memsize | awk '{printf "%.1f GB", $1/1024/1024/1024}')"

    # Launch llama-server in the background
    nohup llama-server \
        -m "$MODEL_PATH" \
        --mmproj "$MMPROJ_PATH" \
        --host "$HOST" \
        --port "$PORT" \
        --media-path /Users/zhch158/workspace \
        -c "$CONTEXT_SIZE" \
        -ngl "$GPU_LAYERS" \
        -t "$THREADS" \
        -b "$BATCH_SIZE" \
        -ub "$UBATCH_SIZE" \
        --temp 0 \
        > "$LOGFILE" 2>&1 &

    echo $! > "$PIDFILE"
    echo "✅ PaddleOCR-VL llama-server started, PID: $(cat "$PIDFILE")"
    echo "📋 Log file: $LOGFILE"
    echo "🌐 Service URL: http://$HOST:$PORT"
    echo "📖 OpenAI-compatible API: http://localhost:$PORT/v1 (chat/completions, models)"
    echo ""
    echo "Waiting for the service to start..."
    sleep 5  # fixed delay; see the optional wait_ready sketch below for a health-based alternative
    status
}
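# Optional readiness probe -- a minimal sketch, not wired into start() by
# default. Instead of the fixed `sleep 5` above, it polls the /health endpoint
# exposed by llama-server (the same endpoint test_api below relies on) until
# the server answers. The 60-attempt / 1-second cadence is an assumption;
# tune it for how long this model takes to load on your machine.
wait_ready() {
    local attempts="${1:-60}"
    local i
    for ((i = 1; i <= attempts; i++)); do
        if curl -s --connect-timeout 2 "http://127.0.0.1:$PORT/health" >/dev/null 2>&1; then
            echo "✅ Service is ready (after ${i}s)"
            return 0
        fi
        sleep 1
    done
    echo "⚠️  Service did not become ready within ${attempts}s; check $LOGFILE"
    return 1
}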
stop() {
    if [ ! -f "$PIDFILE" ]; then
        echo "PaddleOCR-VL llama-server is not running"
        return 1
    fi

    PID=$(cat "$PIDFILE")
    echo "Stopping PaddleOCR-VL llama-server (PID: $PID)..."
    kill "$PID"

    for i in {1..30}; do
        if ! kill -0 "$PID" 2>/dev/null; then
            break
        fi
        echo "Waiting for the process to stop... ($i/30)"
        sleep 1
    done

    if kill -0 "$PID" 2>/dev/null; then
        echo "Force-killing the process..."
        kill -9 "$PID"
    fi

    rm -f "$PIDFILE"
    echo "✅ PaddleOCR-VL llama-server stopped"
}

status() {
    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        PID=$(cat "$PIDFILE")
        echo "✅ PaddleOCR-VL llama-server is running (PID: $PID)"
        echo "🌐 Service URL: http://$HOST:$PORT"
        echo "📋 Log file: $LOGFILE"

        # Check whether the port is listening
        if lsof -nP -iTCP:"$PORT" -sTCP:LISTEN >/dev/null 2>&1; then
            echo "🔗 Port $PORT is listening"
        else
            echo "⚠️  Port $PORT is not listening (the service may still be starting)"
        fi

        # Check the API response
        if command -v curl >/dev/null 2>&1; then
            if curl -s --connect-timeout 2 "http://127.0.0.1:$PORT/v1/models" >/dev/null 2>&1; then
                echo "🎯 API is responding"
            else
                echo "⚠️  API is not responding (the service may still be starting)"
            fi
        fi

        # Show process memory usage
        if command -v ps >/dev/null 2>&1; then
            MEM=$(ps -o rss= -p "$PID" 2>/dev/null | awk '{printf "%.2f GB", $1/1024/1024}')
            if [ -n "$MEM" ]; then
                echo "💾 Memory usage: $MEM"
            fi
        fi

        if [ -f "$LOGFILE" ]; then
            echo "📄 Recent log (last 3 lines):"
            tail -3 "$LOGFILE" | sed 's/^/   /'
        fi
    else
        echo "❌ PaddleOCR-VL llama-server is not running"
        if [ -f "$PIDFILE" ]; then
            echo "Removing stale PID file..."
            rm -f "$PIDFILE"
        fi
    fi
}

logs() {
    if [ -f "$LOGFILE" ]; then
        echo "📄 PaddleOCR-VL llama-server log:"
        echo "====================="
        tail -f "$LOGFILE"
    else
        echo "❌ Log file not found: $LOGFILE"
    fi
}

config() {
    echo "📋 Current configuration:"
    echo "   Conda environment: $CONDA_ENV"
    echo "   Host: $HOST"
    echo "   Port: $PORT"
    echo "   Main model path: $MODEL_PATH"
    echo "   Multimodal projector: $MMPROJ_PATH"
    echo "   Context size: $CONTEXT_SIZE"
    echo "   GPU layers: $GPU_LAYERS"
    echo "   Threads: $THREADS"
    echo "   Batch size: $BATCH_SIZE"
    echo "   Micro-batch size: $UBATCH_SIZE"
    echo "   PID file: $PIDFILE"
    echo "   Log file: $LOGFILE"
    echo ""
    echo "📦 Model file check:"
    if [ -f "$MODEL_PATH" ]; then
        SIZE=$(du -h "$MODEL_PATH" | cut -f1)
        echo "   ✅ Main model exists ($SIZE)"
    else
        echo "   ❌ Main model not found"
    fi
    if [ -f "$MMPROJ_PATH" ]; then
        SIZE=$(du -h "$MMPROJ_PATH" | cut -f1)
        echo "   ✅ Multimodal projector exists ($SIZE)"
    else
        echo "   ❌ Multimodal projector not found"
    fi
    echo ""
    echo "🔧 Environment check:"
    echo "   llama-server: $(which llama-server 2>/dev/null || echo 'not installed')"
    if command -v llama-server >/dev/null 2>&1; then
        LLAMA_VERSION=$(llama-server --version 2>&1 | head -1 || echo 'Unknown')
        echo "   Version: $LLAMA_VERSION"
    fi
    echo "   Conda: $(which conda 2>/dev/null || echo 'not found')"
    echo "   Current Python: $(which python 2>/dev/null || echo 'not found')"
    echo ""
    echo "💻 System info:"
    echo "   Architecture: $(uname -m)"
    echo "   OS version: $(sw_vers -productVersion 2>/dev/null || echo 'Unknown')"
    echo "   Total memory: $(sysctl -n hw.memsize 2>/dev/null | awk '{printf "%.1f GB", $1/1024/1024/1024}' || echo 'Unknown')"
    echo "   CPU cores: $(sysctl -n hw.ncpu 2>/dev/null || echo 'Unknown')"
}

test_api() {
    echo "🧪 Testing the PaddleOCR-VL llama-server API..."
    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "❌ PaddleOCR-VL llama-server is not running"
        return 1
    fi
    if ! command -v curl >/dev/null 2>&1; then
        echo "❌ curl command not found"
        return 1
    fi

    echo "📡 Testing the /v1/models endpoint..."
    response=$(curl -s --connect-timeout 10 "http://127.0.0.1:$PORT/v1/models")
    if [ $? -eq 0 ]; then
        echo "✅ Models endpoint is reachable"
        echo "$response" | python -m json.tool 2>/dev/null || echo "$response"
    else
        echo "❌ Models endpoint is unreachable"
    fi

    echo ""
    echo "📡 Testing the /health endpoint..."
    health=$(curl -s --connect-timeout 5 "http://127.0.0.1:$PORT/health")
    if [ $? -eq 0 ]; then
        echo "✅ Health endpoint: $health"
    else
        echo "⚠️  Health endpoint is unreachable"
    fi
}
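# A minimal sketch for building the payload.json referenced in the header
# comment. The model name "paddleocr-vl" and the "Table Recognition:" prompt
# are taken from the test-client curl example below; embedding the image as a
# base64 data URI is the standard OpenAI-compatible format and avoids relying
# on server-side file access. make_payload is a hypothetical helper, not wired
# into the command dispatch; source this script to use it:
#   source paddleocr_local_daemon.sh
#   make_payload /path/to/image.png payload.json
#   curl -X POST "http://localhost:$PORT/v1/chat/completions" \
#        -H 'Content-Type: application/json' -d @payload.json
make_payload() {
    local image="$1"              # path to a local PNG image
    local out="${2:-payload.json}"
    local b64
    b64=$(base64 < "$image" | tr -d '\n')
    cat > "$out" <<EOF
{
  "model": "paddleocr-vl",
  "messages": [
    {
      "role": "user",
      "content": [
        {"type": "text", "text": "Table Recognition:"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,${b64}"}}
      ]
    }
  ],
  "max_tokens": 4096
}
EOF
    echo "Wrote $out"
}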
test_client() {
    echo "🧪 Testing PaddleOCR-VL integration with llama-server..."
    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
        echo "❌ PaddleOCR-VL llama-server is not running; start it first: $0 start"
        return 1
    fi

    CONFIG_FILE="/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/config/bank_statement_paddleocr_local.yaml"
    echo "📄 Config file: $CONFIG_FILE"
    echo ""
    echo "Make sure vl_recognition.api_url in the config file points to: http://localhost:$PORT/v1/chat/completions"
    echo ""
    echo "Example test commands:"
    echo "  cd /Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser"
    echo "  conda activate mineru2"
    echo "  python parse.py --input /path/to/test/image.png --config $CONFIG_FILE --debug"
    echo ""
    echo "Or test the API directly with curl:"
    echo "  curl -X POST http://localhost:$PORT/v1/chat/completions \\"
    echo "    -H 'Content-Type: application/json' \\"
    echo "    -d '{"
    echo "      \"model\": \"paddleocr-vl\","
    echo "      \"messages\": ["
    echo "        {"
    echo "          \"role\": \"user\","
    echo "          \"content\": ["
    echo "            {\"type\": \"text\", \"text\": \"Table Recognition:\"},"
    echo "            {\"type\": \"image_url\", \"image_url\": {\"url\": \"file:///path/to/image.png\"}}"
    echo "          ]"
    echo "        }"
    echo "      ],"
    echo "      \"max_tokens\": 4096"
    echo "    }'"
}

usage() {
    echo "PaddleOCR-VL llama-server daemon (macOS)"
    echo "==========================================="
    echo "Usage: $0 {start|stop|restart|status|logs|config|test|test-client}"
    echo ""
    echo "Commands:"
    echo "  start       - start the PaddleOCR-VL llama-server service"
    echo "  stop        - stop the PaddleOCR-VL llama-server service"
    echo "  restart     - restart the PaddleOCR-VL llama-server service"
    echo "  status      - show service status and resource usage"
    echo "  logs        - show the service log (follow mode)"
    echo "  config      - show the current configuration"
    echo "  test        - test the /v1/models API endpoint"
    echo "  test-client - show how to test integration with the config file"
    echo ""
    echo "Configuration (edit this script to change):"
    echo "  Host: $HOST"
    echo "  Port: $PORT"
    echo "  Main model: $MODEL_PATH"
    echo "  Multimodal projector: $MMPROJ_PATH"
    echo "  Context size: $CONTEXT_SIZE"
    echo "  GPU layers: $GPU_LAYERS (Metal)"
    echo ""
    echo "Examples:"
    echo "  ./paddleocr_local_daemon.sh start"
    echo "  ./paddleocr_local_daemon.sh status"
    echo "  ./paddleocr_local_daemon.sh logs"
    echo "  ./paddleocr_local_daemon.sh test"
    echo ""
    echo "Prerequisites:"
    echo "  1. Install llama.cpp: brew install llama.cpp"
    echo "  2. Model files located at: ~/Library/Caches/llama.cpp/"
    echo "  3. The mineru2 conda environment is configured"
}

case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    restart)
        stop
        sleep 3
        start
        ;;
    status)
        status
        ;;
    logs)
        logs
        ;;
    config)
        config
        ;;
    test)
        test_api
        ;;
    test-client)
        test_client
        ;;
    *)
        usage
        exit 1
        ;;
esac