|
|
@@ -23,13 +23,16 @@ PORT="8103"
|
|
|
HOST="0.0.0.0"
|
|
|
|
|
|
# 本地 GGUF 模型路径(llama-server -hf 下载后的实际路径)
|
|
|
-HF_CACHE="$HOME/models/hf_home/hub/models--mradermacher--MinerU2.5-Pro-2604-1.2B-GGUF/snapshots/70429e9c728b6a5e904f358a9936c17bd3f5f4b8"
|
|
|
-MODEL_PATH="$HF_CACHE/MinerU2.5-Pro-2604-1.2B.Q8_0.gguf"
|
|
|
-MMPROJ_PATH="$HF_CACHE/MinerU2.5-Pro-2604-1.2B.mmproj-Q8_0.gguf"
|
|
|
+HF_CACHE="$HOME/models/MinerU2.5-Pro-2604-1.2B-GGUF"
|
|
|
+MODEL_PATH="$HF_CACHE/MinerU2.5-Pro-2604-1.2B-F16.gguf"
|
|
|
+MMPROJ_PATH="$HF_CACHE/MinerU2.5-Pro-2604-1.2B-F16-mmproj.gguf"
|
|
|
|
|
|
# 模型别名(对外暴露的模型 ID,对应 yaml 中的 model 字段)
|
|
|
MODEL_NAME="MinerU2.5-Pro-2604-1.2B"
|
|
|
|
|
|
+# llama-server 执行文件
|
|
|
+LLAMA_SERVER_EXECUTABLE="/Users/zhch158/workspace/repository.git/llama.cpp/build/bin/llama-server"
|
|
|
+
|
|
|
# llama-server 参数
|
|
|
# 注意:MinerU2.5-Pro n_ctx_train=8192,设置 8192 即可
|
|
|
CONTEXT_SIZE="8192" # 上下文长度(对齐模型 n_ctx_train=8192)
|
|
|
@@ -70,9 +73,7 @@ start() {
|
|
|
# 检查模型文件是否存在
|
|
|
if [ ! -f "$MODEL_PATH" ]; then
|
|
|
echo "❌ 主模型文件不存在: $MODEL_PATH"
|
|
|
- echo "请先运行以下命令下载模型:"
|
|
|
- echo " llama-server -hf mradermacher/MinerU2.5-Pro-2604-1.2B-GGUF:Q8_0"
|
|
|
- echo "下载完成后更新脚本中的 HF_CACHE 路径(快照 hash 可能不同)"
|
|
|
+ echo "请确认模型已下载到 llama.cpp 缓存目录"
|
|
|
return 1
|
|
|
fi
|
|
|
|
|
|
@@ -82,15 +83,15 @@ start() {
|
|
|
return 1
|
|
|
fi
|
|
|
|
|
|
- # 检查 llama-server 命令
|
|
|
- if ! command -v llama-server >/dev/null 2>&1; then
|
|
|
- echo "❌ llama-server 未找到"
|
|
|
- echo "请安装: brew install llama.cpp"
|
|
|
+ # 检查 llama-server 执行文件(本机编译版本)
|
|
|
+ if [ ! -x "$LLAMA_SERVER_EXECUTABLE" ]; then
|
|
|
+ echo "❌ llama-server 执行文件不存在或不可执行: $LLAMA_SERVER_EXECUTABLE"
|
|
|
+ echo "请确认已在本机编译 llama.cpp(cmake --build build)"
|
|
|
return 1
|
|
|
fi
|
|
|
|
|
|
- echo "🔧 使用 llama-server: $(which llama-server)"
|
|
|
- echo "🔧 llama.cpp 版本: $(llama-server --version 2>&1 | head -1 || echo 'Unknown')"
|
|
|
+ echo "🔧 使用 llama-server: $LLAMA_SERVER_EXECUTABLE"
|
|
|
+ echo "🔧 llama.cpp 版本: $("$LLAMA_SERVER_EXECUTABLE" --version 2>&1 | head -1 || echo 'Unknown')"
|
|
|
|
|
|
echo "💻 系统信息:"
|
|
|
echo " 架构: $(uname -m)"
|
|
|
@@ -100,7 +101,7 @@ start() {
|
|
|
# 启动 llama-server
|
|
|
# 注意:MinerU2.5-Pro GGUF 内嵌推荐采样参数(top_k=1, top_p=0.001, temp=0.01),
|
|
|
# llama-server 会自动应用,此处只设 --temp 0 确保确定性解码
|
|
|
- nohup llama-server \
|
|
|
+ nohup "$LLAMA_SERVER_EXECUTABLE" \
|
|
|
-m "$MODEL_PATH" \
|
|
|
--mmproj "$MMPROJ_PATH" \
|
|
|
--alias $MODEL_NAME \
|
|
|
@@ -242,10 +243,12 @@ config() {
|
|
|
|
|
|
echo ""
|
|
|
echo "🔧 环境检查:"
|
|
|
- echo " llama-server: $(which llama-server 2>/dev/null || echo '未安装')"
|
|
|
- if command -v llama-server >/dev/null 2>&1; then
|
|
|
- LLAMA_VERSION=$(llama-server --version 2>&1 | head -1 || echo 'Unknown')
|
|
|
+ echo " llama-server: $LLAMA_SERVER_EXECUTABLE"
|
|
|
+ if [ -x "$LLAMA_SERVER_EXECUTABLE" ]; then
|
|
|
+ LLAMA_VERSION=$("$LLAMA_SERVER_EXECUTABLE" --version 2>&1 | head -1 || echo 'Unknown')
|
|
|
echo " 版本: $LLAMA_VERSION"
|
|
|
+ else
|
|
|
+ echo " ⚠️ 执行文件不存在或不可执行"
|
|
|
fi
|
|
|
echo " Conda: $(which conda 2>/dev/null || echo '未找到')"
|
|
|
echo " 当前 Python: $(which python 2>/dev/null || echo '未找到')"
|
|
|
@@ -275,7 +278,7 @@ test_api() {
|
|
|
response=$(curl -s --connect-timeout 10 http://127.0.0.1:$PORT/v1/models)
|
|
|
if [ $? -eq 0 ]; then
|
|
|
echo "✅ Models 端点可访问"
|
|
|
- echo "$response" | python3 -m json.tool 2>/dev/null || echo "$response"
|
|
|
+ echo "$response" | python -m json.tool 2>/dev/null || echo "$response"
|
|
|
else
|
|
|
echo "❌ Models 端点不可访问"
|
|
|
fi
|
|
|
@@ -357,8 +360,8 @@ usage() {
|
|
|
echo " ./mineru_local_daemon.sh test"
|
|
|
echo ""
|
|
|
echo "前置要求:"
|
|
|
- echo " 1. 安装 llama.cpp: brew install llama.cpp"
|
|
|
- echo " 2. 首次下载模型: llama-server -hf mradermacher/MinerU2.5-Pro-2604-1.2B-GGUF:Q8_0"
|
|
|
+ echo " 1. 本机编译 llama.cpp,执行文件: $LLAMA_SERVER_EXECUTABLE"
|
|
|
+ echo " 2. 模型文件位于: $HF_CACHE"
|
|
|
echo " 3. conda 环境 mineru 已配置"
|
|
|
}
|
|
|
|