#!/bin/bash
# filepath: zhch/launch_model_vllm_macos.sh
# vLLM launch script optimized for Apple Silicon

export model_path="./weights/DotsOCR_CPU_bfloat16"  # path to the bfloat16-optimized model weights
export model_name="DotsOCR"
export PYTHONPATH="$(dirname "$model_path"):$PYTHONPATH"

echo "🍎 Starting vLLM on Apple Silicon with optimizations..."
echo "Model path: $model_path"
echo "PyTorch version: $(python -c 'import torch; print(torch.__version__)')"
echo "MPS available: $(python -c 'import torch; print(torch.backends.mps.is_available())')"

# Serving parameters tuned for Apple Silicon
vllm serve "${model_path}" \
    --host 0.0.0.0 \
    --port 8101 \
    --max-model-len 32768 \
    --max-num-batched-tokens 4096 \
    --max-num-seqs 8 \
    --gpu-memory-utilization 0.8 \
    --trust-remote-code \
    --chat-template-content-format string \
    --served-model-name "${model_name}" \
    --uvicorn-log-level info \
    --limit-mm-per-prompt image=5 \
    --tensor-parallel-size 1 \
    --disable-log-stats \
    --disable-log-requests
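
# Minimal smoke test (sketch): `vllm serve` runs in the foreground, so from a
# second shell you can verify the OpenAI-compatible endpoint once the server is
# up. The localhost host and the request body below are illustrative assumptions;
# the port and served model name come from the flags above.
#
#   curl http://localhost:8101/v1/models
#
#   curl http://localhost:8101/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "DotsOCR", "messages": [{"role": "user", "content": "hello"}]}'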