launch_model_vllm_macos.sh

#!/bin/bash
# filepath: zhch/launch_model_vllm_macos.sh
# Apple Silicon-optimized vLLM launch script

export model_path="./weights/DotsOCR_CPU_bfloat16"  # path to the bfloat16-optimized model weights
export model_name="DotsOCR"
export PYTHONPATH=$(dirname "$model_path"):$PYTHONPATH

echo "🍎 Starting vLLM on Apple Silicon with optimizations..."
echo "Model path: $model_path"
echo "PyTorch version: $(python -c 'import torch; print(torch.__version__)')"
echo "MPS available: $(python -c 'import torch; print(torch.backends.mps.is_available())')"

# Apple Silicon tuning parameters
vllm serve ${model_path} \
  --host 0.0.0.0 \
  --port 8101 \
  --max-model-len 32768 \
  --max-num-batched-tokens 4096 \
  --max-num-seqs 8 \
  --gpu-memory-utilization 0.8 \
  --trust-remote-code \
  --chat-template-content-format string \
  --served-model-name ${model_name} \
  --uvicorn-log-level info \
  --limit-mm-per-prompt image=5 \
  --tensor-parallel-size 1 \
  --disable-log-stats \
  --disable-log-requests
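
Once the server is up, a quick sanity check against vLLM's OpenAI-compatible API can confirm the model is being served. This is a minimal sketch, assuming the server is reachable on localhost at the port configured above (8101):

# Should return a model list containing "DotsOCR" (set via --served-model-name above)
curl http://localhost:8101/v1/models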