#!/usr/bin/env bash
# launch_model_vllm.sh

  1. # download model to /path/to/model
  2. if [ -z "$NODOWNLOAD" ]; then
  3. python3 tools/download_model.py
  4. fi
  5. # register model to vllm
  6. hf_model_path=./weights/DotsOCR # Path to your downloaded model weights
  7. export PYTHONPATH=$(dirname "$hf_model_path"):$PYTHONPATH
  8. sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
  9. from DotsOCR import modeling_dots_ocr_vllm' `which vllm`
  10. # launch vllm server
  11. model_name=model
  12. CUDA_VISIBLE_DEVICES=0 vllm serve ${hf_model_path} --tensor-parallel-size 1 --gpu-memory-utilization 0.95 --chat-template-content-format string --served-model-name ${model_name} --trust-remote-code
  13. # # run python demo after launch vllm server
  14. # python demo/demo_vllm.py