|
|
@@ -8,6 +8,7 @@ LOGFILE="$LOGDIR/vllm.log"
|
|
|
conda activate dots.ocr
|
|
|
hf_model_path="/home/ubuntu/zhch/dots.ocr/weights/DotsOCR" # Path to your downloaded model weights
|
|
|
export PYTHONPATH=$(dirname "$hf_model_path"):$PYTHONPATH
|
|
|
+
|
|
|
# launch vllm server
|
|
|
model_name="DotsOCR"
|
|
|
|
|
|
@@ -21,13 +22,13 @@ start() {
|
|
|
nohup vllm serve $hf_model_path \
|
|
|
--host 0.0.0.0 \
|
|
|
--port 8101 \
|
|
|
- --gpu-memory-utilization 0.90 \
|
|
|
+ --gpu-memory-utilization 0.85 \
|
|
|
--max-log-len 1000 \
|
|
|
--trust-remote-code \
|
|
|
--max-model-len 65536 \
|
|
|
--max-num-batched-tokens 65536 \
|
|
|
--uvicorn-log-level info \
|
|
|
- --limit-mm-per-prompt '{"image": 5}' \
|
|
|
+ --limit-mm-per-prompt '{"image": 3}' \
|
|
|
--chat-template-content-format string \
|
|
|
--data-parallel-size 3 \
|
|
|
--max-num-seqs 32 \
|
|
|
@@ -60,6 +61,17 @@ status() {
|
|
|
fi
|
|
|
}
|
|
|
|
|
|
+# Fixed version of launch_model_vllm.sh
|
|
|
+echo "🔧 注册 DotsOCR 模型到 vLLM..."
|
|
|
+vllm_path=$(which vllm)
|
|
|
+if ! grep -q "from DotsOCR import modeling_dots_ocr_vllm" "$vllm_path"; then
|
|
|
+ sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
|
|
|
+from DotsOCR import modeling_dots_ocr_vllm' "$vllm_path"
|
|
|
+ echo "✅ DotsOCR 模型已注册到 vLLM"
|
|
|
+else
|
|
|
+ echo "✅ DotsOCR 模型已经注册过了"
|
|
|
+fi
|
|
|
+
|
|
|
case "$1" in
|
|
|
start)
|
|
|
start
|