version: '3.8'
services:
  dots-ocr-server:
    image: dots-ocr:latest
    container_name: dots-ocr-container
    ports:
      - "8000:8000"
    volumes:
      # Download the model to this local path first; model URL: https://www.modelscope.cn/models/rednote-hilab/dots.ocr
      - ./model/dots.ocr:/workspace/weights/DotsOCR
    environment:
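      # Make the mounted DotsOCR package importable, so the patched vllm
      # entrypoint below can run "from DotsOCR import modeling_dots_ocr_vllm".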
      - PYTHONPATH=/workspace/weights:$PYTHONPATH
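    # Reserve GPU 0 for this container (requires the NVIDIA Container Toolkit on the host).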
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
              device_ids: ['0']
    entrypoint: /bin/bash
    command:
      - -c
      - |
        set -ex;
        echo '--- Starting setup and server ---';
        echo 'Modifying vllm entrypoint...';
        # This sed command patches the vllm entrypoint script to import the custom modeling code.
        sed -i '/^from vllm\.entrypoints\.cli\.main import main/a from DotsOCR import modeling_dots_ocr_vllm' $(which vllm) && \
        echo 'vllm script after patch:';
        # Show the patched part of the vllm script for verification.
        grep -A 1 'from vllm.entrypoints.cli.main import main' $(which vllm) && \
        echo 'Starting server...';
        # Use 'exec' to replace the current shell process with the vllm server,
        # ensuring logs are properly forwarded to Docker's standard output.
        exec vllm serve /workspace/weights/DotsOCR \
          --tensor-parallel-size 1 \
          --gpu-memory-utilization 0.8 \
          --chat-template-content-format string \
          --served-model-name dotsocr-model \
          --trust-remote-code
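
# Usage (a minimal sketch, assuming the dots-ocr:latest image is already built and
# the model weights have been downloaded to ./model/dots.ocr as mounted above):
#
#   docker compose up -d
#   # vLLM exposes an OpenAI-compatible API; the model is listed under the
#   # served name configured above:
#   curl http://localhost:8000/v1/models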