@@ -0,0 +1,40 @@
+version: '3.8'
+
+services:
+  dots-ocr-server:
+    image: dots-ocr:20250801
+    container_name: dots-ocr-container
+    ports:
+      - "8001:8000"
+    volumes:
+      - /app/data/gpt/source-model/dots.ocr:/workspace/weights/DotsOCR
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+      - PYTHONPATH=/workspace/weights:$PYTHONPATH
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - capabilities: [gpu]
+              device_ids: ['0']
+    entrypoint: /bin/bash
+    command:
+      - -c
+      - |
+        set -ex;
+        echo '--- Starting setup and server ---';
+        echo 'Modifying vllm entrypoint...';
+        # This sed command patches the vllm entrypoint script to import the custom modeling code.
+        sed -i '/^from vllm\.entrypoints\.cli\.main import main/a from DotsOCR import modeling_dots_ocr_vllm' $(which vllm) && \
+        echo 'vllm script after patch:';
+        # Show the patched part of the vllm script for verification.
+        grep -A 1 'from vllm.entrypoints.cli.main import main' $(which vllm) && \
+        echo 'Starting server...';
+        # Use 'exec' to replace the current shell process with the vllm server,
+        # ensuring logs are properly forwarded to Docker's standard output.
+        exec vllm serve /workspace/weights/DotsOCR \
+          --tensor-parallel-size 1 \
+          --gpu-memory-utilization 0.8 \
+          --chat-template-content-format string \
+          --served-model-name dotsocr-model \
+          --trust-remote-code
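
A minimal smoke test once the file above is in place, assuming it is saved as docker-compose.yml on a host that has the dots-ocr:20250801 image, the NVIDIA container runtime, and the model weights at /app/data/gpt/source-model/dots.ocr:

# Start the service in the background and follow the server logs.
docker compose up -d
docker compose logs -f dots-ocr-server

# Once vLLM reports the server is ready, its OpenAI-compatible API is
# reachable on the published host port 8001; listing the models should
# return the name set via --served-model-name above (dotsocr-model).
curl http://localhost:8001/v1/models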