# Documentation:
# https://docs.sglang.ai/backend/server_arguments.html#common-launch-commands
services:
  mineru-sglang:
    image: mineru-sglang:latest
    container_name: mineru-sglang
    restart: always
    ports:
      # Quoted to avoid YAML 1.1 scalar ambiguity in host:container mappings.
      - "30000:30000"
    environment:
      MINERU_MODEL_SOURCE: local
    entrypoint: mineru-sglang-server
    command:
      --host 0.0.0.0
      --port 30000
      # --enable-torch-compile  # You can also enable torch.compile to accelerate inference speed by approximately 15%
      # --dp 2  # If you have more than two GPUs with 24GB VRAM or above, you can use sglang's multi-GPU parallel mode to increase throughput
      # --tp 2  # If you have two GPUs with 12GB or 16GB VRAM, you can use the Tensor Parallel (TP) mode
      # --mem-fraction-static 0.7  # If you have two GPUs with 11GB VRAM, in addition to Tensor Parallel mode, you need to reduce the KV cache size
    ulimits:
      # Unlimited locked memory and a large stack, as required for CUDA workloads.
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]