# compose.yaml
# Documentation:
# https://docs.sglang.ai/backend/server_arguments.html#common-launch-commands
services:
  mineru-sglang:
    image: mineru-sglang:latest
    container_name: mineru-sglang
    restart: always
    ports:
      # Quoted so the mapping is always parsed as a string, never a scalar.
      - "30000:30000"
    environment:
      # Use locally downloaded model weights instead of fetching remotely.
      MINERU_MODEL_SOURCE: local
    entrypoint: mineru-sglang-server
    # Multi-line plain scalar: folds into a single argument string for the
    # entrypoint. Lines starting with '#' end the scalar, so the tuning
    # options below stay inert until uncommented (move them above any '#').
    command:
      --host 0.0.0.0
      --port 30000
      # --enable-torch-compile # You can also enable torch.compile to accelerate inference speed by approximately 15%
      # --dp-size 2 # If using multiple GPUs, increase throughput using sglang's multi-GPU parallel mode
      # --tp-size 2 # If you have more than one GPU, you can expand available VRAM using tensor parallelism (TP) mode.
      # --mem-fraction-static 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
    ulimits:
      # Unlimited locked memory; required for pinned/CUDA host allocations.
      memlock: -1
      # 64 MiB stack size.
      stack: 67108864
    # Share the host IPC namespace (needed for multi-process shared memory).
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            # Reserve GPU 0 via the NVIDIA container runtime.
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]