# compose.yaml — MinerU service definitions.
# Each service is gated behind a Compose profile; start one with e.g.:
#   docker compose --profile vllm-server up -d
# All three services share the same image and the same single-GPU reservation.

services:
  # OpenAI-compatible vllm inference server.
  mineru-vllm-server:
    image: mineru-vllm:latest
    container_name: mineru-vllm-server
    restart: always
    profiles: ["vllm-server"]
    ports:
      # Quoted: Compose port mappings should always be strings so YAML
      # implicit typing can never misread them.
      - "30000:30000"
    environment:
      MINERU_MODEL_SOURCE: local
    entrypoint: mineru-vllm-server
    # Multi-line plain scalar: the argument lines below fold into the single
    # command string "--host 0.0.0.0 --port 30000".
    command:
      --host 0.0.0.0
      --port 30000
      # --data-parallel-size 2 # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
      # --gpu-memory-utilization 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]

  # REST API frontend.
  # NOTE(review): unlike mineru-vllm-server, this service defines no
  # healthcheck — confirm whether that is intentional.
  mineru-api:
    image: mineru-vllm:latest
    container_name: mineru-api
    restart: always
    profiles: ["api"]
    ports:
      - "8000:8000"
    environment:
      MINERU_MODEL_SOURCE: local
    entrypoint: mineru-api
    command:
      --host 0.0.0.0
      --port 8000
      # parameters for vllm-engine
      # --data-parallel-size 2 # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
      # --gpu-memory-utilization 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]

  # Gradio web UI.
  mineru-gradio:
    image: mineru-vllm:latest
    container_name: mineru-gradio
    restart: always
    profiles: ["gradio"]
    ports:
      - "7860:7860"
    environment:
      MINERU_MODEL_SOURCE: local
    entrypoint: mineru-gradio
    command:
      --server-name 0.0.0.0
      --server-port 7860
      --enable-vllm-engine true # Enable the vllm engine for Gradio
      # --enable-api false # If you want to disable the API, set this to false
      # --max-convert-pages 20 # If you want to limit the number of pages for conversion, set this to a specific number
      # parameters for vllm-engine
      # --data-parallel-size 2 # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
      # --gpu-memory-utilization 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]