Update compose.yaml

Xiaomeng Zhao, 4 months ago
Commit 3104bc2c9a
1 file changed, 6 insertions(+), 10 deletions(-)

docker/compose.yaml (+6, -10)

@@ -1,26 +1,22 @@
 # Documentation:
-# https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/server_args.py
-# https://github.com/opendatalab/MinerU/tree/master?tab=readme-ov-file#23-using-sglang-to-accelerate-vlm-model-inference
+# https://docs.sglang.ai/backend/server_arguments.html#common-launch-commands
 services:
   mineru-sglang:
     image: mineru-sglang:latest
     container_name: mineru-sglang
-    volumes:
-      # - ${HF_HOME}:/root/.cache/huggingface
-      # - ${MODELSCOPE_CACHE}:/root/.cache/modelscope
-      - ./inductor_root_cache:/root/inductor_root_cache
     restart: always
     ports:
       - 30000:30000
     environment:
       MINERU_MODEL_SOURCE: local
-      # TORCHINDUCTOR_CACHE_DIR: /root/inductor_root_cache
-      # NO_PROXY: 0.0.0.0,localhost,127.0.0.1
     entrypoint: mineru-sglang-server
     command:
       --host 0.0.0.0
       --port 30000
-      # --enable-torch-compile
+      # --enable-torch-compile  # You can also enable torch.compile to accelerate inference by approximately 15%
+      # --dp 2  # If you have two or more GPUs with 24 GB VRAM or more, you can use sglang's data-parallel (DP) mode to increase throughput
+      # --tp 2  # If you have two GPUs with 12 GB or 16 GB VRAM each, you can use tensor-parallel (TP) mode
+      # --mem-fraction-static 0.7  # If you have two GPUs with 11 GB VRAM each, reduce the KV cache size with this flag in addition to using TP mode
     ulimits:
       memlock: -1
       stack: 67108864
@@ -33,4 +29,4 @@ services:
           devices:
             - driver: nvidia
               device_ids: ["0"]
-              capabilities: [gpu]
+              capabilities: [gpu]
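
For reference, a minimal sketch of what the command and device sections could look like with tensor parallelism enabled, assuming two GPUs with 12–16 GB VRAM each. Note that TP across two GPUs also requires exposing both devices to the container; the single-entry device_ids in the diff above only exposes GPU 0. The flag values and device IDs here are illustrative, not part of the commit:

    entrypoint: mineru-sglang-server
    command:
      --host 0.0.0.0
      --port 30000
      --tp 2                      # shard the model across two GPUs
      --mem-fraction-static 0.7   # only needed on ~11 GB cards to shrink the KV cache
    ...
            devices:
              - driver: nvidia
                device_ids: ["0", "1"]   # expose both GPUs to the container
                capabilities: [gpu]

And a sketch of bringing the stack up and checking it, assuming the image has already been built locally as mineru-sglang:latest and the NVIDIA container runtime is installed (/health is sglang's standard liveness route; adjust if the MinerU wrapper differs):

docker compose -f docker/compose.yaml up -d
docker logs -f mineru-sglang          # wait for the model to finish loading
curl http://localhost:30000/health    # expect HTTP 200 once the server is ready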