3 周之前 · d67be0c7de
--- a/docker/compose.yaml
+++ b/docker/compose.yaml
@@ -73,6 +73,9 @@ services:
 
				       # parameters for vllm-engine
			
 
				       # --data-parallel-size 2  # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
			
 
				       # --gpu-memory-utilization 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
			
 
				+      # parameters for lmdeploy-engine
			
 
				+      # --dp 2  # If using multiple GPUs, increase throughput using lmdeploy's multi-GPU parallel mode
			
 
				+      # --cache-max-entry-count 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size with this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
			
 
				     ulimits:
			
 
				       memlock: -1
			
 
				       stack: 67108864
			
@@ -98,12 +101,17 @@ services:
 
				     command:
			
 
				       --server-name 0.0.0.0
			
 
				       --server-port 7860
			
 
				-      --enable-vllm-engine true  # Enable the vllm engine for Gradio
			
 
				       # --enable-api false  # If you want to disable the API, set this to false
			
 
				       # --max-convert-pages 20  # If you want to limit the number of pages for conversion, set this to a specific number
			
 
				       # parameters for vllm-engine
			
 
				+      --enable-vllm-engine true  # Enable the vllm engine for Gradio
			
 
				       # --data-parallel-size 2  # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
			
 
				       # --gpu-memory-utilization 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
			
 
				+      # parameters for lmdeploy-engine
			
 
				+      # !!!The lmdeploy and vLLM engines cannot be enabled simultaneously. Please ensure that at most one engine is active at any given time.!!!
			
 
				+      # --enable-lmdeploy-engine true  # Enable the lmdeploy engine for Gradio
			
 
				+      # --dp 2  # If using multiple GPUs, increase throughput using lmdeploy's multi-GPU parallel mode
			
 
				+      # --cache-max-entry-count 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size with this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
			
 
				     ulimits:
			
 
				       memlock: -1
			
 
				       stack: 67108864