@@ -1,21 +1,19 @@
 services:
-  mineru-sglang-server:
-    image: mineru-sglang:latest
-    container_name: mineru-sglang-server
+  mineru-vllm-server:
+    image: mineru-vllm:latest
+    container_name: mineru-vllm-server
     restart: always
-    profiles: ["sglang-server"]
+    profiles: ["vllm-server"]
     ports:
       - 30000:30000
     environment:
       MINERU_MODEL_SOURCE: local
-    entrypoint: mineru-sglang-server
+    entrypoint: mineru-vllm-server
     command:
       --host 0.0.0.0
       --port 30000
-      # --enable-torch-compile # You can also enable torch.compile to accelerate inference speed by approximately 15%
-      # --dp-size 2 # If using multiple GPUs, increase throughput using sglang's multi-GPU parallel mode
-      # --tp-size 2 # If you have more than one GPU, you can expand available VRAM using tensor parallelism (TP) mode.
-      # --mem-fraction-static 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
+      # --data-parallel-size 2 # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
+      # --gpu-memory-utilization 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
     ulimits:
       memlock: -1
       stack: 67108864
@@ -31,7 +29,7 @@ services:
               capabilities: [gpu]
 
   mineru-api:
-    image: mineru-sglang:latest
+    image: mineru-vllm:latest
     container_name: mineru-api
     restart: always
     profiles: ["api"]
@@ -43,11 +41,9 @@ services:
     command:
       --host 0.0.0.0
       --port 8000
-      # parameters for sglang-engine
-      # --enable-torch-compile # You can also enable torch.compile to accelerate inference speed by approximately 15%
-      # --dp-size 2 # If using multiple GPUs, increase throughput using sglang's multi-GPU parallel mode
-      # --tp-size 2 # If you have more than one GPU, you can expand available VRAM using tensor parallelism (TP) mode.
-      # --mem-fraction-static 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
+      # parameters for vllm-engine
+      # --data-parallel-size 2 # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
+      # --gpu-memory-utilization 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
     ulimits:
       memlock: -1
       stack: 67108864
@@ -61,7 +57,7 @@ services:
               capabilities: [ gpu ]
 
   mineru-gradio:
-    image: mineru-sglang:latest
+    image: mineru-vllm:latest
     container_name: mineru-gradio
     restart: always
     profiles: ["gradio"]
@@ -73,14 +69,12 @@ services:
     command:
       --server-name 0.0.0.0
      --server-port 7860
-      --enable-sglang-engine true # Enable the sglang engine for Gradio
+      --enable-vllm-engine true # Enable the vllm engine for Gradio
       # --enable-api false # If you want to disable the API, set this to false
       # --max-convert-pages 20 # If you want to limit the number of pages for conversion, set this to a specific number
-      # parameters for sglang-engine
-      # --enable-torch-compile # You can also enable torch.compile to accelerate inference speed by approximately 15%
-      # --dp-size 2 # If using multiple GPUs, increase throughput using sglang's multi-GPU parallel mode
-      # --tp-size 2 # If you have more than one GPU, you can expand available VRAM using tensor parallelism (TP) mode.
-      # --mem-fraction-static 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
+      # parameters for vllm-engine
+      # --data-parallel-size 2 # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
+      # --gpu-memory-utilization 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
     ulimits:
       memlock: -1
       stack: 67108864
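
Each container keeps its own Compose profile after this change, so the workflow around the file stays the same; only the image tag and the server profile name are renamed. A minimal sketch of the commands involved, assuming the mineru-vllm:latest image is built from the Dockerfile that ships alongside this compose file (the build context path is an assumption; adjust it to your checkout):

    # build the image referenced by all three services (Dockerfile location assumed)
    docker build -t mineru-vllm:latest .

    # start whichever service you need; each maps to one of the profiles above
    docker compose --profile vllm-server up -d   # vLLM inference server, published on port 30000
    docker compose --profile api up -d           # HTTP API service (--port 8000)
    docker compose --profile gradio up -d        # Gradio UI (--server-port 7860)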
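
Once mineru-vllm-server is running, a client on another machine can point at it instead of loading the model locally. A hedged example using the MinerU CLI; the backend identifier vlm-http-client is an assumption based on the sglang-era vlm-sglang-client naming, so confirm the exact value with `mineru --help` for your version:

    # hypothetical client call; replace 127.0.0.1 with the Docker host's address
    mineru -p demo.pdf -o ./output -b vlm-http-client -u http://127.0.0.1:30000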