docs: update references from sglang to vllm in documentation and configuration files

myhloli · 2 months ago · commit 745954ca08

docker/china/Dockerfile · +4 -8

@@ -1,12 +1,8 @@
-# Use DaoCloud mirrored sglang image for China region
-FROM docker.m.daocloud.io/lmsysorg/sglang:v0.4.10.post2-cu126
-# For blackwell GPU, use the following line instead:
-# FROM docker.m.daocloud.io/lmsysorg/sglang:v0.4.10.post2-cu128-b200
+# Use DaoCloud mirrored vllm image for China region
+FROM docker.m.daocloud.io/vllm/vllm-openai:v0.10.1.1

-# Use the official sglang image
-# FROM lmsysorg/sglang:v0.4.10.post2-cu126
-# For blackwell GPU, use the following line instead:
-# FROM lmsysorg/sglang:v0.4.10.post2-cu128-b200
+# Use the official vllm image
+# FROM vllm/vllm-openai:v0.10.1.1

 # Install libgl for opencv support & Noto fonts for Chinese characters
 RUN apt-get update && \
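The compose.yaml below expects a locally built image tagged `mineru-vllm:latest`. A minimal build sketch, assuming the repository root as build context (the tag comes from compose.yaml; the rest of the invocation is an assumption):

```bash
# Build the China-region image; use docker/global/Dockerfile outside China
# (tag matches what compose.yaml references; build context is assumed)
docker build -t mineru-vllm:latest -f docker/china/Dockerfile .
```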

docker/compose.yaml · +16 -22

@@ -1,21 +1,19 @@
 services:
-  mineru-sglang-server:
-    image: mineru-sglang:latest
-    container_name: mineru-sglang-server
+  mineru-vllm-server:
+    image: mineru-vllm:latest
+    container_name: mineru-vllm-server
     restart: always
-    profiles: ["sglang-server"]
+    profiles: ["vllm-server"]
     ports:
       - 30000:30000
     environment:
       MINERU_MODEL_SOURCE: local
-    entrypoint: mineru-sglang-server
+    entrypoint: mineru-vllm-server
     command:
       --host 0.0.0.0
       --port 30000
-      # --enable-torch-compile  # You can also enable torch.compile to accelerate inference speed by approximately 15%
-      # --dp-size 2  # If using multiple GPUs, increase throughput using sglang's multi-GPU parallel mode
-      # --tp-size 2  # If you have more than one GPU, you can expand available VRAM using tensor parallelism (TP) mode.
-      # --mem-fraction-static 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
+      # --data-parallel-size 2  # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
+      # --gpu-memory-utilization 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size with this parameter; if VRAM issues persist, lower it further to `0.4` or below.
     ulimits:
       memlock: -1
       stack: 67108864
@@ -31,7 +29,7 @@ services:
               capabilities: [gpu]

   mineru-api:
-    image: mineru-sglang:latest
+    image: mineru-vllm:latest
     container_name: mineru-api
     restart: always
     profiles: ["api"]
@@ -43,11 +41,9 @@ services:
     command:
       --host 0.0.0.0
       --port 8000
-      # parameters for sglang-engine
-      # --enable-torch-compile  # You can also enable torch.compile to accelerate inference speed by approximately 15%
-      # --dp-size 2  # If using multiple GPUs, increase throughput using sglang's multi-GPU parallel mode
-      # --tp-size 2  # If you have more than one GPU, you can expand available VRAM using tensor parallelism (TP) mode.
-      # --mem-fraction-static 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
+      # parameters for vllm-engine
+      # --data-parallel-size 2  # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
+      # --gpu-memory-utilization 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size with this parameter; if VRAM issues persist, lower it further to `0.4` or below.
     ulimits:
       memlock: -1
       stack: 67108864
@@ -61,7 +57,7 @@ services:
               capabilities: [ gpu ]

   mineru-gradio:
-    image: mineru-sglang:latest
+    image: mineru-vllm:latest
     container_name: mineru-gradio
     restart: always
     profiles: ["gradio"]
@@ -73,14 +69,12 @@ services:
     command:
       --server-name 0.0.0.0
       --server-port 7860
-      --enable-sglang-engine true  # Enable the sglang engine for Gradio
+      --enable-vllm-engine true  # Enable the vllm engine for Gradio
       # --enable-api false  # If you want to disable the API, set this to false
       # --max-convert-pages 20  # If you want to limit the number of pages for conversion, set this to a specific number
-      # parameters for sglang-engine
-      # --enable-torch-compile  # You can also enable torch.compile to accelerate inference speed by approximately 15%
-      # --dp-size 2  # If using multiple GPUs, increase throughput using sglang's multi-GPU parallel mode
-      # --tp-size 2  # If you have more than one GPU, you can expand available VRAM using tensor parallelism (TP) mode.
-      # --mem-fraction-static 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
+      # parameters for vllm-engine
+      # --data-parallel-size 2  # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
+      # --gpu-memory-utilization 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size with this parameter; if VRAM issues persist, lower it further to `0.4` or below.
     ulimits:
       memlock: -1
       stack: 67108864
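
Each service above is gated behind a compose profile, so nothing starts until a profile is selected. A sketch of the typical invocations, using the profile names defined in this file (standard `docker compose` syntax; run from the directory containing compose.yaml):

```bash
# Start only the inference server (profile and service names from compose.yaml)
docker compose --profile vllm-server up -d

# Or start the API or Gradio front end instead
docker compose --profile api up -d
docker compose --profile gradio up -d
```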

docker/global/Dockerfile · +2 -4

@@ -1,7 +1,5 @@
-# Use the official sglang image
-FROM lmsysorg/sglang:v0.4.10.post2-cu126
-# For blackwell GPU, use the following line instead:
-# FROM lmsysorg/sglang:v0.4.10.post2-cu128-b200
+# Use the official vllm image
+FROM vllm/vllm-openai:v0.10.1.1

 # Install libgl for opencv support & Noto fonts for Chinese characters
 RUN apt-get update && \

docs/en/quick_start/docker_deployment.md · +1 -1

@@ -35,7 +35,7 @@ docker run --gpus all \
 ```

 After executing this command, you will enter the Docker container's interactive terminal with some ports mapped for potential services. You can directly run MinerU-related commands within the container to use MinerU's features.
-You can also directly start MinerU services by replacing `/bin/bash` with service startup commands. For detailed instructions, please refer to the [Start the service via command](https://opendatalab.github.io/MinerU/usage/quick_usage/#advanced-usage-via-api-webui-sglang-clientserver).
+You can also directly start MinerU services by replacing `/bin/bash` with service startup commands. For detailed instructions, please refer to the [Start the service via command](https://opendatalab.github.io/MinerU/usage/quick_usage/#advanced-usage-via-api-webui-http-clientserver).
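For instance, swapping `/bin/bash` for the server entrypoint used in compose.yaml above would start the inference service directly. A hedged sketch (the image tag and entrypoint are taken from compose.yaml; the remaining `docker run` flags are assumptions and should be adapted from the guide's original command):

```bash
# Run the server entrypoint instead of an interactive shell
# (--entrypoint mirrors compose.yaml's `entrypoint: mineru-vllm-server`;
#  port mapping and GPU flags are assumptions)
docker run --gpus all -p 30000:30000 \
  --entrypoint mineru-vllm-server \
  mineru-vllm:latest \
  --host 0.0.0.0 --port 30000
```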
 
 
 ## Start Services Directly with Docker Compose

docs/en/quick_start/extension_modules.md · +1 -1

@@ -22,7 +22,7 @@ uv pip install mineru[all]

 ---

-### Installing Lightweight Client to Connect to sglang-server
+### Installing Lightweight Client to Connect to vllm-server
 If you need to install a lightweight client on edge devices to connect to `vllm-server`, you can install the basic mineru package, which is very lightweight and suitable for devices with only CPU and network connectivity.
 ```bash
 uv pip install mineru
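
With the base package installed, the client would delegate inference to a remote `vllm-server` over HTTP. A hedged sketch (the `vlm-http-client` backend name and `-u` URL flag follow MinerU's client/server pattern but are assumptions; verify them against `mineru --help` for your version):

```bash
# Parse a PDF on a CPU-only device, offloading inference to a remote server
# (backend and flag names are assumptions; the server address is a placeholder)
mineru -p demo.pdf -o ./output -b vlm-http-client -u http://<server-ip>:30000
```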

docs/en/usage/advanced_cli_parameters.md · +1 -1

@@ -40,7 +40,7 @@
 > 
 > - If you have multiple graphics cards and need to use cards 0 and 1 to start `vllm-server` with multi-GPU parallelism, you can use the following command:
 >   ```bash
->   CUDA_VISIBLE_DEVICES=0,1 mineru-sglang-server --port 30000 --data-parallel-size 2
+>   CUDA_VISIBLE_DEVICES=0,1 mineru-vllm-server --port 30000 --data-parallel-size 2
 >   ```
 > 
 > - If you have multiple graphics cards and need to start two `fastapi` services on cards 0 and 1, each listening on a different port, you can use the following commands:
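
The hunk cuts off before the commands themselves; a sketch of what they might look like, reusing the `CUDA_VISIBLE_DEVICES` pattern from the snippet above (the `mineru-api` entrypoint and the port numbers are assumptions; adapt them to your deployment):

```bash
# One fastapi service per GPU, each listening on its own port
# (entrypoint name and ports are assumptions, not taken from this diff)
CUDA_VISIBLE_DEVICES=0 mineru-api --host 0.0.0.0 --port 8000 &
CUDA_VISIBLE_DEVICES=1 mineru-api --host 0.0.0.0 --port 8001 &
```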

docs/en/usage/quick_usage.md · +1 -1

@@ -29,7 +29,7 @@ mineru -p <input_path> -o <output_path>
 mineru -p <input_path> -o <output_path> -b vlm-transformers
 ```
 > [!TIP]
-> The vlm backend additionally supports `sglang` acceleration. Compared to the `transformers` backend, `sglang` can achieve 20-30x speedup. You can check the installation method for the complete package supporting `sglang` acceleration in the [Extension Modules Installation Guide](../quick_start/extension_modules.md).
+> The vlm backend additionally supports `vllm` acceleration. Compared to the `transformers` backend, `vllm` can achieve a 20-30x speedup. You can find the installation method for the complete package supporting `vllm` acceleration in the [Extension Modules Installation Guide](../quick_start/extension_modules.md).

 If you need to adjust parsing options through custom parameters, you can also check the more detailed [Command Line Tools Usage Instructions](./cli_tools.md) in the documentation.
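
Opting into the accelerated engine is then a matter of switching the backend flag. A hedged sketch (the `vlm-vllm-engine` backend name mirrors the `vlm-transformers` example above but is an assumption; confirm the exact name with `mineru --help`):

```bash
# Use the vllm-accelerated vlm backend instead of transformers
# (backend name is an assumption; requires the full package with vllm installed)
mineru -p <input_path> -o <output_path> -b vlm-vllm-engine
```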
 
 

docs/zh/quick_start/docker_deployment.md · +1 -1

@@ -35,7 +35,7 @@ docker run --gpus all \
 ```

 After executing this command, you will enter the Docker container's interactive terminal, with some ports mapped for services you may use. You can run MinerU-related commands directly inside the container to use MinerU's features.
-You can also start MinerU services directly by replacing `/bin/bash` with a service startup command. For details, see [Start the service via command](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuisglang-clientserver).
+You can also start MinerU services directly by replacing `/bin/bash` with a service startup command. For details, see [Start the service via command](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuihttp-clientserver).

 ## Start Services Directly with Docker Compose