Browse Source

Merge pull request #3529 from opendatalab/release-2.5.1

Release 2.5.1
Xiaomeng Zhao 1 month ago
parent
commit
6586c7c01e
5 changed files with 20 additions and 4 deletions
  1. 1 1
      README.md
  2. 1 1
      README_zh-CN.md
  3. 9 1
      mineru/backend/vlm/vlm_analyze.py
  4. 8 0
      mineru/model/vlm_vllm_model/server.py
  5. 1 1
      pyproject.toml

+ 1 - 1
README.md

@@ -44,7 +44,7 @@
 
 
 # Changelog
 # Changelog
 
 
-- 2025/09/19 2.5.0 Released
+- 2025/09/19 2.5.1 Released
 
 
   We are officially releasing MinerU2.5, currently the most powerful multimodal large model for document parsing.
   We are officially releasing MinerU2.5, currently the most powerful multimodal large model for document parsing.
   With only 1.2B parameters, MinerU2.5's accuracy on the OmniDocBench benchmark comprehensively surpasses top-tier multimodal models like Gemini 2.5 Pro, GPT-4o, and Qwen2.5-VL-72B. It also significantly outperforms leading specialized models such as dots.ocr, MonkeyOCR, and PP-StructureV3.
   With only 1.2B parameters, MinerU2.5's accuracy on the OmniDocBench benchmark comprehensively surpasses top-tier multimodal models like Gemini 2.5 Pro, GPT-4o, and Qwen2.5-VL-72B. It also significantly outperforms leading specialized models such as dots.ocr, MonkeyOCR, and PP-StructureV3.

+ 1 - 1
README_zh-CN.md

@@ -44,7 +44,7 @@
 
 
 # 更新记录
 # 更新记录
 
 
-- 2025/09/19 2.5.0 发布
+- 2025/09/19 2.5.1 发布
   我们正式发布 MinerU2.5,当前最强文档解析多模态大模型。仅凭 1.2B 参数,MinerU2.5 在 OmniDocBench 文档解析评测中,精度已全面超越 Gemini2.5-Pro、GPT-4o、Qwen2.5-VL-72B等顶级多模态大模型,并显著领先于主流文档解析专用模型(如 dots.ocr, MonkeyOCR, PP-StructureV3 等)。
   我们正式发布 MinerU2.5,当前最强文档解析多模态大模型。仅凭 1.2B 参数,MinerU2.5 在 OmniDocBench 文档解析评测中,精度已全面超越 Gemini2.5-Pro、GPT-4o、Qwen2.5-VL-72B等顶级多模态大模型,并显著领先于主流文档解析专用模型(如 dots.ocr, MonkeyOCR, PP-StructureV3 等)。
  模型已发布至[HuggingFace](https://huggingface.co/opendatalab/MinerU2.5-2509-1.2B)和[ModelScope](https://modelscope.cn/models/OpenDataLab/MinerU2.5-2509-1.2B)平台,欢迎大家下载使用!
  模型已发布至[HuggingFace](https://huggingface.co/opendatalab/MinerU2.5-2509-1.2B)和[ModelScope](https://modelscope.cn/models/OpenDataLab/MinerU2.5-2509-1.2B)平台,欢迎大家下载使用!
   - 核心亮点
   - 核心亮点

+ 9 - 1
mineru/backend/vlm/vlm_analyze.py

@@ -14,6 +14,7 @@ from ...utils.model_utils import get_vram
 from ...utils.models_download_utils import auto_download_and_get_model_root_path
 from ...utils.models_download_utils import auto_download_and_get_model_root_path
 
 
 from mineru_vl_utils import MinerUClient
 from mineru_vl_utils import MinerUClient
+from packaging import version
 
 
 
 
 class ModelSingleton:
 class ModelSingleton:
@@ -52,7 +53,6 @@ class ModelSingleton:
                     except ImportError:
                     except ImportError:
                         raise ImportError("Please install transformers to use the transformers backend.")
                         raise ImportError("Please install transformers to use the transformers backend.")
 
 
-                    from packaging import version
                     if version.parse(transformers_version) >= version.parse("4.56.0"):
                     if version.parse(transformers_version) >= version.parse("4.56.0"):
                         dtype_key = "dtype"
                         dtype_key = "dtype"
                     else:
                     else:
@@ -88,24 +88,32 @@ class ModelSingleton:
                 elif backend == "vllm-engine":
                 elif backend == "vllm-engine":
                     try:
                     try:
                         import vllm
                         import vllm
+                        vllm_version = vllm.__version__
+                        from mineru_vl_utils import MinerULogitsProcessor
                     except ImportError:
                     except ImportError:
                         raise ImportError("Please install vllm to use the vllm-engine backend.")
                         raise ImportError("Please install vllm to use the vllm-engine backend.")
                     if "gpu_memory_utilization" not in kwargs:
                     if "gpu_memory_utilization" not in kwargs:
                         kwargs["gpu_memory_utilization"] = 0.5
                         kwargs["gpu_memory_utilization"] = 0.5
                     if "model" not in kwargs:
                     if "model" not in kwargs:
                         kwargs["model"] = model_path
                         kwargs["model"] = model_path
+                    if version.parse(vllm_version) >= version.parse("0.10.1") and "logits_processors" not in kwargs:
+                        kwargs["logits_processors"] = [MinerULogitsProcessor]
                     # 使用kwargs为 vllm初始化参数
                     # 使用kwargs为 vllm初始化参数
                     vllm_llm = vllm.LLM(**kwargs)
                     vllm_llm = vllm.LLM(**kwargs)
                 elif backend == "vllm-async-engine":
                 elif backend == "vllm-async-engine":
                     try:
                     try:
                         from vllm.engine.arg_utils import AsyncEngineArgs
                         from vllm.engine.arg_utils import AsyncEngineArgs
                         from vllm.v1.engine.async_llm import AsyncLLM
                         from vllm.v1.engine.async_llm import AsyncLLM
+                        from vllm import __version__ as vllm_version
+                        from mineru_vl_utils import MinerULogitsProcessor
                     except ImportError:
                     except ImportError:
                         raise ImportError("Please install vllm to use the vllm-async-engine backend.")
                         raise ImportError("Please install vllm to use the vllm-async-engine backend.")
                     if "gpu_memory_utilization" not in kwargs:
                     if "gpu_memory_utilization" not in kwargs:
                         kwargs["gpu_memory_utilization"] = 0.5
                         kwargs["gpu_memory_utilization"] = 0.5
                     if "model" not in kwargs:
                     if "model" not in kwargs:
                         kwargs["model"] = model_path
                         kwargs["model"] = model_path
+                    if version.parse(vllm_version) >= version.parse("0.10.1") and "logits_processors" not in kwargs:
+                        kwargs["logits_processors"] = [MinerULogitsProcessor]
                     # 使用kwargs为 vllm初始化参数
                     # 使用kwargs为 vllm初始化参数
                     vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
                     vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
             self._models[key] = MinerUClient(
             self._models[key] = MinerUClient(

+ 8 - 0
mineru/model/vlm_vllm_model/server.py

@@ -1,7 +1,10 @@
 import sys
 import sys
 
 
 from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
 from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
+
 from vllm.entrypoints.cli.main import main as vllm_main
 from vllm.entrypoints.cli.main import main as vllm_main
+from vllm import __version__ as vllm_version
+from packaging import version
 
 
 
 
 def main():
 def main():
@@ -9,6 +12,7 @@ def main():
 
 
     has_port_arg = False
     has_port_arg = False
     has_gpu_memory_utilization_arg = False
     has_gpu_memory_utilization_arg = False
+    has_logits_processors_arg = False
     model_path = None
     model_path = None
     model_arg_indices = []
     model_arg_indices = []
 
 
@@ -18,6 +22,8 @@ def main():
             has_port_arg = True
             has_port_arg = True
         if arg == "--gpu-memory-utilization" or arg.startswith("--gpu-memory-utilization="):
         if arg == "--gpu-memory-utilization" or arg.startswith("--gpu-memory-utilization="):
             has_gpu_memory_utilization_arg = True
             has_gpu_memory_utilization_arg = True
+        if arg == "--logits-processors" or arg.startswith("--logits-processors="):
+            has_logits_processors_arg = True
         if arg == "--model":
         if arg == "--model":
             if i + 1 < len(args):
             if i + 1 < len(args):
                 model_path = args[i + 1]
                 model_path = args[i + 1]
@@ -38,6 +44,8 @@ def main():
         args.extend(["--gpu-memory-utilization", "0.5"])
         args.extend(["--gpu-memory-utilization", "0.5"])
     if not model_path:
     if not model_path:
         model_path = auto_download_and_get_model_root_path("/", "vlm")
         model_path = auto_download_and_get_model_root_path("/", "vlm")
+    if not has_logits_processors_arg and version.parse(vllm_version) >= version.parse("0.10.1"):
+        args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"])
 
 
     # 重构参数,将模型路径作为位置参数
     # 重构参数,将模型路径作为位置参数
     sys.argv = [sys.argv[0]] + ["serve", model_path] + args
     sys.argv = [sys.argv[0]] + ["serve", model_path] + args

+ 1 - 1
pyproject.toml

@@ -39,7 +39,7 @@ dependencies = [
     "openai>=1.70.0,<2",
     "openai>=1.70.0,<2",
     "beautifulsoup4>=4.13.5,<5",
     "beautifulsoup4>=4.13.5,<5",
     "magika>=0.6.2,<0.7.0",
     "magika>=0.6.2,<0.7.0",
-    "mineru-vl-utils>=0.1.6,<1",
+    "mineru-vl-utils>=0.1.7,<1",
 ]
 ]
 
 
 [project.optional-dependencies]
 [project.optional-dependencies]