Bladeren bron

Merge pull request #3529 from opendatalab/release-2.5.1

Release 2.5.1
Xiaomeng Zhao 1 maand geleden
bovenliggende
commit
6586c7c01e
5 gewijzigde bestanden met toevoegingen van 20 en 4 verwijderingen
  1. 1 1
      README.md
  2. 1 1
      README_zh-CN.md
  3. 9 1
      mineru/backend/vlm/vlm_analyze.py
  4. 8 0
      mineru/model/vlm_vllm_model/server.py
  5. 1 1
      pyproject.toml

+ 1 - 1
README.md

@@ -44,7 +44,7 @@
 
 # Changelog
 
-- 2025/09/19 2.5.0 Released
+- 2025/09/19 2.5.1 Released
 
   We are officially releasing MinerU2.5, currently the most powerful multimodal large model for document parsing.
   With only 1.2B parameters, MinerU2.5's accuracy on the OmniDocBench benchmark comprehensively surpasses top-tier multimodal models like Gemini 2.5 Pro, GPT-4o, and Qwen2.5-VL-72B. It also significantly outperforms leading specialized models such as dots.ocr, MonkeyOCR, and PP-StructureV3.

+ 1 - 1
README_zh-CN.md

@@ -44,7 +44,7 @@
 
 # 更新记录
 
-- 2025/09/19 2.5.0 发布
+- 2025/09/19 2.5.1 发布
   我们正式发布 MinerU2.5,当前最强文档解析多模态大模型。仅凭 1.2B 参数,MinerU2.5 在 OmniDocBench 文档解析评测中,精度已全面超越 Gemini2.5-Pro、GPT-4o、Qwen2.5-VL-72B等顶级多模态大模型,并显著领先于主流文档解析专用模型(如 dots.ocr, MonkeyOCR, PP-StructureV3 等)。
  模型已发布至[HuggingFace](https://huggingface.co/opendatalab/MinerU2.5-2509-1.2B)和[ModelScope](https://modelscope.cn/models/OpenDataLab/MinerU2.5-2509-1.2B)平台,欢迎大家下载使用!
   - 核心亮点

+ 9 - 1
mineru/backend/vlm/vlm_analyze.py

@@ -14,6 +14,7 @@ from ...utils.model_utils import get_vram
 from ...utils.models_download_utils import auto_download_and_get_model_root_path
 
 from mineru_vl_utils import MinerUClient
+from packaging import version
 
 
 class ModelSingleton:
@@ -52,7 +53,6 @@ class ModelSingleton:
                     except ImportError:
                         raise ImportError("Please install transformers to use the transformers backend.")
 
-                    from packaging import version
                     if version.parse(transformers_version) >= version.parse("4.56.0"):
                         dtype_key = "dtype"
                     else:
@@ -88,24 +88,32 @@ class ModelSingleton:
                 elif backend == "vllm-engine":
                     try:
                         import vllm
+                        vllm_version = vllm.__version__
+                        from mineru_vl_utils import MinerULogitsProcessor
                     except ImportError:
                         raise ImportError("Please install vllm to use the vllm-engine backend.")
                     if "gpu_memory_utilization" not in kwargs:
                         kwargs["gpu_memory_utilization"] = 0.5
                     if "model" not in kwargs:
                         kwargs["model"] = model_path
+                    if version.parse(vllm_version) >= version.parse("0.10.1") and "logits_processors" not in kwargs:
+                        kwargs["logits_processors"] = [MinerULogitsProcessor]
                     # 使用kwargs为 vllm初始化参数
                     vllm_llm = vllm.LLM(**kwargs)
                 elif backend == "vllm-async-engine":
                     try:
                         from vllm.engine.arg_utils import AsyncEngineArgs
                         from vllm.v1.engine.async_llm import AsyncLLM
+                        from vllm import __version__ as vllm_version
+                        from mineru_vl_utils import MinerULogitsProcessor
                     except ImportError:
                         raise ImportError("Please install vllm to use the vllm-async-engine backend.")
                     if "gpu_memory_utilization" not in kwargs:
                         kwargs["gpu_memory_utilization"] = 0.5
                     if "model" not in kwargs:
                         kwargs["model"] = model_path
+                    if version.parse(vllm_version) >= version.parse("0.10.1") and "logits_processors" not in kwargs:
+                        kwargs["logits_processors"] = [MinerULogitsProcessor]
                     # 使用kwargs为 vllm初始化参数
                     vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
             self._models[key] = MinerUClient(

+ 8 - 0
mineru/model/vlm_vllm_model/server.py

@@ -1,7 +1,10 @@
 import sys
 
 from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
+
 from vllm.entrypoints.cli.main import main as vllm_main
+from vllm import __version__ as vllm_version
+from packaging import version
 
 
 def main():
@@ -9,6 +12,7 @@ def main():
 
     has_port_arg = False
     has_gpu_memory_utilization_arg = False
+    has_logits_processors_arg = False
     model_path = None
     model_arg_indices = []
 
@@ -18,6 +22,8 @@ def main():
             has_port_arg = True
         if arg == "--gpu-memory-utilization" or arg.startswith("--gpu-memory-utilization="):
             has_gpu_memory_utilization_arg = True
+        if arg == "--logits-processors" or arg.startswith("--logits-processors="):
+            has_logits_processors_arg = True
         if arg == "--model":
             if i + 1 < len(args):
                 model_path = args[i + 1]
@@ -38,6 +44,8 @@ def main():
         args.extend(["--gpu-memory-utilization", "0.5"])
     if not model_path:
         model_path = auto_download_and_get_model_root_path("/", "vlm")
+    if not has_logits_processors_arg and version.parse(vllm_version) >= version.parse("0.10.1"):
+        args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"])
 
     # 重构参数,将模型路径作为位置参数
     sys.argv = [sys.argv[0]] + ["serve", model_path] + args

+ 1 - 1
pyproject.toml

@@ -39,7 +39,7 @@ dependencies = [
     "openai>=1.70.0,<2",
     "beautifulsoup4>=4.13.5,<5",
     "magika>=0.6.2,<0.7.0",
-    "mineru-vl-utils>=0.1.6,<1",
+    "mineru-vl-utils>=0.1.7,<1",
 ]
 
 [project.optional-dependencies]