Explorar o código

Merge pull request #2686 from opendatalab/release-2.0.4

Release 2.0.4
Xiaomeng Zhao hai 5 meses
pai
achega
e9175b1937

+ 11 - 0
README.md

@@ -51,6 +51,9 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
 </div>
 
 # Changelog
+- 2025/06/17 2.0.4 released
+  - Fixed the issue where models were still required to be downloaded in the `sglang-client` mode  
+  - Fixed the issue where only the first instance would take effect when attempting to launch multiple `sglang-client` instances via multiple URLs within the same process
 - 2025/06/15 2.0.3 released
   - Fixed a configuration file key-value update error that occurred when downloading model type was set to `all`
   - Fixed the issue where the formula and table feature toggle switches were not working in `command line mode`, causing the features to remain enabled.
@@ -533,6 +536,14 @@ If you need to use **sglang to accelerate VLM model inference**, you can choose
 > [!TIP]
 > The Dockerfile uses `lmsysorg/sglang:v0.4.7-cu124` as the default base image. If necessary, you can modify it to another platform version.
 
+
+#### 1.4 Install client (for connecting to sglang-server on edge devices that require only CPU and network connectivity)
+
+```bash
+uv pip install -U mineru
+mineru -p <input_path> -o <output_path> -b vlm-sglang-client -u http://<host_ip>:<port>
+```
+
 ---
 
 ### 2. Using MinerU

+ 10 - 0
README_zh-CN.md

@@ -50,6 +50,9 @@
 </div>
 
 # 更新记录
+- 2025/06/17 2.0.4发布
+  - 修复了`sglang-client`模式下依然需要下载模型的问题
+  - 修复了同一进程内尝试通过多个url启动多个`sglang-client`实例时,只有第一个生效的问题
 - 2025/06/15 2.0.3发布
   - 修复了当下载模型类型设置为`all`时,配置文件出现键值更新错误的问题
   - 修复了命令行模式下公式和表格功能开关不生效导致功能无法关闭的问题
@@ -522,6 +525,13 @@ uv pip install -e .[core] -i https://mirrors.aliyun.com/pypi/simple
   
 > [!TIP]
 > Dockerfile默认使用`lmsysorg/sglang:v0.4.7-cu124`作为基础镜像,如有需要,您可以自行修改为其他平台版本。
+  
+#### 1.4 安装client(用于在仅需 CPU 和网络连接的边缘设备上连接 sglang-server)
+
+```bash
+uv pip install -U mineru -i https://mirrors.aliyun.com/pypi/simple
+mineru -p <input_path> -o <output_path> -b vlm-sglang-client -u http://<host_ip>:<port>
+```
 
 ---
 

+ 6 - 2
mineru/backend/pipeline/pipeline_analyze.py

@@ -76,7 +76,11 @@ def doc_analyze(
         formula_enable=True,
         table_enable=True,
 ):
-    MIN_BATCH_INFERENCE_SIZE = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100))
+    """
+    适当调大MIN_BATCH_INFERENCE_SIZE可以提高性能,可能会增加显存使用量,
+    可通过环境变量MINERU_MIN_BATCH_INFERENCE_SIZE设置,默认值为100。
+    """
+    min_batch_inference_size = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100))
 
     # 收集所有页面信息
     all_pages_info = []  # 存储(dataset_index, page_index, img, ocr, lang, width, height)
@@ -109,7 +113,7 @@ def doc_analyze(
 
     # 准备批处理
     images_with_extra_info = [(info[2], info[3], info[4]) for info in all_pages_info]
-    batch_size = MIN_BATCH_INFERENCE_SIZE
+    batch_size = min_batch_inference_size
     batch_images = [
         images_with_extra_info[i:i + batch_size]
         for i in range(0, len(images_with_extra_info), batch_size)

+ 2 - 2
mineru/backend/vlm/vlm_analyze.py

@@ -27,9 +27,9 @@ class ModelSingleton:
         model_path: str | None,
         server_url: str | None,
     ) -> BasePredictor:
-        key = (backend,)
+        key = (backend, model_path, server_url)
         if key not in self._models:
-            if not model_path:
+            if backend in ['transformers', 'sglang-engine'] and not model_path:
                 model_path = auto_download_and_get_model_root_path("/","vlm")
             self._models[key] = get_predictor(
                 backend=backend,

+ 1 - 1
mineru/model/mfr/unimernet/Unimernet.py

@@ -21,7 +21,7 @@ class MathDataset(Dataset):
 class UnimernetModel(object):
     def __init__(self, weight_dir, _device_="cpu"):
         from .unimernet_hf import UnimernetModel
-        if _device_.startswith("mps"):
+        if _device_.startswith("mps") or _device_.startswith("npu"):
             self.model = UnimernetModel.from_pretrained(weight_dir, attn_implementation="eager")
         else:
             self.model = UnimernetModel.from_pretrained(weight_dir)

+ 17 - 0
mineru/model/vlm_sglang_model/server.py

@@ -6,10 +6,26 @@ from sglang.srt.entrypoints.http_server import app, generate_request, launch_ser
 from sglang.srt.managers.io_struct import GenerateReqInput
 from sglang.srt.server_args import prepare_server_args
 from sglang.srt.utils import kill_process_tree
+from sglang.srt.conversation import Conversation
 
 from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
 from .logit_processor import Mineru2LogitProcessor
 
+# mineru2.0的chat_template与chatml在换行上有微小区别
+def custom_get_prompt(self) -> str:
+    system_prompt = self.system_template.format(system_message=self.system_message)
+    if self.system_message == "":
+        ret = ""
+    else:
+        ret = system_prompt + self.sep
+
+    for role, message in self.messages:
+        if message:
+            ret += role + "\n" + message + self.sep
+        else:
+            ret += role + "\n"
+    return ret
+
 _custom_logit_processor_str = Mineru2LogitProcessor().to_str()
 
 # remote the existing /generate route
@@ -45,6 +61,7 @@ def main():
 
     if server_args.chat_template is None:
         server_args.chat_template = "chatml"
+        Conversation.get_prompt = custom_get_prompt
 
     server_args.enable_custom_logit_processor = True
 

+ 2 - 0
mineru/utils/enum_class.py

@@ -33,9 +33,11 @@ class CategoryId:
     TableCaption = 6
     TableFootnote = 7
     InterlineEquation_Layout = 8
+    InterlineEquationNumber_Layout = 9
     InlineEquation = 13
     InterlineEquation_YOLO = 14
     OcrText = 15
+    LowScoreText = 16
     ImageFootnote = 101