فهرست منبع

Merge pull request #2751 from myhloli/dev

Dev
Xiaomeng Zhao 5 ماه پیش
والد
کامیت
3c8385c2c6

+ 2 - 5
.github/ISSUE_TEMPLATE/bug_report.yml

@@ -109,14 +109,11 @@ body:
   - type: dropdown
     id: software_version
     attributes:
-      label: Software version | 软件版本 (magic-pdf --version)
+      label: Software version | 软件版本 (mineru --version)
       #multiple: false
       options:
         -
-        - "1.0.x"
-        - "1.1.x"
-        - "1.2.x"
-        - "1.3.x"
+        - "2.0.x"
     validations:
       required: true
 

+ 5 - 1
README.md

@@ -502,7 +502,11 @@ cd MinerU
 uv pip install -e .[core]
 ```
 
-#### 1.3 Install the Full Version (Supports sglang Acceleration)
+> [!TIP]  
+> Linux and macOS systems automatically support CUDA/MPS acceleration after installation. Windows users who want to use CUDA acceleration
+> should visit the [PyTorch official website](https://pytorch.org/get-started/locally/) and install the PyTorch build that matches their CUDA version.
+
+#### 1.3 Install the Full Version (supports sglang acceleration; requires a GPU with Ampere or newer architecture and at least 24 GB of GPU memory)
 
 If you need to use **sglang to accelerate VLM model inference**, you can choose any of the following methods to install the full version:
 

+ 5 - 1
README_zh-CN.md

@@ -492,7 +492,11 @@ cd MinerU
 uv pip install -e .[core] -i https://mirrors.aliyun.com/pypi/simple
 ```
 
-#### 1.3 安装完整版(支持 sglang 加速)
+> [!TIP]
+> Linux 和 macOS 系统安装后自动支持 CUDA/MPS 加速,Windows 用户如需使用 CUDA 加速,请前往 [PyTorch 官网](https://pytorch.org/get-started/locally/)
+> 选择合适的 CUDA 版本安装 PyTorch。
+
+#### 1.3 安装完整版(支持 sglang 加速)(需确保设备配备 Ampere 及以后架构、显存 24 GB 及以上的显卡)
 
 如需使用 **sglang 加速 VLM 模型推理**,请选择合适的方式安装完整版本:
 

+ 26 - 19
mineru/backend/vlm/vlm_magic_model.py

@@ -1,6 +1,8 @@
 import re
 from typing import Literal
 
+from loguru import logger
+
 from mineru.utils.boxbase import bbox_distance, is_in
 from mineru.utils.enum_class import ContentType, BlockType, SplitFlag
 from mineru.backend.vlm.vlm_middle_json_mkcontent import merge_para_with_text
@@ -22,25 +24,30 @@ class MagicModel:
         # 解析每个块
         for index, block_info in enumerate(block_infos):
             block_bbox = block_info[0].strip()
-            x1, y1, x2, y2 = map(int, block_bbox.split())
-            x_1, y_1, x_2, y_2 = (
-                int(x1 * width / 1000),
-                int(y1 * height / 1000),
-                int(x2 * width / 1000),
-                int(y2 * height / 1000),
-            )
-            if x_2 < x_1:
-                x_1, x_2 = x_2, x_1
-            if y_2 < y_1:
-                y_1, y_2 = y_2, y_1
-            block_bbox = (x_1, y_1, x_2, y_2)
-            block_type = block_info[1].strip()
-            block_content = block_info[2].strip()
-
-            # print(f"坐标: {block_bbox}")
-            # print(f"类型: {block_type}")
-            # print(f"内容: {block_content}")
-            # print("-" * 50)
+            try:
+                x1, y1, x2, y2 = map(int, block_bbox.split())
+                x_1, y_1, x_2, y_2 = (
+                    int(x1 * width / 1000),
+                    int(y1 * height / 1000),
+                    int(x2 * width / 1000),
+                    int(y2 * height / 1000),
+                )
+                if x_2 < x_1:
+                    x_1, x_2 = x_2, x_1
+                if y_2 < y_1:
+                    y_1, y_2 = y_2, y_1
+                block_bbox = (x_1, y_1, x_2, y_2)
+                block_type = block_info[1].strip()
+                block_content = block_info[2].strip()
+
+                # print(f"坐标: {block_bbox}")
+                # print(f"类型: {block_type}")
+                # print(f"内容: {block_content}")
+                # print("-" * 50)
+            except Exception as e:
+                # 如果解析失败,可能是因为格式不正确,跳过这个块
+                logger.warning(f"Invalid block format: {block_info}, error: {e}")
+                continue
 
             span_type = "unknown"
             if block_type in [

+ 1 - 1
mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py

@@ -58,7 +58,7 @@ class PytorchPaddleOCR(TextSystem):
 
         device = get_device()
         if device == 'cpu' and self.lang in ['ch', 'ch_server', 'japan', 'chinese_cht']:
-            logger.warning("The current device in use is CPU. To ensure the speed of parsing, the language is automatically switched to ch_lite.")
+            # logger.warning("The current device in use is CPU. To ensure the speed of parsing, the language is automatically switched to ch_lite.")
             self.lang = 'ch_lite'
 
         if self.lang in latin_lang: