
Merge pull request #3891 from myhloli/dev

Dev
Xiaomeng Zhao 2 weeks ago
commit d06b105102

+ 19 - 0
docs/en/usage/quick_usage.md

@@ -86,6 +86,25 @@ Here are some available configuration options:
    * Compatible with all LLM models supporting the `openai protocol`; defaults to Alibaba Cloud Bailian's `qwen3-next-80b-a3b-instruct` model.
     * You need to configure your own API key and set `enable` to `true` to enable this feature.
     * If your API provider does not support the `enable_thinking` parameter, please manually remove it.
+        * For example, in your configuration file, the `llm-aided-config` section may look like:
+          ```json
+          "llm-aided-config": {
+             "api_key": "your_api_key",
+             "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+             "model": "qwen3-next-80b-a3b-instruct",
+             "enable_thinking": false,
+             "enable": false
+          }
+          ```
+        * To remove the `enable_thinking` parameter, simply delete the line containing `"enable_thinking": false`, resulting in:
+          ```json
+          "llm-aided-config": {
+             "api_key": "your_api_key",
+             "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+             "model": "qwen3-next-80b-a3b-instruct",
+             "enable": false
+          }
+          ```
   
 - `models-dir`: 
     * Used to specify local model storage directory

+ 19 - 0
docs/zh/usage/quick_usage.md

@@ -85,6 +85,25 @@ MinerU now works out of the box, but it also supports extending its functionality through a configuration file.
    * Defaults to Alibaba Cloud Bailian's `qwen3-next-80b-a3b-instruct` model
    * You need to configure your own API key and set `enable` to `true` to enable this feature
    * If your API provider does not support the `enable_thinking` parameter, please remove it manually
+        * For example, in your configuration file, the `llm-aided-config` section may look like:
+          ```json
+          "llm-aided-config": {
+             "api_key": "your_api_key",
+             "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+             "model": "qwen3-next-80b-a3b-instruct",
+             "enable_thinking": false,
+             "enable": false
+          }
+          ```
+        * To remove the `enable_thinking` parameter, simply delete the line containing `"enable_thinking": false`, resulting in:
+          ```json
+          "llm-aided-config": {
+             "api_key": "your_api_key",
+             "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+             "model": "qwen3-next-80b-a3b-instruct",
+             "enable": false
+          }
+          ```
   
 - `models-dir`:
    * Used to specify the local model storage directory; please specify separate model directories for the `pipeline` and `vlm` backends,

+ 3 - 2
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py

@@ -286,9 +286,10 @@ def union_make(pdf_info_dict: list,
             page_markdown = make_blocks_to_markdown(paras_of_layout, make_mode, img_buket_path)
             output_content.extend(page_markdown)
         elif make_mode == MakeMode.CONTENT_LIST:
-            if not paras_of_layout + paras_of_discarded:
+            para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
+            if not para_blocks:
                 continue
-            for para_block in paras_of_layout + paras_of_discarded:
+            for para_block in para_blocks:
                 para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx, page_size)
                 if para_content:
                     output_content.append(para_content)
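Both `union_make` hunks in this commit (the one above and the matching change in `mineru/backend/vlm/vlm_middle_json_mkcontent.py` below) swap a direct list concatenation for a `None`-safe one; the `(x or [])` wrapping suggests that either `paras_of_layout` or `paras_of_discarded` can be `None` for some pages, in which case the old `paras_of_layout + paras_of_discarded` expression would raise a `TypeError`. A minimal standalone sketch of the pattern, using hypothetical values rather than repository data:

```python
# Standalone sketch, not repository code: concatenating None with a list raises
# TypeError, while the (x or []) guard degrades None to an empty list.
paras_of_layout = None                        # hypothetical page with no layout paragraphs
paras_of_discarded = [{"type": "discarded"}]  # hypothetical discarded block

# Old form would fail here:
#   paras_of_layout + paras_of_discarded  ->  TypeError
para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
print(para_blocks)   # [{'type': 'discarded'}]
```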

+ 3 - 2
mineru/backend/vlm/vlm_middle_json_mkcontent.py

@@ -254,9 +254,10 @@ def union_make(pdf_info_dict: list,
             page_markdown = mk_blocks_to_markdown(paras_of_layout, make_mode, formula_enable, table_enable, img_buket_path)
             output_content.extend(page_markdown)
         elif make_mode == MakeMode.CONTENT_LIST:
-            if not paras_of_layout + paras_of_discarded:
+            para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
+            if not para_blocks:
                 continue
-            for para_block in paras_of_layout + paras_of_discarded:
+            for para_block in para_blocks:
                 para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx, page_size)
                 output_content.append(para_content)
 

+ 2 - 0
mineru/utils/check_mac_env.py

@@ -19,6 +19,8 @@ def is_mac_os_version_supported(min_version: str = "13.5") -> bool:
     if not is_mac_environment() or not is_apple_silicon_cpu():
         return False
     mac_version = platform.mac_ver()[0]
+    if not mac_version:
+        return False
     # print("Mac OS Version:", mac_version)
     return version.parse(mac_version) >= version.parse(min_version)
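A minimal standalone sketch of why the added empty-string guard is needed, assuming a recent `packaging` release that rejects empty version strings (`platform.mac_ver()[0]` can return `""` when the macOS version cannot be determined; the helper name `is_supported` below is illustrative, not from the repository):

```python
# Standalone sketch, not repository code: bail out before parsing an empty
# version string, since version.parse("") raises InvalidVersion in recent
# packaging releases.
from packaging import version

def is_supported(mac_version: str, min_version: str = "13.5") -> bool:
    if not mac_version:   # mirrors the guard added in check_mac_env.py
        return False
    return version.parse(mac_version) >= version.parse(min_version)

print(is_supported("14.4"))  # True
print(is_supported(""))      # False (instead of raising)
```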