فهرست منبع

feat: add Mac environment checks and support for Apple Silicon in backend selection

myhloli 2 هفته پیش
والد
کامیت
2f1369a877
6 فایل تغییر یافته به همراه 63 افزوده شده و 14 حذف شده
  1. 1 0
      demo/demo.py
  2. 7 1
      mineru/backend/vlm/vlm_analyze.py
  3. 19 12
      mineru/cli/client.py
  4. 4 1
      mineru/cli/gradio_app.py
  5. 28 0
      mineru/utils/check_mac_env.py
  6. 4 0
      pyproject.toml

+ 1 - 0
demo/demo.py

@@ -235,5 +235,6 @@ if __name__ == '__main__':
 
     """To enable VLM mode, change the backend to 'vlm-xxx'"""
     # parse_doc(doc_path_list, output_dir, backend="vlm-transformers")  # more general.
+    # parse_doc(doc_path_list, output_dir, backend="vlm-mlx-engine")  # faster than transformers on macOS 13.5+.
     # parse_doc(doc_path_list, output_dir, backend="vlm-vllm-engine")  # faster(engine).
     # parse_doc(doc_path_list, output_dir, backend="vlm-http-client", server_url="http://127.0.0.1:30000")  # faster(client).

+ 7 - 1
mineru/backend/vlm/vlm_analyze.py

@@ -47,7 +47,7 @@ class ModelSingleton:
             for param in ["batch_size", "max_concurrency", "http_timeout"]:
                 if param in kwargs:
                     del kwargs[param]
-            if backend in ['transformers', 'vllm-engine', "vllm-async-engine"] and not model_path:
+            if backend in ['transformers', 'vllm-engine', "vllm-async-engine", "mlx-engine"] and not model_path:
                 model_path = auto_download_and_get_model_root_path("/","vlm")
                 if backend == "transformers":
                     try:
@@ -75,6 +75,12 @@ class ModelSingleton:
                     )
                     if batch_size == 0:
                         batch_size = set_default_batch_size()
+                elif backend == "mlx-engine":
+                    try:
+                        from mlx_vlm import load as mlx_load
+                    except ImportError:
+                        raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
+                    model, processor = mlx_load(model_path)
                 else:
                     if os.getenv('OMP_NUM_THREADS') is None:
                         os.environ["OMP_NUM_THREADS"] = "1"

+ 19 - 12
mineru/cli/client.py

@@ -4,6 +4,7 @@ import click
 from pathlib import Path
 from loguru import logger
 
+from mineru.utils.check_mac_env import is_mac_os_version_supported
 from mineru.utils.cli_parser import arg_parse
 from mineru.utils.config_reader import get_device
 from mineru.utils.guess_suffix_or_lang import guess_suffix_by_path
@@ -11,6 +12,11 @@ from mineru.utils.model_utils import get_vram
 from ..version import __version__
 from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
 
+
+backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']
+if is_mac_os_version_supported():
+    backends.append("vlm-mlx-engine")
+
 @click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
 @click.pass_context
 @click.version_option(__version__,
@@ -38,11 +44,11 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     '--method',
     'method',
     type=click.Choice(['auto', 'txt', 'ocr']),
-    help="""the method for parsing pdf:
-    auto: Automatically determine the method based on the file type.
-    txt: Use text extraction method.
-    ocr: Use OCR method for image-based PDFs.
-    Without method specified, 'auto' will be used by default.
+    help="""the method for parsing pdf:\n
+    auto: Automatically determine the method based on the file type.\n
+    txt: Use text extraction method.\n
+    ocr: Use OCR method for image-based PDFs.\n
+    Without method specified, 'auto' will be used by default.\n
     Adapted only for the case where the backend is set to "pipeline".""",
     default='auto',
 )
@@ -50,12 +56,13 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     '-b',
     '--backend',
     'backend',
-    type=click.Choice(['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']),
-    help="""the backend for parsing pdf:
-    pipeline: More general.
-    vlm-transformers: More general.
-    vlm-vllm-engine: Faster(engine).
-    vlm-http-client: Faster(client).
+    type=click.Choice(backends),
+    help="""the backend for parsing pdf:\n
+    pipeline: More general.\n
+    vlm-transformers: More general.\n
+    vlm-mlx-engine: Faster than transformers (macOS 13.5+).\n
+    vlm-vllm-engine: Faster(engine).\n
+    vlm-http-client: Faster(client).\n
     without method specified, pipeline will be used by default.""",
     default='pipeline',
 )
@@ -66,7 +73,7 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     type=click.Choice(['ch', 'ch_server', 'ch_lite', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka', 'th', 'el',
                        'latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']),
     help="""
-    Input the languages in the pdf (if known) to improve OCR accuracy.  Optional.
+    Input the languages in the pdf (if known) to improve OCR accuracy.
     Without languages specified, 'ch' will be used by default.
     Adapted only for the case where the backend is set to "pipeline".
     """,

+ 4 - 1
mineru/cli/gradio_app.py

@@ -13,6 +13,7 @@ from gradio_pdf import PDF
 from loguru import logger
 
 from mineru.cli.common import prepare_env, read_fn, aio_do_parse, pdf_suffixes, image_suffixes
+from mineru.utils.check_mac_env import is_mac_os_version_supported
 from mineru.utils.cli_parser import arg_parse
 from mineru.utils.hash_utils import str_sha256
 
@@ -273,7 +274,7 @@ def to_pdf(file_path):
 
 # 更新界面函数
 def update_interface(backend_choice):
-    if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine"]:
+    if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-mlx-engine"]:
         return gr.update(visible=False), gr.update(visible=False)
     elif backend_choice in ["vlm-http-client"]:
         return gr.update(visible=True), gr.update(visible=False)
@@ -381,6 +382,8 @@ def main(ctx,
                         preferred_option = "vlm-vllm-async-engine"
                     else:
                         drop_list = ["pipeline", "vlm-transformers", "vlm-http-client"]
+                        if is_mac_os_version_supported():
+                            drop_list.append("vlm-mlx-engine")
                         preferred_option = "pipeline"
                     backend = gr.Dropdown(drop_list, label="Backend", value=preferred_option)
                 with gr.Row(visible=False) as client_options:

+ 28 - 0
mineru/utils/check_mac_env.py

@@ -0,0 +1,28 @@
+# Copyright (c) Opendatalab. All rights reserved.
+import platform
+
+from packaging import version
+
+
+# 检测当前环境是否为Mac电脑
+def is_mac_environment() -> bool:
+    return platform.system() == "Darwin"
+
+
+# 检测cpu是否为Apple Silicon架构
+def is_apple_silicon_cpu() -> bool:
+    return platform.machine() in ["arm64", "aarch64"]
+
+
+# 如果是Mac电脑且为Apple Silicon架构,检测macOS版本是否在13.5以上
+def is_mac_os_version_supported(min_version: str = "13.5") -> bool:
+    if not is_mac_environment() or not is_apple_silicon_cpu():
+        return False
+    mac_version = platform.mac_ver()[0]
+    # print("Mac OS Version:", mac_version)
+    return version.parse(mac_version) >= version.parse(min_version)
+
+if __name__ == '__main__':
+    print("Is Mac Environment:", is_mac_environment())
+    print("Is Apple Silicon CPU:", is_apple_silicon_cpu())
+    print("Is Mac OS Version Supported (>=13.5):", is_mac_os_version_supported())

+ 4 - 0
pyproject.toml

@@ -58,6 +58,9 @@ vlm = [
 vllm = [
     "vllm>=0.10.1.1,<0.12",
 ]
+mlx = [
+    "mlx-vlm>=0.3.3,<0.4",
+]
 pipeline = [
     "matplotlib>=3.10,<4",
     "ultralytics>=8.3.48,<9",
@@ -87,6 +90,7 @@ core = [
     "mineru[pipeline]",
     "mineru[api]",
     "mineru[gradio]",
+    "mineru[mlx] ; sys_platform == 'darwin'",
 ]
 all = [
     "mineru[core]",