Browse source

Merge pull request #3855 from myhloli/dev

feat: add Mac environment checks and support for Apple Silicon in backend selection
Xiaomeng Zhao 2 weeks ago
Parent
Commit
1f5db12adb
6 changed files with 63 additions and 14 deletions
  1. 1 0
      demo/demo.py
  2. 7 1
      mineru/backend/vlm/vlm_analyze.py
  3. 19 12
      mineru/cli/client.py
  4. 4 1
      mineru/cli/gradio_app.py
  5. 28 0
      mineru/utils/check_mac_env.py
  6. 4 0
      pyproject.toml

+ 1 - 0
demo/demo.py

@@ -235,5 +235,6 @@ if __name__ == '__main__':
 
     """To enable VLM mode, change the backend to 'vlm-xxx'"""
     # parse_doc(doc_path_list, output_dir, backend="vlm-transformers")  # more general.
+    # parse_doc(doc_path_list, output_dir, backend="vlm-mlx-engine")  # faster than transformers in macOS 13.5+.
     # parse_doc(doc_path_list, output_dir, backend="vlm-vllm-engine")  # faster(engine).
     # parse_doc(doc_path_list, output_dir, backend="vlm-http-client", server_url="http://127.0.0.1:30000")  # faster(client).

+ 7 - 1
mineru/backend/vlm/vlm_analyze.py

@@ -47,7 +47,7 @@ class ModelSingleton:
             for param in ["batch_size", "max_concurrency", "http_timeout"]:
                 if param in kwargs:
                     del kwargs[param]
-            if backend in ['transformers', 'vllm-engine', "vllm-async-engine"] and not model_path:
+            if backend in ['transformers', 'vllm-engine', "vllm-async-engine", "mlx-engine"] and not model_path:
                 model_path = auto_download_and_get_model_root_path("/","vlm")
                 if backend == "transformers":
                     try:
@@ -75,6 +75,12 @@ class ModelSingleton:
                     )
                     if batch_size == 0:
                         batch_size = set_default_batch_size()
+                elif backend == "mlx-engine":
+                    try:
+                        from mlx_vlm import load as mlx_load
+                    except ImportError:
+                        raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
+                    model, processor = mlx_load(model_path)
                 else:
                     if os.getenv('OMP_NUM_THREADS') is None:
                         os.environ["OMP_NUM_THREADS"] = "1"

+ 19 - 12
mineru/cli/client.py

@@ -4,6 +4,7 @@ import click
 from pathlib import Path
 from loguru import logger
 
+from mineru.utils.check_mac_env import is_mac_os_version_supported
 from mineru.utils.cli_parser import arg_parse
 from mineru.utils.config_reader import get_device
 from mineru.utils.guess_suffix_or_lang import guess_suffix_by_path
@@ -11,6 +12,11 @@ from mineru.utils.model_utils import get_vram
 from ..version import __version__
 from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
 
+
+backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']
+if is_mac_os_version_supported():
+    backends.append("vlm-mlx-engine")
+
 @click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
 @click.pass_context
 @click.version_option(__version__,
@@ -38,11 +44,11 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     '--method',
     'method',
     type=click.Choice(['auto', 'txt', 'ocr']),
-    help="""the method for parsing pdf:
-    auto: Automatically determine the method based on the file type.
-    txt: Use text extraction method.
-    ocr: Use OCR method for image-based PDFs.
-    Without method specified, 'auto' will be used by default.
+    help="""the method for parsing pdf:\n
+    auto: Automatically determine the method based on the file type.\n
+    txt: Use text extraction method.\n
+    ocr: Use OCR method for image-based PDFs.\n
+    Without method specified, 'auto' will be used by default.\n
     Adapted only for the case where the backend is set to "pipeline".""",
     default='auto',
 )
@@ -50,12 +56,13 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     '-b',
     '--backend',
     'backend',
-    type=click.Choice(['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']),
-    help="""the backend for parsing pdf:
-    pipeline: More general.
-    vlm-transformers: More general.
-    vlm-vllm-engine: Faster(engine).
-    vlm-http-client: Faster(client).
+    type=click.Choice(backends),
+    help="""the backend for parsing pdf:\n
+    pipeline: More general.\n
+    vlm-transformers: More general.\n
+    vlm-mlx-engine: Faster than transformers (macOS 13.5+).\n
+    vlm-vllm-engine: Faster(engine).\n
+    vlm-http-client: Faster(client).\n
     without method specified, pipeline will be used by default.""",
     default='pipeline',
 )
@@ -66,7 +73,7 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     type=click.Choice(['ch', 'ch_server', 'ch_lite', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka', 'th', 'el',
                        'latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']),
     help="""
-    Input the languages in the pdf (if known) to improve OCR accuracy.  Optional.
+    Input the languages in the pdf (if known) to improve OCR accuracy.
     Without languages specified, 'ch' will be used by default.
     Adapted only for the case where the backend is set to "pipeline".
     """,

+ 4 - 1
mineru/cli/gradio_app.py

@@ -13,6 +13,7 @@ from gradio_pdf import PDF
 from loguru import logger
 
 from mineru.cli.common import prepare_env, read_fn, aio_do_parse, pdf_suffixes, image_suffixes
+from mineru.utils.check_mac_env import is_mac_os_version_supported
 from mineru.utils.cli_parser import arg_parse
 from mineru.utils.hash_utils import str_sha256
 
@@ -273,7 +274,7 @@ def to_pdf(file_path):
 
 # 更新界面函数
 def update_interface(backend_choice):
-    if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine"]:
+    if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-mlx-engine"]:
         return gr.update(visible=False), gr.update(visible=False)
     elif backend_choice in ["vlm-http-client"]:
         return gr.update(visible=True), gr.update(visible=False)
@@ -381,6 +382,8 @@ def main(ctx,
                         preferred_option = "vlm-vllm-async-engine"
                     else:
                         drop_list = ["pipeline", "vlm-transformers", "vlm-http-client"]
+                        if is_mac_os_version_supported():
+                            drop_list.append("vlm-mlx-engine")
                         preferred_option = "pipeline"
                     backend = gr.Dropdown(drop_list, label="Backend", value=preferred_option)
                 with gr.Row(visible=False) as client_options:

+ 28 - 0
mineru/utils/check_mac_env.py

@@ -0,0 +1,28 @@
+# Copyright (c) Opendatalab. All rights reserved.
+import platform
+
+from packaging import version
+
+
+# Detect if the current environment is a Mac computer
+def is_mac_environment() -> bool:
+    return platform.system() == "Darwin"
+
+
+# Detect if CPU is Apple Silicon architecture
+def is_apple_silicon_cpu() -> bool:
+    return platform.machine() in ["arm64", "aarch64"]
+
+
+# If Mac computer with Apple Silicon architecture, check if macOS version is 13.5 or above
+def is_mac_os_version_supported(min_version: str = "13.5") -> bool:
+    if not is_mac_environment() or not is_apple_silicon_cpu():
+        return False
+    mac_version = platform.mac_ver()[0]
+    # print("Mac OS Version:", mac_version)
+    return version.parse(mac_version) >= version.parse(min_version)
+
+if __name__ == "__main__":
+    print("Is Mac Environment:", is_mac_environment())
+    print("Is Apple Silicon CPU:", is_apple_silicon_cpu())
+    print("Is Mac OS Version Supported (>=13.5):", is_mac_os_version_supported())

+ 4 - 0
pyproject.toml

@@ -58,6 +58,9 @@ vlm = [
 vllm = [
     "vllm>=0.10.1.1,<0.12",
 ]
+mlx = [
+    "mlx-vlm>=0.3.3,<0.4",
+]
 pipeline = [
     "matplotlib>=3.10,<4",
     "ultralytics>=8.3.48,<9",
@@ -87,6 +90,7 @@ core = [
     "mineru[pipeline]",
     "mineru[api]",
     "mineru[gradio]",
+    "mineru[mlx] ; sys_platform == 'darwin'",
 ]
 all = [
     "mineru[core]",