فهرست منبع

feat: add Mac environment checks and support for Apple Silicon in backend selection

myhloli 2 هفته پیش
والد
کامیت
2f1369a877
6 فایل تغییر یافته به همراه 63 افزوده شده و 14 حذف شده
  1. 1 0
      demo/demo.py
  2. 7 1
      mineru/backend/vlm/vlm_analyze.py
  3. 19 12
      mineru/cli/client.py
  4. 4 1
      mineru/cli/gradio_app.py
  5. 28 0
      mineru/utils/check_mac_env.py
  6. 4 0
      pyproject.toml

+ 1 - 0
demo/demo.py

@@ -235,5 +235,6 @@ if __name__ == '__main__':
 
     """To enable VLM mode, change the backend to 'vlm-xxx'"""
     # parse_doc(doc_path_list, output_dir, backend="vlm-transformers")  # more general.
+    # parse_doc(doc_path_list, output_dir, backend="vlm-mlx-engine")  # faster than transformers on macOS 13.5+.
     # parse_doc(doc_path_list, output_dir, backend="vlm-vllm-engine")  # faster(engine).
     # parse_doc(doc_path_list, output_dir, backend="vlm-http-client", server_url="http://127.0.0.1:30000")  # faster(client).

+ 7 - 1
mineru/backend/vlm/vlm_analyze.py

@@ -47,7 +47,7 @@ class ModelSingleton:
             for param in ["batch_size", "max_concurrency", "http_timeout"]:
                 if param in kwargs:
                     del kwargs[param]
-            if backend in ['transformers', 'vllm-engine', "vllm-async-engine"] and not model_path:
+            if backend in ['transformers', 'vllm-engine', "vllm-async-engine", "mlx-engine"] and not model_path:
                 model_path = auto_download_and_get_model_root_path("/","vlm")
                 if backend == "transformers":
                     try:
@@ -75,6 +75,12 @@ class ModelSingleton:
                     )
                     if batch_size == 0:
                         batch_size = set_default_batch_size()
+                elif backend == "mlx-engine":
+                    try:
+                        from mlx_vlm import load as mlx_load
+                    except ImportError:
+                        raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
+                    model, processor = mlx_load(model_path)
                 else:
                     if os.getenv('OMP_NUM_THREADS') is None:
                         os.environ["OMP_NUM_THREADS"] = "1"

+ 19 - 12
mineru/cli/client.py

@@ -4,6 +4,7 @@ import click
 from pathlib import Path
 from loguru import logger
 
+from mineru.utils.check_mac_env import is_mac_os_version_supported
 from mineru.utils.cli_parser import arg_parse
 from mineru.utils.config_reader import get_device
 from mineru.utils.guess_suffix_or_lang import guess_suffix_by_path
@@ -11,6 +12,11 @@ from mineru.utils.model_utils import get_vram
 from ..version import __version__
 from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
 
+
+backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']
+if is_mac_os_version_supported():
+    backends.append("vlm-mlx-engine")
+
 @click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
 @click.pass_context
 @click.version_option(__version__,
@@ -38,11 +44,11 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     '--method',
     'method',
     type=click.Choice(['auto', 'txt', 'ocr']),
-    help="""the method for parsing pdf:
-    auto: Automatically determine the method based on the file type.
-    txt: Use text extraction method.
-    ocr: Use OCR method for image-based PDFs.
-    Without method specified, 'auto' will be used by default.
+    help="""the method for parsing pdf:\n
+    auto: Automatically determine the method based on the file type.\n
+    txt: Use text extraction method.\n
+    ocr: Use OCR method for image-based PDFs.\n
+    Without method specified, 'auto' will be used by default.\n
     Adapted only for the case where the backend is set to "pipeline".""",
     default='auto',
 )
@@ -50,12 +56,13 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     '-b',
     '--backend',
     'backend',
-    type=click.Choice(['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']),
-    help="""the backend for parsing pdf:
-    pipeline: More general.
-    vlm-transformers: More general.
-    vlm-vllm-engine: Faster(engine).
-    vlm-http-client: Faster(client).
+    type=click.Choice(backends),
+    help="""the backend for parsing pdf:\n
+    pipeline: More general.\n
+    vlm-transformers: More general.\n
+    vlm-mlx-engine: Faster than transformers (macOS 13.5+).\n
+    vlm-vllm-engine: Faster(engine).\n
+    vlm-http-client: Faster(client).\n
     without method specified, pipeline will be used by default.""",
     default='pipeline',
 )
@@ -66,7 +73,7 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     type=click.Choice(['ch', 'ch_server', 'ch_lite', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka', 'th', 'el',
                        'latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']),
     help="""
-    Input the languages in the pdf (if known) to improve OCR accuracy.  Optional.
+    Input the languages in the pdf (if known) to improve OCR accuracy.
     Without languages specified, 'ch' will be used by default.
     Adapted only for the case where the backend is set to "pipeline".
     """,

+ 4 - 1
mineru/cli/gradio_app.py

@@ -13,6 +13,7 @@ from gradio_pdf import PDF
 from loguru import logger
 
 from mineru.cli.common import prepare_env, read_fn, aio_do_parse, pdf_suffixes, image_suffixes
+from mineru.utils.check_mac_env import is_mac_os_version_supported
 from mineru.utils.cli_parser import arg_parse
 from mineru.utils.hash_utils import str_sha256
 
@@ -273,7 +274,7 @@ def to_pdf(file_path):
 
 # 更新界面函数
 def update_interface(backend_choice):
-    if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine"]:
+    if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-mlx-engine"]:
         return gr.update(visible=False), gr.update(visible=False)
     elif backend_choice in ["vlm-http-client"]:
         return gr.update(visible=True), gr.update(visible=False)
@@ -381,6 +382,8 @@ def main(ctx,
                         preferred_option = "vlm-vllm-async-engine"
                     else:
                         drop_list = ["pipeline", "vlm-transformers", "vlm-http-client"]
+                        if is_mac_os_version_supported():
+                            drop_list.append("vlm-mlx-engine")
                         preferred_option = "pipeline"
                     backend = gr.Dropdown(drop_list, label="Backend", value=preferred_option)
                 with gr.Row(visible=False) as client_options:

+ 28 - 0
mineru/utils/check_mac_env.py

@@ -0,0 +1,28 @@
+# Copyright (c) Opendatalab. All rights reserved.
+import platform
+
+from packaging import version
+
+
+# 检测当前环境是否为Mac电脑
+def is_mac_environment() -> bool:
+    return platform.system() == "Darwin"
+
+
+# 检测cpu是否为Apple Silicon架构
+def is_apple_silicon_cpu() -> bool:
+    return platform.machine() in ["arm64", "aarch64"]
+
+
+# 如果是Mac电脑且为Apple Silicon架构,检测macOS版本是否在13.5以上
+def is_mac_os_version_supported(min_version: str = "13.5") -> bool:
+    if not is_mac_environment() or not is_apple_silicon_cpu():
+        return False
+    mac_version = platform.mac_ver()[0]
+    # print("Mac OS Version:", mac_version)
+    return version.parse(mac_version) >= version.parse(min_version)
+
+if __name__ == '__main__':
+    print("Is Mac Environment:", is_mac_environment())
+    print("Is Apple Silicon CPU:", is_apple_silicon_cpu())
+    print("Is Mac OS Version Supported (>=13.5):", is_mac_os_version_supported())

+ 4 - 0
pyproject.toml

@@ -58,6 +58,9 @@ vlm = [
 vllm = [
     "vllm>=0.10.1.1,<0.12",
 ]
+mlx = [
+    "mlx-vlm>=0.3.3,<0.4",
+]
 pipeline = [
     "matplotlib>=3.10,<4",
     "ultralytics>=8.3.48,<9",
@@ -87,6 +90,7 @@ core = [
     "mineru[pipeline]",
     "mineru[api]",
     "mineru[gradio]",
+    "mineru[mlx] ; sys_platform == 'darwin'",
 ]
 all = [
     "mineru[core]",