|
|
@@ -4,6 +4,7 @@ import click
|
|
|
from pathlib import Path
|
|
|
from loguru import logger
|
|
|
|
|
|
+from mineru.utils.check_mac_env import is_mac_os_version_supported
|
|
|
from mineru.utils.cli_parser import arg_parse
|
|
|
from mineru.utils.config_reader import get_device
|
|
|
from mineru.utils.guess_suffix_or_lang import guess_suffix_by_path
|
|
|
@@ -11,6 +12,11 @@ from mineru.utils.model_utils import get_vram
|
|
|
from ..version import __version__
|
|
|
from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
|
|
|
|
|
|
+
|
|
|
+backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']
|
|
|
+if is_mac_os_version_supported():
|
|
|
+ backends.append("vlm-mlx-engine")
|
|
|
+
|
|
|
@click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
|
|
|
@click.pass_context
|
|
|
@click.version_option(__version__,
|
|
|
@@ -38,25 +44,28 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
|
|
|
'--method',
|
|
|
'method',
|
|
|
type=click.Choice(['auto', 'txt', 'ocr']),
|
|
|
- help="""the method for parsing pdf:
|
|
|
- auto: Automatically determine the method based on the file type.
|
|
|
- txt: Use text extraction method.
|
|
|
- ocr: Use OCR method for image-based PDFs.
|
|
|
+ help="""\b
|
|
|
+ the method for parsing pdf:
|
|
|
+ auto: Automatically determine the method based on the file type.
|
|
|
+ txt: Use text extraction method.
|
|
|
+ ocr: Use OCR method for image-based PDFs.
|
|
|
Without method specified, 'auto' will be used by default.
|
|
|
- Adapted only for the case where the backend is set to "pipeline".""",
|
|
|
+ Adapted only for the case where the backend is set to 'pipeline'.""",
|
|
|
default='auto',
|
|
|
)
|
|
|
@click.option(
|
|
|
'-b',
|
|
|
'--backend',
|
|
|
'backend',
|
|
|
- type=click.Choice(['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']),
|
|
|
- help="""the backend for parsing pdf:
|
|
|
- pipeline: More general.
|
|
|
- vlm-transformers: More general.
|
|
|
- vlm-vllm-engine: Faster(engine).
|
|
|
- vlm-http-client: Faster(client).
|
|
|
- without method specified, pipeline will be used by default.""",
|
|
|
+ type=click.Choice(backends),
|
|
|
+ help="""\b
|
|
|
+ the backend for parsing pdf:
|
|
|
+ pipeline: More general.
|
|
|
+ vlm-transformers: More general, but slower.
|
|
|
+ vlm-mlx-engine: Faster than transformers.
|
|
|
+ vlm-vllm-engine: Faster(engine).
|
|
|
+ vlm-http-client: Faster(client).
|
|
|
+ Without backend specified, pipeline will be used by default.""",
|
|
|
default='pipeline',
|
|
|
)
|
|
|
@click.option(
|
|
|
@@ -66,7 +75,7 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
|
|
|
type=click.Choice(['ch', 'ch_server', 'ch_lite', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka', 'th', 'el',
|
|
|
'latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']),
|
|
|
help="""
|
|
|
- Input the languages in the pdf (if known) to improve OCR accuracy. Optional.
|
|
|
+ Input the languages in the pdf (if known) to improve OCR accuracy.
|
|
|
Without languages specified, 'ch' will be used by default.
|
|
|
Adapted only for the case where the backend is set to "pipeline".
|
|
|
""",
|
|
|
@@ -119,7 +128,8 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
|
|
|
'--device',
|
|
|
'device_mode',
|
|
|
type=str,
|
|
|
- help='Device mode for model inference, e.g., "cpu", "cuda", "cuda:0", "npu", "npu:0", "mps". Adapted only for the case where the backend is set to "pipeline". ',
|
|
|
+ help="""Device mode for model inference, e.g., "cpu", "cuda", "cuda:0", "npu", "npu:0", "mps".
|
|
|
+ Adapted only for the case where the backend is set to "pipeline" or "vlm-transformers". """,
|
|
|
default=None,
|
|
|
)
|
|
|
@click.option(
|