# Packaging metadata and tool configuration for the `mineru` distribution.

[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "mineru"
# The version is not declared statically; it is resolved through
# [tool.setuptools.dynamic] below.
dynamic = ["version"]
license = { text = "AGPL-3.0" }
description = "A practical tool for converting PDF to Markdown"
readme = "README.md"
requires-python = ">=3.10,<3.14"
keywords = ["magic-pdf", "mineru", "MinerU", "convert", "pdf", "markdown"]
classifiers = [
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
# Requirements installed with every variant of the package, regardless of
# which extras are selected.
dependencies = [
    "boto3>=1.28.43",
    "click>=8.1.7",
    "loguru>=0.7.2",
    "numpy>=1.21.6",
    "pdfminer.six==20250506",
    "tqdm>=4.67.1",
    "requests",
    "httpx",
    "pillow>=11.0.0",
    "pypdfium2>=4.30.0",
    "pypdf>=5.6.0",
    "reportlab",
    "pdftext>=0.6.2",
    "modelscope>=1.26.0",
    "huggingface-hub>=0.32.4",
    "json-repair>=0.46.2",
    "opencv-python>=4.11.0.86",
    "fast-langdetect>=0.2.3,<0.3.0",
    "scikit-image>=0.25.0,<1.0.0",
    "openai>=1.70.0,<2",
    "beautifulsoup4>=4.13.5,<5",
    "magika>=0.6.2,<0.7.0",
    "mineru_vl_utils",
]

[project.optional-dependencies]
test = [
    "mineru[core]",
    "pytest",
    "pytest-cov",
    "coverage",
    "fuzzywuzzy",
]
vlm = [
    "torch>=2.6.0,<2.8.0",
    "transformers>=4.51.1,<5.0.0",
    "accelerate>=1.5.1",
]
vllm = [
    "vllm==0.10.1.1",
]
pipeline = [
    "matplotlib>=3.10,<4",
    "ultralytics>=8.3.48,<9",
    "doclayout_yolo==0.0.4",
    "dill>=0.3.8,<1",
    "PyYAML>=6.0.2,<7",
    "ftfy>=6.3.1,<7",
    "shapely>=2.0.7,<3",
    "pyclipper>=1.3.0,<2",
    "omegaconf>=2.3.0,<3",
    "torch>=2.6.0,<2.8.0",
    "torchvision",
    "transformers>=4.49.0,!=4.51.0,<5.0.0",
    "onnxruntime>1.17.0",
]
api = [
    "fastapi",
    "python-multipart",
    "uvicorn",
]
gradio = [
    "gradio>=5.34,<6",
    "gradio-pdf>=0.0.22",
]
# Aggregate extra: bundles the vlm, pipeline, api and gradio extras.
core = [
    "mineru[vlm]",
    "mineru[pipeline]",
    "mineru[api]",
    "mineru[gradio]",
]
# Aggregate extra: everything in `core` plus the pinned vllm extra.
all = [
    "mineru[core]",
    "mineru[vllm]",
]

[project.urls]
homepage = "https://mineru.net/"
documentation = "https://opendatalab.github.io/MinerU/"
repository = "https://github.com/opendatalab/MinerU"
issues = "https://github.com/opendatalab/MinerU/issues"

# Console commands created on install; each value is an entry-point reference
# of the form "module:object".
[project.scripts]
mineru = "mineru.cli:client.main"
mineru-vllm-server = "mineru.cli.vlm_vllm_server:main"
mineru-models-download = "mineru.cli.models_download:download_models"
mineru-api = "mineru.cli.fast_api:main"
mineru-gradio = "mineru.cli.gradio_app:main"

[tool.setuptools]
include-package-data = true
zip-safe = false

# Supplies the `version` field that [project] marks as dynamic.
[tool.setuptools.dynamic]
version = { attr = "mineru.version.__version__" }

[tool.setuptools.packages.find]
include = ["mineru*"]
namespaces = false

[tool.setuptools.package-data]
"mineru" = ["resources/**"]
"mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils" = ["resources/**"]

[tool.pytest.ini_options]
addopts = "-s --cov=mineru --cov-report html"

[tool.coverage.run]
command_line = "-m pytest tests/unittest/test_e2e.py"
source = ["mineru/"]
# File patterns left out of coverage measurement.
omit = [
    "*/gradio_app.py",
    "*/models_download.py",
    "*/fast_api.py",
    "*/cli/client.py",
    "*/vlm_vllm_server.py",
    "*/cli_parser.py",
]

[tool.coverage.html]
directory = "htmlcov"

[tool.coverage.report]
# Regular expressions for source lines to exclude from the coverage report.
# Written as TOML literal strings so the backslashes reach the regex engine
# unchanged.
exclude_also = [
    'def __repr__',
    'if self.debug:',
    'if settings.DEBUG',
    'raise AssertionError',
    'raise NotImplementedError',
    'if 0:',
    'if __name__ == .__main__.:',
    'if TYPE_CHECKING:',
    'class .*\bProtocol\):',
    '@(abc\.)?abstractmethod',
]