| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- # Build configuration: setuptools is the backend that produces the sdist and wheel.
- # setuptools 61.0 is the first release that reads [project] metadata from pyproject.toml.
- [build-system]
- build-backend = "setuptools.build_meta"
- requires = ["setuptools>=61.0", "wheel"]
- # Core package metadata.
- [project]
- name = "mineru"
- description = "A practical tool for converting PDF to Markdown"
- readme = "README.md"
- license = { text = "AGPL-3.0" }
- keywords = ["magic-pdf", "mineru", "MinerU", "convert", "pdf", "markdown"]
- requires-python = ">=3.10,<3.14"
- # One classifier per minor version allowed by requires-python above.
- classifiers = [
-     "Programming Language :: Python :: 3.10",
-     "Programming Language :: Python :: 3.11",
-     "Programming Language :: Python :: 3.12",
-     "Programming Language :: Python :: 3.13",
- ]
- # The version is not written here; it is resolved at build time via [tool.setuptools.dynamic].
- dynamic = ["version"]
- # Base runtime requirements, installed regardless of which extras are selected.
- # Kept in alphabetical order.
- dependencies = [
-     "boto3>=1.28.43",
-     "click>=8.1.7",
-     "fast-langdetect>=0.2.3,<0.3.0",
-     "httpx",
-     "huggingface-hub>=0.32.4",
-     "json-repair>=0.46.2",
-     "loguru>=0.7.2",
-     "modelscope>=1.26.0",
-     "numpy>=1.21.6",
-     "opencv-python>=4.11.0.86",
-     "pdfminer.six==20250506",
-     "pdftext>=0.6.2",
-     "pillow>=11.0.0",
-     "pypdf>=5.6.0",
-     "pypdfium2>=4.30.0",
-     "reportlab",
-     "requests",
-     "scikit-image>=0.25.0,<1.0.0",
-     "tqdm>=4.67.1",
- ]
- # Optional feature sets ("extras"). Leaf extras are listed first, followed by the
- # aggregate extras that are built only from other mineru[...] extras.
- [project.optional-dependencies]
- # Requirements for the VLM backend.
- vlm = [
-     "transformers>=4.51.1",
-     "torch>=2.6.0",
-     "accelerate>=1.5.1",
-     "pydantic",
- ]
- # SGLang integration; capped below 0.4.10.
- sglang = [
-     "sglang[all]>=0.4.7,<0.4.10",
- ]
- # Requirements for the pipeline backend; most entries are capped below the next major version.
- pipeline = [
-     "matplotlib>=3.10,<4",
-     "ultralytics>=8.3.48,<9",
-     "doclayout_yolo==0.0.4",
-     "dill>=0.3.8,<1",
-     "rapid_table>=1.0.5,<2.0.0",
-     "PyYAML>=6.0.2,<7",
-     "ftfy>=6.3.1,<7",
-     "openai>=1.70.0,<2",
-     "shapely>=2.0.7,<3",
-     "pyclipper>=1.3.0,<2",
-     "omegaconf>=2.3.0,<3",
-     "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
-     "torchvision",
-     "transformers>=4.49.0,!=4.51.0,<5.0.0",
- ]
- # Variant of the pipeline extra with most packages pinned or tightly capped.
- # It also adds albumentations and uses rapid_table 1.0.3, which is below the
- # minimum accepted by the regular pipeline extra.
- # NOTE(review): presumably intended for older Linux distributions - confirm.
- pipeline_old_linux = [
-     "matplotlib>=3.10,<=3.10.1",
-     "ultralytics>=8.3.48,<=8.3.104",
-     "doclayout_yolo==0.0.4",
-     "dill==0.3.8",
-     "PyYAML==6.0.2",
-     "ftfy==6.3.1",
-     "openai==1.71.0",
-     "shapely==2.1.0",
-     "pyclipper==1.3.0.post6",
-     "omegaconf==2.3.0",
-     "albumentations==1.4.20",
-     "rapid_table==1.0.3",
-     "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
-     "torchvision",
-     "transformers>=4.49.0,!=4.51.0,<5.0.0",
- ]
- # Web framework stack used by the mineru-api command.
- api = [
-     "fastapi",
-     "python-multipart",
-     "uvicorn",
- ]
- # Gradio UI stack used by the mineru-gradio command.
- gradio = [
-     "gradio>=5.34,<6",
-     "gradio-pdf>=0.0.22",
- ]
- # Aggregate: every feature set except sglang.
- core = [
-     "mineru[vlm]",
-     "mineru[pipeline]",
-     "mineru[api]",
-     "mineru[gradio]",
- ]
- # Aggregate: core plus sglang.
- all = [
-     "mineru[core]",
-     "mineru[sglang]",
- ]
- # Test tooling layered on top of the core feature set.
- test = [
-     "mineru[core]",
-     "pytest",
-     "pytest-cov",
-     "coverage",
-     "beautifulsoup4",
-     "fuzzywuzzy",
- ]
- # Project links published with the package metadata.
- [project.urls]
- homepage = "https://mineru.net/"
- repository = "https://github.com/opendatalab/MinerU"
- issues = "https://github.com/opendatalab/MinerU/issues"
- documentation = "https://opendatalab.github.io/MinerU/"
- # Console commands installed with the package. Each value is an entry-point
- # reference of the form "importable.module:callable".
- [project.scripts]
- # Reference the client module itself rather than "mineru.cli:client.main".
- # That form looks up "client" as an attribute of the mineru.cli package, which
- # only resolves when the submodule has already been imported.
- mineru = "mineru.cli.client:main"
- mineru-sglang-server = "mineru.cli.vlm_sglang_server:main"
- mineru-models-download = "mineru.cli.models_download:download_models"
- mineru-api = "mineru.cli.fast_api:main"
- mineru-gradio = "mineru.cli.gradio_app:main"
- # setuptools-specific build settings: the parent table first, then its sub-tables.
- [tool.setuptools]
- include-package-data = true
- zip-safe = false
- # Supplies the dynamic "version" field from the __version__ attribute of mineru.version.
- [tool.setuptools.dynamic]
- version = { attr = "mineru.version.__version__" }
- # Package discovery: only packages matching mineru* are included.
- [tool.setuptools.packages.find]
- include = ["mineru*"]
- namespaces = false
- # Data files shipped inside the listed packages. The second key stays quoted
- # because it is a single dotted package name, not a nested table path.
- [tool.setuptools.package-data]
- mineru = ["resources/**"]
- "mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils" = ["resources/**"]
- # Options added to every pytest run: -s turns off output capturing, and the
- # --cov flags (from pytest-cov) measure the mineru package and write an HTML report.
- [tool.pytest.ini_options]
- addopts = '-s --cov=mineru --cov-report html'
- # coverage.py measurement settings.
- [tool.coverage.run]
- source = ["mineru/"]
- # Arguments used when "coverage run" is invoked without a program of its own.
- command_line = "-m pytest tests/unittest/test_e2e.py"
- # Files and directories left out of measurement.
- omit = [
-     "*/vlm_sglang_model/*",
-     "*/gradio_app.py",
-     "*/models_download.py",
-     "*/fast_api.py",
-     "*/cli/client.py",
-     "*/sglang_engine_predictor.py",
-     "*/vlm_sglang_server.py",
-     "*/cli_parser.py",
-     "*/run_async.py",
- ]
- # Output directory for the HTML coverage report.
- [tool.coverage.html]
- directory = 'htmlcov'
- # Reporting settings. Each exclude_also entry is a regular expression; matching
- # source lines are left out of the report. Literal (single-quoted) strings keep
- # the backslashes in the patterns unescaped.
- [tool.coverage.report]
- exclude_also = [
-     'def __repr__',
-     'if self.debug:',
-     'if settings.DEBUG',
-     'raise AssertionError',
-     'raise NotImplementedError',
-     'if 0:',
-     'if __name__ == .__main__.:',
-     'if TYPE_CHECKING:',
-     'class .*\bProtocol\):',
-     '@(abc\.)?abstractmethod',
- ]
|