# pyproject.toml

  1. [build-system]
  2. requires = ["setuptools>=61.0", "wheel"]
  3. build-backend = "setuptools.build_meta"
  4. [project]
  5. name = "mineru"
  6. dynamic = ["version"]
  7. license = { text = "AGPL-3.0" }
  8. description = "A practical tool for converting PDF to Markdown"
  9. readme = "README.md"
  10. requires-python = ">=3.10,<3.14"
  11. keywords = ["magic-pdf", "mineru", "MinerU", "convert", "pdf", "markdown"]
  12. classifiers = [
  13. "Programming Language :: Python :: 3.10",
  14. "Programming Language :: Python :: 3.11",
  15. "Programming Language :: Python :: 3.12",
  16. "Programming Language :: Python :: 3.13",
  17. ]
  18. dependencies = [
  19. "boto3>=1.28.43",
  20. "click>=8.1.7",
  21. "loguru>=0.7.2",
  22. "numpy>=1.21.6",
  23. "pdfminer.six==20250506",
  24. "tqdm>=4.67.1",
  25. "requests",
  26. "httpx",
  27. "pillow>=11.0.0",
  28. "pypdfium2>=4.30.0",
  29. "pypdf>=5.6.0",
  30. "reportlab",
  31. "pdftext>=0.6.2",
  32. "modelscope>=1.26.0",
  33. "huggingface-hub>=0.32.4",
  34. "json-repair>=0.46.2",
  35. "opencv-python>=4.11.0.86",
  36. "fast-langdetect>=0.2.3,<0.3.0",
  37. "scikit-image>=0.25.0,<1.0.0",
  38. ]
  39. [project.optional-dependencies]
  40. test = [
  41. "mineru[core]",
  42. "pytest",
  43. "pytest-cov",
  44. "coverage",
  45. "beautifulsoup4",
  46. "fuzzywuzzy"
  47. ]
  48. vlm = [
  49. "transformers>=4.51.1",
  50. "torch>=2.6.0",
  51. "accelerate>=1.5.1",
  52. "pydantic",
  53. ]
  54. sglang = [
  55. "sglang[all]>=0.4.7,<0.4.10",
  56. ]
  57. pipeline = [
  58. "matplotlib>=3.10,<4",
  59. "ultralytics>=8.3.48,<9",
  60. "doclayout_yolo==0.0.4",
  61. "dill>=0.3.8,<1",
  62. "rapid_table>=1.0.5,<2.0.0",
  63. "PyYAML>=6.0.2,<7",
  64. "ftfy>=6.3.1,<7",
  65. "openai>=1.70.0,<2",
  66. "shapely>=2.0.7,<3",
  67. "pyclipper>=1.3.0,<2",
  68. "omegaconf>=2.3.0,<3",
  69. "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
  70. "torchvision",
  71. "transformers>=4.49.0,!=4.51.0,<5.0.0",
  72. ]
  73. api = [
  74. "fastapi",
  75. "python-multipart",
  76. "uvicorn",
  77. ]
  78. gradio = [
  79. "gradio>=5.34,<6",
  80. "gradio-pdf>=0.0.22",
  81. ]
  82. core = [
  83. "mineru[vlm]",
  84. "mineru[pipeline]",
  85. "mineru[api]",
  86. "mineru[gradio]",
  87. ]
  88. all = [
  89. "mineru[core]",
  90. "mineru[sglang]",
  91. ]
  92. pipeline_old_linux = [
  93. "matplotlib>=3.10,<=3.10.1",
  94. "ultralytics>=8.3.48,<=8.3.104",
  95. "doclayout_yolo==0.0.4",
  96. "dill==0.3.8",
  97. "PyYAML==6.0.2",
  98. "ftfy==6.3.1",
  99. "openai==1.71.0",
  100. "shapely==2.1.0",
  101. "pyclipper==1.3.0.post6",
  102. "omegaconf==2.3.0",
  103. "albumentations==1.4.20",
  104. "rapid_table==1.0.3",
  105. "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
  106. "torchvision",
  107. "transformers>=4.49.0,!=4.51.0,<5.0.0",
  108. ]
# Project links recorded in the distribution metadata.
[project.urls]
homepage = "https://mineru.net/"
documentation = "https://opendatalab.github.io/MinerU/"
repository = "https://github.com/opendatalab/MinerU"
issues = "https://github.com/opendatalab/MinerU/issues"
  114. [project.scripts]
  115. mineru = "mineru.cli:client.main"
  116. mineru-sglang-server = "mineru.cli.vlm_sglang_server:main"
  117. mineru-models-download = "mineru.cli.models_download:download_models"
  118. mineru-api = "mineru.cli.fast_api:main"
  119. mineru-gradio = "mineru.cli.gradio_app:main"
  120. [tool.setuptools.dynamic]
  121. version = { attr = "mineru.version.__version__" }
  122. [tool.setuptools.packages.find]
  123. include = ["mineru*"]
  124. namespaces = false
  125. [tool.setuptools.package-data]
  126. "mineru" = ["resources/**"]
  127. "mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils" = ["resources/**"]
  128. [tool.setuptools]
  129. include-package-data = true
  130. zip-safe = false
# pytest defaults. `addopts` is prepended to every pytest invocation; here it
# passes `-s` plus the pytest-cov options that measure the `mineru` package
# and request an HTML report.
[tool.pytest.ini_options]
addopts = "-s --cov=mineru --cov-report html"
  133. [tool.coverage.run]
  134. command_line = "-m pytest tests/unittest/test_e2e.py"
  135. source = ["mineru/"]
  136. omit = [
  137. "*/vlm_sglang_model/*",
  138. "*/gradio_app.py",
  139. "*/models_download.py",
  140. "*/fast_api.py",
  141. "*/cli/client.py",
  142. "*/sglang_engine_predictor.py",
  143. "*/vlm_sglang_server.py",
  144. "*/cli_parser.py",
  145. "*/run_async.py"
  146. ]
  147. [tool.coverage.html]
  148. directory = "htmlcov"
  149. [tool.coverage.report]
  150. exclude_also = [
  151. 'def __repr__',
  152. 'if self.debug:',
  153. 'if settings.DEBUG',
  154. 'raise AssertionError',
  155. 'raise NotImplementedError',
  156. 'if 0:',
  157. 'if __name__ == .__main__.:',
  158. 'if TYPE_CHECKING:',
  159. 'class .*\bProtocol\):',
  160. '@(abc\.)?abstractmethod',
  161. ]