setup.py_back 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. from pathlib import Path
  2. from setuptools import setup, find_packages
  3. from mineru.version import __version__
  4. if __name__ == '__main__':
  5. with Path(Path(__file__).parent,
  6. 'README.md').open(encoding='utf-8') as file:
  7. long_description = file.read()
  8. setup(
  9. name="mineru", # 项目名
  10. version=__version__, # 自动从tag中获取版本号
  11. license="AGPL-3.0",
  12. packages=find_packages() + ["mineru.resources"] + ["mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils.resources"], # 包含所有的包
  13. package_data={
  14. "mineru.resources": ["**"], # 包含magic_pdf.resources目录下的所有文件
  15. "mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils.resources": ["**"], # pytorchocr.resources目录下的所有文件
  16. },
  17. install_requires=[
  18. "boto3>=1.28.43",
  19. "click>=8.1.7",
  20. "loguru>=0.6.0",
  21. "numpy>=1.21.6",
  22. "pdfminer.six==20250506",
  23. "tqdm>=4.67.1",
  24. "requests",
  25. "httpx",
  26. "pillow",
  27. "pypdfium2",
  28. "loguru",
  29. "pypdf",
  30. "reportlab",
  31. ], # 项目依赖的第三方库
  32. extras_require={
  33. "vlm":[
  34. "transformers>=4.51.1",
  35. "torch>=2.6.0",
  36. "accelerate>=1.5.1"
  37. "pydantic>=2.7.2,<2.11",
  38. ],
  39. "sglang": [
  40. "sglang[all]==0.4.6.post5",
  41. ],
  42. "pipeline": [
  43. "matplotlib>=3.10,<4",
  44. "ultralytics>=8.3.48,<9", # yolov8,公式检测
  45. "doclayout_yolo==0.0.4", # doclayout_yolo
  46. "dill>=0.3.8,<1", # doclayout_yolo
  47. "rapid_table>=1.0.5,<2.0.0", # rapid_table
  48. "PyYAML>=6.0.2,<7", # yaml
  49. "ftfy>=6.3.1,<7", # unimernet_hf
  50. "openai>=1.70.0,<2", # openai SDK
  51. "shapely>=2.0.7,<3", # imgaug-paddleocr2pytorch
  52. "pyclipper>=1.3.0,<2", # paddleocr2pytorch
  53. "omegaconf>=2.3.0,<3", # paddleocr2pytorch
  54. "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
  55. "torchvision",
  56. "transformers>=4.49.0,!=4.51.0,<5.0.0",
  57. "fast-langdetect>=0.2.3,<0.3.0",
  58. ],
  59. "pipeline_old_linux": [
  60. "matplotlib>=3.10,<=3.10.1",
  61. "ultralytics>=8.3.48,<=8.3.104", # yolov8,公式检测
  62. "doclayout_yolo==0.0.4", # doclayout_yolo
  63. "dill==0.3.8", # doclayout_yolo
  64. "PyYAML==6.0.2", # yaml
  65. "ftfy==6.3.1", # unimernet_hf
  66. "openai==1.71.0", # openai SDK
  67. "shapely==2.1.0", # imgaug-paddleocr2pytorch
  68. "pyclipper==1.3.0.post6", # paddleocr2pytorch
  69. "omegaconf==2.3.0", # paddleocr2pytorch
  70. "albumentations==1.4.20", # 1.4.21引入的simsimd不支持2019年及更早的linux系统
  71. "rapid_table==1.0.3", # rapid_table新版本依赖的onnxruntime不支持2019年及更早的linux系统
  72. "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
  73. "torchvision",
  74. "transformers>=4.49.0,!=4.51.0,<5.0.0",
  75. "fast-langdetect>=0.2.3,<0.3.0",
  76. ],
  77. },
  78. description="A practical tool for converting PDF to Markdown", # 简短描述
  79. long_description=long_description, # 详细描述
  80. long_description_content_type="text/markdown", # 如果README是Markdown格式
  81. project_urls={
  82. "Home": "https://mineru.net/",
  83. "Repository": "https://github.com/opendatalab/MinerU",
  84. },
  85. keywords=["magic-pdf, mineru, MinerU, convert, pdf, markdown"],
  86. classifiers=[
  87. "Programming Language :: Python :: 3.10",
  88. "Programming Language :: Python :: 3.11",
  89. "Programming Language :: Python :: 3.12",
  90. "Programming Language :: Python :: 3.13",
  91. ],
  92. python_requires=">=3.10,<3.14", # 项目依赖的 Python 版本
  93. entry_points={
  94. "console_scripts": [
  95. "mineru = mineru.cli:client.main", # 命令行入口点,mineru命令将调用mineru.cli.client.main函数
  96. "mineru-sglang-server = mineru.cli.vlm_sglang_server:main", # sglang服务器入口点
  97. "mineru-models-download = mineru.cli.models_download:download_models", # 模型下载入口点
  98. ],
  99. }, # 项目提供的可执行命令
  100. include_package_data=True, # 是否包含非代码文件,如数据文件、配置文件等
  101. zip_safe=False, # 是否使用 zip 文件格式打包,一般设为 False
  102. )