setup.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. from pathlib import Path
  2. from setuptools import setup, find_packages
  3. from magic_pdf.libs.version import __version__
  4. def parse_requirements(filename):
  5. with open(filename) as f:
  6. lines = f.read().splitlines()
  7. requires = []
  8. for line in lines:
  9. if "http" in line:
  10. pkg_name_without_url = line.split('@')[0].strip()
  11. requires.append(pkg_name_without_url)
  12. else:
  13. requires.append(line)
  14. return requires
  15. if __name__ == '__main__':
  16. with Path(Path(__file__).parent,
  17. 'README.md').open(encoding='utf-8') as file:
  18. long_description = file.read()
  19. setup(
  20. name="magic_pdf", # 项目名
  21. version=__version__, # 自动从tag中获取版本号
  22. license="AGPL-3.0",
  23. packages=find_packages() + ["magic_pdf.resources"] + ["magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorchocr.utils.resources"], # 包含所有的包
  24. package_data={
  25. "magic_pdf.resources": ["**"], # 包含magic_pdf.resources目录下的所有文件
  26. "magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorchocr.utils.resources": ["**"], # pytorchocr.resources目录下的所有文件
  27. },
  28. install_requires=parse_requirements('requirements.txt'), # 项目依赖的第三方库
  29. extras_require={
  30. "lite": [
  31. "paddleocr==2.7.3",
  32. "paddlepaddle==3.0.0b1;platform_system=='Linux'",
  33. "paddlepaddle==2.6.1;platform_system=='Windows' or platform_system=='Darwin'",
  34. ],
  35. "full": [
  36. "matplotlib>=3.10,<4",
  37. "ultralytics>=8.3.48,<9", # yolov8,公式检测
  38. "doclayout_yolo==0.0.2b1", # doclayout_yolo
  39. "dill>=0.3.8,<1", # doclayout_yolo
  40. "rapid_table>=1.0.5,<2.0.0", # rapid_table
  41. "PyYAML>=6.0.2,<7", # yaml
  42. "ftfy>=6.3.1,<7", # unimernet_hf
  43. "openai>=1.70.0,<2", # openai SDK
  44. "shapely>=2.0.7,<3", # imgaug-paddleocr2pytorch
  45. "pyclipper>=1.3.0,<2", # paddleocr2pytorch
  46. "omegaconf>=2.3.0,<3", # paddleocr2pytorch
  47. ],
  48. "full_old_linux": [
  49. "matplotlib>=3.10,<=3.10.1",
  50. "ultralytics>=8.3.48,<=8.3.104", # yolov8,公式检测
  51. "doclayout_yolo==0.0.2b1", # doclayout_yolo
  52. "dill==0.3.8", # doclayout_yolo
  53. "PyYAML==6.0.2", # yaml
  54. "ftfy==6.3.1", # unimernet_hf
  55. "openai==1.71.0", # openai SDK
  56. "shapely==2.1.0", # imgaug-paddleocr2pytorch
  57. "pyclipper==1.3.0.post6", # paddleocr2pytorch
  58. "omegaconf==2.3.0", # paddleocr2pytorch
  59. "albumentations==1.4.20", # 1.4.21引入的simsimd不支持2019年及更早的linux系统
  60. "rapid_table==1.0.3", # rapid_table新版本依赖的onnxruntime不支持2019年及更早的linux系统
  61. ],
  62. },
  63. description="A practical tool for converting PDF to Markdown", # 简短描述
  64. long_description=long_description, # 详细描述
  65. long_description_content_type="text/markdown", # 如果README是Markdown格式
  66. project_urls={
  67. "Home": "https://mineru.net/",
  68. "Repository": "https://github.com/opendatalab/MinerU",
  69. },
  70. keywords=["magic-pdf, mineru, MinerU, convert, pdf, markdown"],
  71. classifiers=[
  72. "Programming Language :: Python :: 3.10",
  73. "Programming Language :: Python :: 3.11",
  74. "Programming Language :: Python :: 3.12",
  75. "Programming Language :: Python :: 3.13",
  76. ],
  77. python_requires=">=3.10,<3.14", # 项目依赖的 Python 版本
  78. entry_points={
  79. "console_scripts": [
  80. "magic-pdf = magic_pdf.tools.cli:cli",
  81. "magic-pdf-dev = magic_pdf.tools.cli_dev:cli"
  82. ],
  83. }, # 项目提供的可执行命令
  84. include_package_data=True, # 是否包含非代码文件,如数据文件、配置文件等
  85. zip_safe=False, # 是否使用 zip 文件格式打包,一般设为 False
  86. )