|
|
@@ -1,104 +0,0 @@
|
|
|
"""Packaging script for the ``mineru`` distribution (setuptools based)."""
from pathlib import Path

from setuptools import setup, find_packages

from mineru.version import __version__


if __name__ == "__main__":
    # The README that sits next to this script is used as the long description.
    long_description = Path(__file__).parent.joinpath("README.md").read_text(
        encoding="utf-8"
    )

    setup(
        name="mineru",  # project name
        # Imported from mineru.version; the original note says it is derived
        # automatically from the release tag.
        version=__version__,
        license="AGPL-3.0",
        # All discovered packages, plus the two resource-only directories that
        # are listed explicitly.
        packages=find_packages()
        + ["mineru.resources"]
        + ["mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils.resources"],
        package_data={
            # Ship every file under mineru.resources.
            "mineru.resources": ["**"],
            # Ship every file under the pytorchocr utils resources directory.
            "mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils.resources": ["**"],
        },
        # Third-party libraries required by every installation.
        install_requires=[
            "boto3>=1.28.43",
            "click>=8.1.7",
            "loguru>=0.6.0",  # listed once; a redundant unversioned entry was dropped
            "numpy>=1.21.6",
            "pdfminer.six==20250506",
            "tqdm>=4.67.1",
            "requests",
            "httpx",
            "pillow",
            "pypdfium2",
            "pypdf",
            "reportlab",
        ],
        extras_require={
            "vlm": [
                "transformers>=4.51.1",
                "torch>=2.6.0",
                # The comma after this entry is essential: without it Python
                # concatenates the adjacent literals into a single, invalid
                # requirement string.
                "accelerate>=1.5.1",
                "pydantic>=2.7.2,<2.11",
            ],
            "sglang": [
                "sglang[all]==0.4.6.post5",
            ],
            "pipeline": [
                "matplotlib>=3.10,<4",
                "ultralytics>=8.3.48,<9",  # yolov8, formula detection
                "doclayout_yolo==0.0.4",  # doclayout_yolo
                "dill>=0.3.8,<1",  # doclayout_yolo
                "rapid_table>=1.0.5,<2.0.0",  # rapid_table
                "PyYAML>=6.0.2,<7",  # yaml
                "ftfy>=6.3.1,<7",  # unimernet_hf
                "openai>=1.70.0,<2",  # openai SDK
                "shapely>=2.0.7,<3",  # imgaug-paddleocr2pytorch
                "pyclipper>=1.3.0,<2",  # paddleocr2pytorch
                "omegaconf>=2.3.0,<3",  # paddleocr2pytorch
                "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
                "torchvision",
                "transformers>=4.49.0,!=4.51.0,<5.0.0",
                "fast-langdetect>=0.2.3,<0.3.0",
            ],
            # Same feature set as "pipeline", with tighter pins for older
            # Linux systems.
            "pipeline_old_linux": [
                "matplotlib>=3.10,<=3.10.1",
                "ultralytics>=8.3.48,<=8.3.104",  # yolov8, formula detection
                "doclayout_yolo==0.0.4",  # doclayout_yolo
                "dill==0.3.8",  # doclayout_yolo
                "PyYAML==6.0.2",  # yaml
                "ftfy==6.3.1",  # unimernet_hf
                "openai==1.71.0",  # openai SDK
                "shapely==2.1.0",  # imgaug-paddleocr2pytorch
                "pyclipper==1.3.0.post6",  # paddleocr2pytorch
                "omegaconf==2.3.0",  # paddleocr2pytorch
                # simsimd, introduced in 1.4.21, does not support Linux
                # systems from 2019 and earlier.
                "albumentations==1.4.20",
                # Newer rapid_table releases depend on an onnxruntime that does
                # not support Linux systems from 2019 and earlier.
                "rapid_table==1.0.3",
                "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
                "torchvision",
                "transformers>=4.49.0,!=4.51.0,<5.0.0",
                "fast-langdetect>=0.2.3,<0.3.0",
            ],
        },
        description="A practical tool for converting PDF to Markdown",  # short summary
        long_description=long_description,  # full description (README contents)
        long_description_content_type="text/markdown",  # the README is Markdown
        project_urls={
            "Home": "https://mineru.net/",
            "Repository": "https://github.com/opendatalab/MinerU",
        },
        # One list item per keyword instead of a single comma-joined string.
        keywords=["magic-pdf", "mineru", "MinerU", "convert", "pdf", "markdown"],
        classifiers=[
            "Programming Language :: Python :: 3.10",
            "Programming Language :: Python :: 3.11",
            "Programming Language :: Python :: 3.12",
            "Programming Language :: Python :: 3.13",
        ],
        python_requires=">=3.10,<3.14",  # supported Python versions
        # Executable commands installed with the package.
        entry_points={
            "console_scripts": [
                # Command-line entry point: the `mineru` command calls
                # mineru.cli.client.main.
                # NOTE(review): the object reference is "client.main" resolved
                # on the mineru.cli package, unlike the "module:function" form
                # of the two entries below - confirm this is intentional.
                "mineru = mineru.cli:client.main",
                # sglang server entry point
                "mineru-sglang-server = mineru.cli.vlm_sglang_server:main",
                # model download entry point
                "mineru-models-download = mineru.cli.models_download:download_models",
            ],
        },
        include_package_data=True,  # also ship non-code files (data, configuration, ...)
        zip_safe=False,  # do not install as a zipped archive
    )
|