setup.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import glob
  15. import itertools
  16. import os
  17. from pathlib import Path
  18. from setuptools import find_packages, setup
  19. BASE_DEP_SPECS = {
  20. "aiohttp": ">= 3.9",
  21. "aistudio-sdk": ">=0.3.5",
  22. "bce-python-sdk": ">= 0.9",
  23. "beautifulsoup4": "",
  24. "chardet": "",
  25. "chinese-calendar": "",
  26. "colorlog": "",
  27. "decord": "== 0.6.0; (platform_machine == 'x86_64' or platform_machine == 'AMD64') and sys_platform != 'darwin'",
  28. "einops": "",
  29. "faiss-cpu": "",
  30. "filelock": "",
  31. "ftfy": "",
  32. "GPUtil": ">= 1.4",
  33. "huggingface-hub": "",
  34. "imagesize": "",
  35. "Jinja2": "",
  36. "joblib": "",
  37. "langchain": ">= 0.2",
  38. "langchain-community": ">= 0.2",
  39. "langchain-core": "",
  40. "langchain-openai": ">= 0.1",
  41. "lxml": "",
  42. "matplotlib": "",
  43. "modelscope": ">=1.28.0",
  44. "numpy": ">= 1.24",
  45. "openai": ">= 1.63",
  46. "opencv-contrib-python": "== 4.10.0.84",
  47. "openpyxl": "",
  48. "packaging": "",
  49. "pandas": ">= 1.3",
  50. "pillow": "",
  51. "premailer": "",
  52. "prettytable": "",
  53. "py-cpuinfo": "",
  54. "pyclipper": "",
  55. "pycocotools": "<= 2.0.8", # pycocotools upgrade incompatible since 2.0.9
  56. "pydantic": ">= 2",
  57. "pypdfium2": ">= 4",
  58. "python-bidi": "",
  59. "PyYAML": "== 6.0.2",
  60. "regex": "",
  61. "requests": "",
  62. "ruamel.yaml": "",
  63. "safetensors": [
  64. "@ https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl ; sys_platform == 'linux'",
  65. "; sys_platform != 'linux'",
  66. ],
  67. "scikit-image": "",
  68. "scikit-learn": "",
  69. "sentencepiece": "",
  70. "shapely": "",
  71. "soundfile": "",
  72. "tiktoken": "",
  73. "tokenizers": ">= 0.19",
  74. "tqdm": "",
  75. "typing-extensions": "",
  76. "ujson": "",
  77. }
  78. REQUIRED_DEPS = [
  79. "aistudio-sdk",
  80. "chardet",
  81. "colorlog",
  82. "filelock",
  83. "huggingface-hub",
  84. "modelscope",
  85. "numpy",
  86. "packaging",
  87. # Currently it is not easy to make `pandas` optional
  88. "pandas",
  89. "pillow",
  90. "prettytable",
  91. "py-cpuinfo",
  92. "pydantic",
  93. "PyYAML",
  94. "requests",
  95. "ruamel.yaml",
  96. "typing-extensions",
  97. "ujson",
  98. ]
  99. EXTRAS = {
  100. "base": {
  101. "cv": [
  102. "faiss-cpu",
  103. "matplotlib",
  104. "opencv-contrib-python",
  105. "pycocotools",
  106. # Currently `pypdfium2` is required by the image batch sampler
  107. "pypdfium2",
  108. "scikit-image",
  109. ],
  110. "multimodal": [
  111. "einops",
  112. "ftfy",
  113. "GPUtil",
  114. "Jinja2",
  115. "opencv-contrib-python",
  116. # For the same reason as in `cv`
  117. "pypdfium2",
  118. "regex",
  119. "safetensors",
  120. "sentencepiece",
  121. "tiktoken",
  122. ],
  123. "ie": [
  124. "ftfy",
  125. "imagesize",
  126. "langchain",
  127. "langchain-community",
  128. "langchain-core",
  129. "langchain-openai",
  130. "lxml",
  131. "openai",
  132. "opencv-contrib-python",
  133. "openpyxl",
  134. "premailer",
  135. "pyclipper",
  136. "pypdfium2",
  137. "scikit-learn",
  138. "shapely",
  139. "tokenizers",
  140. ],
  141. "trans": [
  142. "beautifulsoup4",
  143. "ftfy",
  144. "imagesize",
  145. "lxml",
  146. "openai",
  147. "opencv-contrib-python",
  148. "openpyxl",
  149. "premailer",
  150. "pyclipper",
  151. "pypdfium2",
  152. "scikit-learn",
  153. "shapely",
  154. "tokenizers",
  155. ],
  156. "ocr-core": [
  157. "imagesize",
  158. "opencv-contrib-python",
  159. "pyclipper",
  160. "pypdfium2",
  161. "python-bidi",
  162. "shapely",
  163. ],
  164. "ocr": [
  165. "einops",
  166. "ftfy",
  167. "imagesize",
  168. "Jinja2",
  169. "lxml",
  170. "opencv-contrib-python",
  171. "openpyxl",
  172. "premailer",
  173. "pyclipper",
  174. "pypdfium2",
  175. "python-bidi",
  176. "regex",
  177. "safetensors",
  178. "scikit-learn",
  179. "sentencepiece",
  180. "shapely",
  181. "tiktoken",
  182. "tokenizers",
  183. ],
  184. "speech": [
  185. "ftfy",
  186. "Jinja2",
  187. "regex",
  188. "soundfile",
  189. "tqdm",
  190. ],
  191. "ts": [
  192. "chinese-calendar",
  193. "joblib",
  194. "matplotlib",
  195. "scikit-learn",
  196. ],
  197. "video": [
  198. "decord",
  199. "opencv-contrib-python",
  200. ],
  201. },
  202. "plugins": {
  203. "genai-client": [
  204. "openai >= 1.63",
  205. ],
  206. "genai-sglang-server": [
  207. "einops",
  208. "sglang [all] == 0.5.2",
  209. "torch == 2.8.0",
  210. "transformers",
  211. "xformers",
  212. ],
  213. "genai-vllm-server": [
  214. "einops",
  215. "torch == 2.8.0",
  216. "transformers",
  217. "uvloop",
  218. "vllm == 0.10.2",
  219. "xformers",
  220. ],
  221. "paddle2onnx": [
  222. "paddle2onnx == 2.0.2rc3",
  223. ],
  224. "serving": [
  225. "aiohttp >= 3.9",
  226. "bce-python-sdk >= 0.9",
  227. "fastapi >= 0.110",
  228. "filetype >= 1.2",
  229. "opencv-contrib-python == 4.10.0.84",
  230. "pypdfium2 >= 4",
  231. "starlette >= 0.36",
  232. "uvicorn >= 0.16",
  233. "yarl >= 1.9",
  234. ],
  235. },
  236. }
  237. def _get_dep_specs(deps):
  238. dep_specs = []
  239. for dep in deps:
  240. val = BASE_DEP_SPECS[dep]
  241. if not isinstance(val, list):
  242. val = [val]
  243. for v in val:
  244. if not v:
  245. dep_specs.append(dep)
  246. else:
  247. dep_specs.append(dep + " " + v)
  248. return dep_specs
  249. def _sort_dep_specs(dep_specs):
  250. return sorted(dep_specs, key=str.lower)
  251. def readme():
  252. """get readme"""
  253. with open("README.md", "r", encoding="utf-8") as file:
  254. return file.read()
  255. def dependencies():
  256. dep_specs = _get_dep_specs(REQUIRED_DEPS)
  257. return _sort_dep_specs(dep_specs)
  258. def extras():
  259. dic = {}
  260. base_dep_specs = set()
  261. for extra_name, extra_deps in EXTRAS["base"].items():
  262. extra_dep_specs = _get_dep_specs(extra_deps)
  263. dic[extra_name] = _sort_dep_specs(extra_dep_specs)
  264. base_dep_specs.update(extra_dep_specs)
  265. dic["base"] = _sort_dep_specs(base_dep_specs)
  266. for extra_name, extra_dep_specs in EXTRAS["plugins"].items():
  267. dic[extra_name] = _sort_dep_specs(extra_dep_specs)
  268. return dic
  269. def version():
  270. """get version"""
  271. with open(os.path.join("paddlex", ".version"), "r") as file:
  272. return file.read().rstrip()
  273. def get_data_files(directory: str, filetypes: list = None):
  274. all_files = []
  275. filetypes = filetypes or []
  276. for root, _, files in os.walk(directory):
  277. rel_root = os.path.relpath(root, directory)
  278. for file in files:
  279. filepath = os.path.join(rel_root, file)
  280. filetype = os.path.splitext(file)[1][1:]
  281. if filetype in filetypes:
  282. all_files.append(filepath)
  283. return all_files
  284. def packages_and_package_data():
  285. """get packages and package_data"""
  286. def _recursively_find(pattern, exts=None):
  287. for dir_ in glob.iglob(pattern):
  288. for root, _, files in os.walk(dir_):
  289. for f in files:
  290. if exts is not None:
  291. ext = os.path.splitext(f)[1]
  292. if ext not in exts:
  293. continue
  294. yield os.path.join(root, f)
  295. pkgs = find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"])
  296. pkg_data = []
  297. for p in itertools.chain(
  298. _recursively_find("paddlex/configs/*", exts=[".yml", ".yaml"]),
  299. ):
  300. pkg_data.append(Path(p).relative_to("paddlex").as_posix())
  301. pipeline_config = [
  302. Path(p).relative_to("paddlex").as_posix()
  303. for p in glob.glob("paddlex/pipelines/*.yaml")
  304. ]
  305. pkg_data.extend(pipeline_config)
  306. pkg_data.append("inference/pipelines/ppchatocrv3/ch_prompt.yaml")
  307. pkg_data.append(".version")
  308. pkg_data.append("hpip_links.html")
  309. pkg_data.append("hpip_links_cu12.html")
  310. pkg_data.append("inference/utils/hpi_model_info_collection.json")
  311. genai_chat_templates = [
  312. Path(p).relative_to("paddlex").as_posix()
  313. for p in glob.glob("paddlex/inference/genai/chat_templates/*.jinja")
  314. ]
  315. pkg_data.extend(genai_chat_templates)
  316. pkg_data.extend("inference/genai/models/")
  317. ops_file_dir = "paddlex/ops"
  318. ops_file_types = ["h", "hpp", "cpp", "cc", "cu"]
  319. return pkgs, {
  320. "paddlex.ops": get_data_files(ops_file_dir, ops_file_types),
  321. "paddlex": pkg_data,
  322. }
  323. if __name__ == "__main__":
  324. pkgs, pkg_data = packages_and_package_data()
  325. s = setup(
  326. name="paddlex",
  327. version=version(),
  328. description=("Low-code development tool based on PaddlePaddle."),
  329. long_description=readme(),
  330. long_description_content_type="text/markdown",
  331. author="PaddlePaddle Authors",
  332. author_email="",
  333. install_requires=dependencies(),
  334. extras_require=extras(),
  335. packages=pkgs,
  336. package_data=pkg_data,
  337. entry_points={
  338. "console_scripts": [
  339. "paddlex = paddlex.__main__:console_entry",
  340. "paddlex_genai_server = paddlex.inference.genai.server:main",
  341. ],
  342. "vllm.general_plugins": [
  343. "register_paddlex_genai_models = paddlex.inference.genai.backends.vllm:register_models"
  344. ],
  345. },
  346. # PyPI package information
  347. classifiers=[
  348. "Development Status :: 4 - Beta",
  349. "Intended Audience :: Developers",
  350. "Intended Audience :: Education",
  351. "Intended Audience :: Science/Research",
  352. "Programming Language :: Python :: 3.8",
  353. "Programming Language :: Python :: 3.9",
  354. "Programming Language :: Python :: 3.10",
  355. "Programming Language :: Python :: 3.11",
  356. "Programming Language :: Python :: 3.12",
  357. "Topic :: Scientific/Engineering",
  358. "Topic :: Scientific/Engineering :: Mathematics",
  359. "Topic :: Scientific/Engineering :: Artificial Intelligence",
  360. "Topic :: Software Development",
  361. "Topic :: Software Development :: Libraries",
  362. "Topic :: Software Development :: Libraries :: Python Modules",
  363. ],
  364. license="Apache-2.0",
  365. license_files=["LICENSE"],
  366. keywords=["paddlepaddle"],
  367. )