# setup.py
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import itertools
import os
from pathlib import Path

from setuptools import find_packages, setup
  19. DEP_SPECS = {
  20. "aiohttp": ">= 3.9",
  21. "bce-python-sdk": ">= 0.9",
  22. "chardet": "",
  23. "chinese-calendar": "",
  24. "colorlog": "",
  25. "decord": "== 0.6.0; (platform_machine == 'x86_64' or platform_machine == 'AMD64') and sys_platform != 'darwin'",
  26. "einops": "",
  27. "faiss-cpu": "",
  28. "fastapi": ">= 0.110",
  29. "filelock": "",
  30. "filetype": ">= 1.2",
  31. "ftfy": "",
  32. "GPUtil": ">= 1.4",
  33. "imagesize": "",
  34. "Jinja2": "",
  35. "joblib": "",
  36. "langchain": "== 0.2.17",
  37. "langchain-community": "== 0.2.17",
  38. "langchain-core": "",
  39. "langchain-openai": "== 0.1.25",
  40. "lxml": "",
  41. "matplotlib": "",
  42. "numpy": [
  43. "== 1.24.4; python_version < '3.12'",
  44. "== 1.26.4; python_version >= '3.12'",
  45. ],
  46. "openai": "== 1.63.2",
  47. "opencv-contrib-python": "== 4.10.0.84",
  48. "openpyxl": "",
  49. "packaging": "",
  50. "pandas": "<= 1.5.3",
  51. "pillow": "",
  52. "premailer": "",
  53. "prettytable": "",
  54. "py-cpuinfo": "",
  55. "pyclipper": "",
  56. "pycocotools": "",
  57. "pydantic": ">= 2",
  58. "pypdfium2": ">= 4",
  59. "PyYAML": "== 6.0.2",
  60. "regex": "",
  61. "requests": "",
  62. "ruamel.yaml": "",
  63. "scikit-image": "",
  64. "scikit-learn": "",
  65. "shapely": "",
  66. "soundfile": "",
  67. "starlette": ">= 0.36",
  68. "tiktoken": "",
  69. "tokenizers": "== 0.19.1",
  70. "tqdm": "",
  71. "typing-extensions": "",
  72. "ujson": "",
  73. "uvicorn": ">= 0.16",
  74. "yarl": ">= 1.9",
  75. }
  76. REQUIRED_DEPS = [
  77. "chardet",
  78. "colorlog",
  79. "filelock",
  80. "GPUtil",
  81. "numpy",
  82. "packaging",
  83. # Currently it is not easy to make `pandas` optional
  84. "pandas",
  85. "pillow",
  86. "prettytable",
  87. "py-cpuinfo",
  88. "pydantic",
  89. "PyYAML",
  90. "requests",
  91. "ruamel.yaml",
  92. "typing-extensions",
  93. "ujson",
  94. ]
  95. EXTRAS = {
  96. "base": {
  97. "cv": [
  98. "faiss-cpu",
  99. "matplotlib",
  100. "opencv-contrib-python",
  101. "pycocotools",
  102. # Currently `pypdfium2` is required by the image batch sampler
  103. "pypdfium2",
  104. "scikit-image",
  105. ],
  106. "multimodal": [
  107. "einops",
  108. "ftfy",
  109. "Jinja2",
  110. "opencv-contrib-python",
  111. # For the same reason as in `cv`
  112. "pypdfium2",
  113. "regex",
  114. "tiktoken",
  115. ],
  116. "ie": [
  117. "ftfy",
  118. "imagesize",
  119. "langchain",
  120. "langchain-community",
  121. "langchain-core",
  122. "langchain-openai",
  123. "lxml",
  124. "openai",
  125. "opencv-contrib-python",
  126. "openpyxl",
  127. "premailer",
  128. "pyclipper",
  129. "pypdfium2",
  130. "scikit-learn",
  131. "shapely",
  132. "tokenizers",
  133. ],
  134. "ocr": [
  135. "ftfy",
  136. "imagesize",
  137. "lxml",
  138. "opencv-contrib-python",
  139. "openpyxl",
  140. "premailer",
  141. "pyclipper",
  142. "pypdfium2",
  143. "scikit-learn",
  144. "shapely",
  145. "tokenizers",
  146. ],
  147. "speech": [
  148. "ftfy",
  149. "Jinja2",
  150. "regex",
  151. "soundfile",
  152. "tqdm",
  153. ],
  154. "ts": [
  155. "chinese-calendar",
  156. "joblib",
  157. "matplotlib",
  158. "scikit-learn",
  159. ],
  160. "video": [
  161. "decord",
  162. "opencv-contrib-python",
  163. ],
  164. },
  165. "plugins": {
  166. "serving": [
  167. "aiohttp",
  168. "bce-python-sdk",
  169. "fastapi",
  170. "filetype",
  171. "starlette",
  172. "uvicorn",
  173. "yarl",
  174. ],
  175. },
  176. }
  177. def _get_dep_specs(deps):
  178. dep_specs = []
  179. for dep in deps:
  180. val = DEP_SPECS[dep]
  181. if not isinstance(val, list):
  182. val = [val]
  183. for v in val:
  184. if not v:
  185. dep_specs.append(dep)
  186. else:
  187. dep_specs.append(dep + " " + v)
  188. return dep_specs
  189. def _sort_dep_specs(dep_specs):
  190. return sorted(dep_specs, key=str.lower)
  191. def readme():
  192. """get readme"""
  193. with open("README.md", "r", encoding="utf-8") as file:
  194. return file.read()
  195. def dependencies():
  196. dep_specs = _get_dep_specs(REQUIRED_DEPS)
  197. return _sort_dep_specs(dep_specs)
  198. def extras():
  199. dic = {}
  200. all_dep_specs = set()
  201. for group_name, group in EXTRAS.items():
  202. group_dep_specs = set()
  203. for extra_name, extra_deps in group.items():
  204. extra_dep_specs = _get_dep_specs(extra_deps)
  205. dic[extra_name] = _sort_dep_specs(extra_dep_specs)
  206. group_dep_specs.update(extra_dep_specs)
  207. dic[group_name] = _sort_dep_specs(group_dep_specs)
  208. all_dep_specs.update(group_dep_specs)
  209. dic["all"] = _sort_dep_specs(all_dep_specs)
  210. return dic
  211. def version():
  212. """get version"""
  213. with open(os.path.join("paddlex", ".version"), "r") as file:
  214. return file.read().rstrip()
  215. def get_data_files(directory: str, filetypes: list = None):
  216. all_files = []
  217. filetypes = filetypes or []
  218. for root, _, files in os.walk(directory):
  219. rel_root = os.path.relpath(root, directory)
  220. for file in files:
  221. filepath = os.path.join(rel_root, file)
  222. filetype = os.path.splitext(file)[1][1:]
  223. if filetype in filetypes:
  224. all_files.append(filepath)
  225. return all_files
  226. def packages_and_package_data():
  227. """get packages and package_data"""
  228. def _recursively_find(pattern, exts=None):
  229. for dir_ in glob.iglob(pattern):
  230. for root, _, files in os.walk(dir_):
  231. for f in files:
  232. if exts is not None:
  233. ext = os.path.splitext(f)[1]
  234. if ext not in exts:
  235. continue
  236. yield os.path.join(root, f)
  237. pkgs = find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"])
  238. pkg_data = []
  239. for p in itertools.chain(
  240. _recursively_find("paddlex/configs/*", exts=[".yml", ".yaml"]),
  241. ):
  242. if Path(p).suffix in (".pyc", ".pyo"):
  243. continue
  244. pkg_data.append(Path(p).relative_to("paddlex").as_posix())
  245. pipeline_config = [
  246. Path(p).relative_to("paddlex").as_posix()
  247. for p in glob.glob("paddlex/pipelines/*.yaml")
  248. ]
  249. pkg_data.append("inference/pipelines/ppchatocrv3/ch_prompt.yaml")
  250. pkg_data.extend(pipeline_config)
  251. pkg_data.append(".version")
  252. pkg_data.append("hpip_links.html")
  253. pkg_data.append("inference/utils/hpi_model_info_collection.json")
  254. ops_file_dir = "paddlex/ops"
  255. ops_file_types = ["h", "hpp", "cpp", "cc", "cu"]
  256. return pkgs, {
  257. "paddlex.ops": get_data_files(ops_file_dir, ops_file_types),
  258. "paddlex": pkg_data,
  259. }
  260. if __name__ == "__main__":
  261. pkgs, pkg_data = packages_and_package_data()
  262. s = setup(
  263. name="paddlex",
  264. version=version(),
  265. description=("Low-code development tool based on PaddlePaddle."),
  266. long_description=readme(),
  267. long_description_content_type="text/markdown",
  268. author="PaddlePaddle Authors",
  269. author_email="",
  270. install_requires=dependencies(),
  271. extras_require=extras(),
  272. packages=pkgs,
  273. package_data=pkg_data,
  274. entry_points={
  275. "console_scripts": [
  276. "paddlex = paddlex.__main__:console_entry",
  277. ],
  278. },
  279. # PyPI package information
  280. classifiers=[
  281. "Development Status :: 4 - Beta",
  282. "Intended Audience :: Developers",
  283. "Intended Audience :: Education",
  284. "Intended Audience :: Science/Research",
  285. "License :: OSI Approved :: Apache Software License",
  286. "Programming Language :: Python :: 3.8",
  287. "Programming Language :: Python :: 3.9",
  288. "Programming Language :: Python :: 3.10",
  289. "Programming Language :: Python :: 3.11",
  290. "Programming Language :: Python :: 3.12",
  291. "Topic :: Scientific/Engineering",
  292. "Topic :: Scientific/Engineering :: Mathematics",
  293. "Topic :: Scientific/Engineering :: Artificial Intelligence",
  294. "Topic :: Software Development",
  295. "Topic :: Software Development :: Libraries",
  296. "Topic :: Software Development :: Libraries :: Python Modules",
  297. ],
  298. license="Apache 2.0",
  299. keywords=["paddlepaddle"],
  300. )