# check_imports.py
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # TODO: Less verbose
  15. import ast
  16. import pathlib
  17. import re
  18. import sys
  19. import traceback
  20. from collections import deque
  21. from stdlib_list import stdlib_list
  22. sys.path.append(str(pathlib.Path(__file__).parent.parent))
  23. from setup import REQUIRED_DEPS
  24. # NOTE: We do not use `importlib.metadata.packages_distributions` here because
  25. # 1. It is supported only in Python 3.10+.
  26. # 2. It requires the packages to be installed, but we are doing a static check.
  27. MOD_TO_DEP = {
  28. "aistudio_sdk": "aistudio-sdk",
  29. "aiohttp": "aiohttp",
  30. "baidubce": "bce-python-sdk",
  31. "bs4": "beautifulsoup4",
  32. "chardet": "chardet",
  33. "chinese_calendar": "chinese-calendar",
  34. "colorlog": "colorlog",
  35. "decord": "decord",
  36. "einops": "einops",
  37. "faiss": "faiss-cpu",
  38. "fastapi": "fastapi",
  39. "filelock": "filelock",
  40. "filetype": "filetype",
  41. "flash_attn": "flash-attn",
  42. "ftfy": "ftfy",
  43. "GPUtil": "GPUtil",
  44. "huggingface_hub": "huggingface-hub",
  45. "imagesize": "imagesize",
  46. "jinja2": "Jinja2",
  47. "joblib": "joblib",
  48. "langchain": "langchain",
  49. "langchain_community": "langchain-community",
  50. "langchain_core": "langchain-core",
  51. "langchain_openai": "langchain-openai",
  52. "lxml": "lxml",
  53. "matplotlib": "matplotlib",
  54. "modelscope": "modelscope",
  55. "numpy": "numpy",
  56. "openai": "openai",
  57. "cv2": "opencv-contrib-python",
  58. "openpyxl": "openpyxl",
  59. "packaging": "packaging",
  60. "paddle2onnx": "paddle2onnx",
  61. "pandas": "pandas",
  62. "PIL": "pillow",
  63. "premailer": "premailer",
  64. "prettytable": "prettytable",
  65. "cpuinfo": "py-cpuinfo",
  66. "pyclipper": "pyclipper",
  67. "pycocotools": "pycocotools",
  68. "pydantic": "pydantic",
  69. "pypdfium2": "pypdfium2",
  70. "yaml": "PyYAML",
  71. "regex": "regex",
  72. "requests": "requests",
  73. "ruamel.yaml": "ruamel.yaml",
  74. "safetensors": "safetensors",
  75. "skimage": "scikit-image",
  76. "sklearn": "scikit-learn",
  77. "sentencepiece": "sentencepiece",
  78. "sglang": "sglang",
  79. "shapely": "shapely",
  80. "soundfile": "soundfile",
  81. "starlette": "starlette",
  82. "tiktoken": "tiktoken",
  83. "tokenizers": "tokenizers",
  84. "torch": "torch",
  85. "tqdm": "tqdm",
  86. "transformers": "transformers",
  87. "typing_extensions": "typing-extensions",
  88. "ujson": "ujson",
  89. "uvicorn": "uvicorn",
  90. "uvloop": "uvloop",
  91. "vllm": "vllm",
  92. "xformers": "xformers",
  93. "yarl": "yarl",
  94. "bidi": "python-bidi",
  95. }
  96. MOD_PATTERN = re.compile(
  97. rf"^(?:{'|'.join([re.escape(mod) for mod in MOD_TO_DEP])})(?=\.|$)"
  98. )
  99. STDLIB_MODS = set(stdlib_list())
  100. SPECIAL_KNOWN_MODS = {
  101. "paddle",
  102. "paddleseg",
  103. "paddleclas",
  104. "paddledet",
  105. "paddlets",
  106. "paddlenlp",
  107. "paddlespeech",
  108. "parl",
  109. "paddlemix",
  110. "paddle3d",
  111. "paddlevideo",
  112. }
  113. MANUALLY_MANAGED_OPTIONAL_HEAVY_MODS = {
  114. "paddle_custom_device",
  115. "ultra_infer",
  116. "fastdeploy",
  117. }
  118. def check(file_path):
  119. # TODO:
  120. # 1. Handle more cases, e.g., `from ruamel import yaml`.
  121. # 2. Find unused dependencies.
  122. # 3. Better output format.
  123. with open(file_path, "r", encoding="utf-8") as f:
  124. file_contents = f.read()
  125. try:
  126. tree = ast.parse(file_contents)
  127. except Exception:
  128. print(
  129. f"Failed to parse the source code in `{file_path}` into an AST node:\n{traceback.format_exc()}"
  130. )
  131. return False
  132. # 1. Never import unknown modules
  133. # 2. Don't import optional third-party modules at the top level
  134. unknown_modules_found = False
  135. top_level_imports_found = False
  136. q = deque()
  137. for child in ast.iter_child_nodes(tree):
  138. q.append((child, 1))
  139. while q:
  140. node, level = q.popleft()
  141. mods = set()
  142. if isinstance(node, ast.Import):
  143. for alias in node.names:
  144. mod = alias.name
  145. mods.add(mod)
  146. elif isinstance(node, ast.ImportFrom):
  147. if node.module and node.level == 0:
  148. mod = node.module
  149. mods.add(mod)
  150. for mod in mods:
  151. pos = f"{file_path}:{node.lineno}:{node.col_offset}"
  152. tl = mod.split(".")[0]
  153. if tl == "paddlex" or tl in SPECIAL_KNOWN_MODS or tl in STDLIB_MODS:
  154. continue
  155. elif tl in MANUALLY_MANAGED_OPTIONAL_HEAVY_MODS:
  156. if level == 1:
  157. print(
  158. f"{pos}: Module of a manually managed heavy dependency imported at the top level: {mod}"
  159. )
  160. top_level_imports_found = True
  161. elif match_ := MOD_PATTERN.match(mod):
  162. if level == 1:
  163. dep = MOD_TO_DEP[match_.group(0)]
  164. if dep not in REQUIRED_DEPS:
  165. print(
  166. f"{pos}: Module of an optional dependency imported at the top level: {mod}"
  167. )
  168. top_level_imports_found = True
  169. else:
  170. print(f"{pos}: Unknown module imported: {mod}")
  171. unknown_modules_found = True
  172. for child in ast.iter_child_nodes(node):
  173. q.append((child, level + 1))
  174. return unknown_modules_found | (top_level_imports_found << 1)
  175. def main():
  176. files = sys.argv[1:]
  177. flag = 0
  178. for file in files:
  179. ret = check(file)
  180. flag |= ret
  181. if flag:
  182. if flag & 1:
  183. curr_script_path = pathlib.Path(__file__)
  184. curr_script_path = curr_script_path.relative_to(
  185. curr_script_path.parent.parent
  186. )
  187. print(
  188. f"If a new dependency should be added, please update `setup.py` and `{curr_script_path}`."
  189. )
  190. if (flag >> 1) & 1:
  191. print(
  192. "Please put the imports from optional dependencies and manually managed heavy dependencies inside a conditional body or within a function body."
  193. )
  194. sys.exit(1)
  195. if __name__ == "__main__":
  196. main()