hpi.py

# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ctypes.util
import importlib.resources
import importlib.util
import json
import platform
from functools import lru_cache
from os import PathLike
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union

from pydantic import BaseModel, Field
from typing_extensions import Annotated, TypeAlias

from ...utils.deps import function_requires_deps, is_paddle2onnx_plugin_available
from ...utils.flags import USE_PIR_TRT, FLAGS_json_format_model


class PaddleInferenceInfo(BaseModel):
    trt_dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None
    trt_dynamic_shape_input_data: Optional[Dict[str, List[List[float]]]] = None


class TensorRTInfo(BaseModel):
    dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None


class InferenceBackendInfoCollection(BaseModel):
    paddle_infer: Optional[PaddleInferenceInfo] = None
    tensorrt: Optional[TensorRTInfo] = None


# Does using `TypedDict` make things more convenient?
class HPIInfo(BaseModel):
    backend_configs: Optional[InferenceBackendInfoCollection] = None


# For multi-backend inference only
InferenceBackend: TypeAlias = Literal[
    "paddle", "openvino", "onnxruntime", "tensorrt", "om"
]


class OpenVINOConfig(BaseModel):
    cpu_num_threads: int = 8


class ONNXRuntimeConfig(BaseModel):
    cpu_num_threads: int = 8


class TensorRTConfig(BaseModel):
    precision: Literal["fp32", "fp16"] = "fp32"
    use_dynamic_shapes: bool = True
    dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None
    # TODO: Control caching behavior


class OMConfig(BaseModel):
    pass


class HPIConfig(BaseModel):
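    """High-performance inference (HPI) configuration.

    Field semantics follow how they are used in
    `suggest_inference_backend_and_config` below: `pdx_model_name` is
    populated via the `model_name` alias, `backend` and `backend_config`
    optionally pin and tune a backend, and `auto_paddle2onnx` allows a
    Paddle model to be used via ONNX when the Paddle2ONNX plugin is
    available.
    """
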
    pdx_model_name: Annotated[str, Field(alias="model_name")]
    device_type: str
    device_id: Optional[int] = None
    auto_config: bool = True
    backend: Optional[InferenceBackend] = None
    backend_config: Optional[Dict[str, Any]] = None
    hpi_info: Optional[HPIInfo] = None
    auto_paddle2onnx: bool = True
    # TODO: Add more validation logic here


class ModelInfo(BaseModel):
    name: str
    hpi_info: Optional[HPIInfo] = None


ModelFormat: TypeAlias = Literal["paddle", "onnx", "om"]


class ModelPaths(TypedDict, total=False):
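    """Paths to model files, keyed by model format.

    All keys are optional: `get_model_paths` only fills in the formats that
    are actually present in the model directory.
    """
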
    paddle: Tuple[Path, Path]
    onnx: Path
    om: Path


def get_model_paths(
    model_dir: Union[str, PathLike], model_file_prefix: str
) -> ModelPaths:
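    """Collect the model files found under `model_dir`.

    A Paddle model is recorded only if both the model structure file
    (`.json`, falling back to `.pdmodel` unless `FLAGS_json_format_model`
    forces JSON) and the `.pdiparams` parameters file exist. ONNX (`.onnx`)
    and OM (`.om`) models are recorded independently when present.
    """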
    model_dir = Path(model_dir)
    model_paths: ModelPaths = {}
    pd_model_path = None
    if FLAGS_json_format_model:
        if (model_dir / f"{model_file_prefix}.json").exists():
            pd_model_path = model_dir / f"{model_file_prefix}.json"
    else:
        if (model_dir / f"{model_file_prefix}.json").exists():
            pd_model_path = model_dir / f"{model_file_prefix}.json"
        elif (model_dir / f"{model_file_prefix}.pdmodel").exists():
            pd_model_path = model_dir / f"{model_file_prefix}.pdmodel"
    if pd_model_path and (model_dir / f"{model_file_prefix}.pdiparams").exists():
        model_paths["paddle"] = (
            pd_model_path,
            model_dir / f"{model_file_prefix}.pdiparams",
        )
    if (model_dir / f"{model_file_prefix}.onnx").exists():
        model_paths["onnx"] = model_dir / f"{model_file_prefix}.onnx"
    if (model_dir / f"{model_file_prefix}.om").exists():
        model_paths["om"] = model_dir / f"{model_file_prefix}.om"
    return model_paths


@lru_cache(1)
def _get_hpi_model_info_collection():
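    """Load the bundled `hpi_model_info_collection.json` resource.

    The result is cached so the JSON file is read at most once per process.
    """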
    with importlib.resources.open_text(
        __package__, "hpi_model_info_collection.json", encoding="utf-8"
    ) as f:
        hpi_model_info_collection = json.load(f)
    return hpi_model_info_collection


@function_requires_deps("ultra-infer")
def suggest_inference_backend_and_config(
    hpi_config: HPIConfig,
    model_paths: ModelPaths,
) -> Union[Tuple[InferenceBackend, Dict[str, Any]], Tuple[None, str]]:
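    """Suggest an inference backend and backend configuration.

    Returns a `(backend, backend_config)` pair on success, or `(None, reason)`
    when no suggestion can be made (e.g. an unsupported Paddle version, an
    unknown model, or no usable backend in the current environment).
    """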
    # TODO: The current strategy is naive. It would be better to consider
    # additional important factors, such as NVIDIA GPU compute capability and
    # device manufacturers. We should also allow users to provide hints.
    import paddle
    from ultra_infer import (
        is_built_with_om,
        is_built_with_openvino,
        is_built_with_ort,
        is_built_with_trt,
    )

    is_onnx_model_available = "onnx" in model_paths
    # TODO: Give a warning if the Paddle2ONNX plugin is not available but
    # can be used to select a better backend.
    if hpi_config.auto_paddle2onnx and is_paddle2onnx_plugin_available():
        is_onnx_model_available = is_onnx_model_available or "paddle" in model_paths

    available_backends = []
    if "paddle" in model_paths:
        available_backends.append("paddle")
    if is_built_with_openvino() and is_onnx_model_available:
        available_backends.append("openvino")
    if is_built_with_ort() and is_onnx_model_available:
        available_backends.append("onnxruntime")
    if is_built_with_trt() and is_onnx_model_available:
        available_backends.append("tensorrt")
    if is_built_with_om() and "om" in model_paths:
        available_backends.append("om")

    if not available_backends:
        return None, "No inference backends are available."

    if hpi_config.backend is not None and hpi_config.backend not in available_backends:
        return None, f"Inference backend {repr(hpi_config.backend)} is unavailable."

    paddle_version = paddle.__version__
    if paddle_version != "3.0.0":
        return None, f"{repr(paddle_version)} is not a supported Paddle version."

    if hpi_config.device_type == "cpu":
        uname = platform.uname()
        arch = uname.machine.lower()
        if arch == "x86_64":
            key = "cpu_x64"
        else:
            return None, f"{repr(arch)} is not a supported architecture."
    elif hpi_config.device_type == "gpu":
        # FIXME: We should not rely on the PaddlePaddle library to determine
        # CUDA and cuDNN versions.
        # Should we inject environment info from the outside?
        import paddle.version

        cuda_version = paddle.version.cuda()
        cuda_version = cuda_version.replace(".", "")
        cudnn_version = paddle.version.cudnn().rsplit(".", 1)[0]
        cudnn_version = cudnn_version.replace(".", "")
        key = f"gpu_cuda{cuda_version}_cudnn{cudnn_version}"
    else:
        return None, f"{repr(hpi_config.device_type)} is not a supported device type."

    hpi_model_info_collection = _get_hpi_model_info_collection()
    if key not in hpi_model_info_collection:
        return None, "No prior knowledge can be utilized."
    hpi_model_info_collection_for_env = hpi_model_info_collection[key]

    if hpi_config.pdx_model_name not in hpi_model_info_collection_for_env:
        return None, f"{repr(hpi_config.pdx_model_name)} is not a known model."
    supported_pseudo_backends = hpi_model_info_collection_for_env[
        hpi_config.pdx_model_name
    ]

    # XXX
    if not ctypes.util.find_library("nvinfer") or (
        USE_PIR_TRT and importlib.util.find_spec("tensorrt") is None
    ):
        if "paddle_tensorrt" in supported_pseudo_backends:
            supported_pseudo_backends.remove("paddle_tensorrt")
        if "paddle_tensorrt_fp16" in supported_pseudo_backends:
            supported_pseudo_backends.remove("paddle_tensorrt_fp16")

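    # Map each pseudo backend from the prior-knowledge table (e.g.
    # "paddle_tensorrt_fp16") to an actual backend, keeping only backends
    # available in this environment.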
    candidate_backends = []
    backend_to_pseudo_backend = {}
    for pb in supported_pseudo_backends:
        if pb.startswith("paddle"):
            backend = "paddle"
        elif pb.startswith("tensorrt"):
            backend = "tensorrt"
        else:
            backend = pb
        if available_backends is not None and backend not in available_backends:
            continue
        candidate_backends.append(backend)
        backend_to_pseudo_backend[backend] = pb

    if not candidate_backends:
        return None, "No inference backend can be selected."

    if hpi_config.backend is not None:
        if hpi_config.backend not in candidate_backends:
            return (
                None,
                f"{repr(hpi_config.backend)} is not a supported inference backend.",
            )
        suggested_backend = hpi_config.backend
    else:
        # The first backend is the preferred one.
        suggested_backend = candidate_backends[0]

    suggested_backend_config = {}
    if suggested_backend == "paddle":
        pseudo_backend = backend_to_pseudo_backend["paddle"]
        assert pseudo_backend in (
            "paddle",
            "paddle_tensorrt_fp32",
            "paddle_tensorrt_fp16",
        ), pseudo_backend
        if pseudo_backend == "paddle_tensorrt_fp32":
            suggested_backend_config.update({"run_mode": "trt_fp32"})
        elif pseudo_backend == "paddle_tensorrt_fp16":
            # TODO: Check if the target device supports FP16.
            suggested_backend_config.update({"run_mode": "trt_fp16"})
    elif suggested_backend == "tensorrt":
        pseudo_backend = backend_to_pseudo_backend["tensorrt"]
        assert pseudo_backend in ("tensorrt", "tensorrt_fp16"), pseudo_backend
        if pseudo_backend == "tensorrt_fp16":
            suggested_backend_config.update({"precision": "fp16"})

    if hpi_config.backend_config is not None:
        suggested_backend_config.update(hpi_config.backend_config)

    return suggested_backend, suggested_backend_config
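

# Illustrative usage sketch of the helpers above; the model directory, file
# prefix, and model name below are hypothetical placeholders:
#
#     paths = get_model_paths("/path/to/model_dir", "inference")
#     config = HPIConfig(model_name="SomeModel", device_type="gpu", device_id=0)
#     backend, backend_config = suggest_inference_backend_and_config(config, paths)
#     if backend is None:
#         print(f"No suggestion: {backend_config}")
#     else:
#         print(f"Use {backend} with {backend_config}")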