hpi.py

# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import ctypes.util
import importlib.resources
import importlib.util
import json
import platform
from functools import lru_cache
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

from pydantic import BaseModel, Field
from typing_extensions import Annotated, TypeAlias

from ...utils.deps import function_requires_deps, is_paddle2onnx_plugin_available
from ...utils.env import get_cuda_version, get_cudnn_version, get_paddle_version
from ...utils.flags import USE_PIR_TRT
from .model_paths import ModelPaths
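

# The Pydantic models below capture HPI (high-performance inference) metadata
# and configuration: per-model hints such as TensorRT dynamic shapes, plus the
# user-facing options consumed by `suggest_inference_backend_and_config`.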
class PaddleInferenceInfo(BaseModel):
    trt_dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None
    trt_dynamic_shape_input_data: Optional[Dict[str, List[List[float]]]] = None


class TensorRTInfo(BaseModel):
    dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None


class InferenceBackendInfoCollection(BaseModel):
    paddle_infer: Optional[PaddleInferenceInfo] = None
    tensorrt: Optional[TensorRTInfo] = None


# Does using `TypedDict` make things more convenient?
class HPIInfo(BaseModel):
    backend_configs: Optional[InferenceBackendInfoCollection] = None


# For multi-backend inference only
InferenceBackend: TypeAlias = Literal[
    "paddle", "openvino", "onnxruntime", "tensorrt", "om"
]
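

# Per-backend configuration models: OpenVINO and ONNX Runtime default to
# 8 CPU threads; TensorRT defaults to FP32 with dynamic shapes enabled.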
class OpenVINOConfig(BaseModel):
    cpu_num_threads: int = 8


class ONNXRuntimeConfig(BaseModel):
    cpu_num_threads: int = 8


class TensorRTConfig(BaseModel):
    precision: Literal["fp32", "fp16"] = "fp32"
    use_dynamic_shapes: bool = True
    dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None
    # TODO: Control caching behavior


class OMConfig(BaseModel):
    pass
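

# Top-level HPI configuration. `pdx_model_name` is populated via the
# `model_name` alias, and `backend`/`backend_config` let callers pin a
# specific backend instead of relying on automatic selection.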
class HPIConfig(BaseModel):
    pdx_model_name: Annotated[str, Field(alias="model_name")]
    device_type: str
    device_id: Optional[int] = None
    auto_config: bool = True
    backend: Optional[InferenceBackend] = None
    backend_config: Optional[Dict[str, Any]] = None
    hpi_info: Optional[HPIInfo] = None
    auto_paddle2onnx: bool = True
    # TODO: Add more validation logic here


class ModelInfo(BaseModel):
    name: str
    hpi_info: Optional[HPIInfo] = None


ModelFormat: TypeAlias = Literal["paddle", "onnx", "om"]
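

# `hpi_model_info_collection.json` (bundled with this package) maps an
# environment key (e.g. "cpu_x64" or "gpu_cuda<ver>_cudnn<ver>") to model
# names and, for each model, a list of supported pseudo-backends ordered by
# preference.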
@lru_cache(1)
def _get_hpi_model_info_collection():
    with importlib.resources.open_text(
        __package__, "hpi_model_info_collection.json", encoding="utf-8"
    ) as f:
        hpi_model_info_collection = json.load(f)
    return hpi_model_info_collection


@function_requires_deps("ultra-infer")
def suggest_inference_backend_and_config(
    hpi_config: HPIConfig,
    model_paths: ModelPaths,
) -> Union[Tuple[InferenceBackend, Dict[str, Any]], Tuple[None, str]]:
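    """Suggest an inference backend and backend configuration.

    Returns ``(backend, backend_config)`` on success, or ``(None, reason)``
    when no suggestion can be made (e.g. unsupported device, unknown model,
    or no usable backend). Illustrative call (the model name is hypothetical)::

        backend, info = suggest_inference_backend_and_config(
            HPIConfig(model_name="SomeModel", device_type="gpu"), model_paths
        )
    """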
    # TODO: The current strategy is naive. It would be better to consider
    # additional important factors, such as NVIDIA GPU compute capability and
    # device manufacturers. We should also allow users to provide hints.
    from ultra_infer import (
        is_built_with_om,
        is_built_with_openvino,
        is_built_with_ort,
        is_built_with_trt,
    )

    is_onnx_model_available = "onnx" in model_paths
    # TODO: Give a warning if the Paddle2ONNX plugin is not available but
    # can be used to select a better backend.
    if hpi_config.auto_paddle2onnx and is_paddle2onnx_plugin_available():
        is_onnx_model_available = is_onnx_model_available or "paddle" in model_paths

    available_backends = []
    if "paddle" in model_paths:
        available_backends.append("paddle")
    if is_built_with_openvino() and is_onnx_model_available:
        available_backends.append("openvino")
    if is_built_with_ort() and is_onnx_model_available:
        available_backends.append("onnxruntime")
    if is_built_with_trt() and is_onnx_model_available:
        available_backends.append("tensorrt")
    if is_built_with_om() and "om" in model_paths:
        available_backends.append("om")

    if not available_backends:
        return None, "No inference backends are available."

    if hpi_config.backend is not None and hpi_config.backend not in available_backends:
        return None, f"Inference backend {repr(hpi_config.backend)} is unavailable."

    paddle_version = get_paddle_version()
    if paddle_version != (3, 0, 0, None):
        return (
            None,
            f"{paddle_version} is not a supported Paddle version.",
        )
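
    # Build the environment key used to index the prior-knowledge collection:
    # "cpu_x64" for x86-64 CPUs, or "gpu_cuda<ver>_cudnn<ver>" for GPUs.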
    if hpi_config.device_type == "cpu":
        uname = platform.uname()
        arch = uname.machine.lower()
        if arch == "x86_64":
            key = "cpu_x64"
        else:
            return None, f"{repr(arch)} is not a supported architecture."
    elif hpi_config.device_type == "gpu":
        cuda_version = get_cuda_version()
        cuda_version = "".join(map(str, cuda_version))
        cudnn_version = get_cudnn_version()
        cudnn_version = "".join(map(str, cudnn_version[:-1]))
        key = f"gpu_cuda{cuda_version}_cudnn{cudnn_version}"
    else:
        return None, f"{repr(hpi_config.device_type)} is not a supported device type."

    hpi_model_info_collection = _get_hpi_model_info_collection()

    if key not in hpi_model_info_collection:
        return None, "No prior knowledge can be utilized."
    hpi_model_info_collection_for_env = hpi_model_info_collection[key]

    if hpi_config.pdx_model_name not in hpi_model_info_collection_for_env:
        return None, f"{repr(hpi_config.pdx_model_name)} is not a known model."
    supported_pseudo_backends = hpi_model_info_collection_for_env[
        hpi_config.pdx_model_name
    ].copy()
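
    # If TensorRT cannot actually be used (PIR-TRT disabled, or the `tensorrt`
    # Python package / `nvinfer` library is missing), replace the
    # Paddle-TensorRT pseudo-backends with plain Paddle inference.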
    # XXX
    if not (
        USE_PIR_TRT
        and importlib.util.find_spec("tensorrt")
        and ctypes.util.find_library("nvinfer")
    ):
        if (
            "paddle_tensorrt" in supported_pseudo_backends
            or "paddle_tensorrt_fp16" in supported_pseudo_backends
        ):
            supported_pseudo_backends.append("paddle")
        if "paddle_tensorrt" in supported_pseudo_backends:
            supported_pseudo_backends.remove("paddle_tensorrt")
        if "paddle_tensorrt_fp16" in supported_pseudo_backends:
            supported_pseudo_backends.remove("paddle_tensorrt_fp16")
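
    # Map each pseudo-backend (e.g. "paddle_tensorrt_fp16", "tensorrt_fp16")
    # to a public backend name, keeping only backends that are available in
    # the current build and for the given model files.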
    candidate_backends = []
    backend_to_pseudo_backend = {}
    for pb in supported_pseudo_backends:
        if pb.startswith("paddle"):
            backend = "paddle"
        elif pb.startswith("tensorrt"):
            backend = "tensorrt"
        else:
            backend = pb
        if available_backends is not None and backend not in available_backends:
            continue
        candidate_backends.append(backend)
        backend_to_pseudo_backend[backend] = pb

    if not candidate_backends:
        return None, "No inference backend can be selected."

    if hpi_config.backend is not None:
        if hpi_config.backend not in candidate_backends:
            return (
                None,
                f"{repr(hpi_config.backend)} is not a supported inference backend.",
            )
        suggested_backend = hpi_config.backend
    else:
        # The first backend is the preferred one.
        suggested_backend = candidate_backends[0]

    suggested_backend_config = {}
    if suggested_backend == "paddle":
        pseudo_backend = backend_to_pseudo_backend["paddle"]
        assert pseudo_backend in (
            "paddle",
            "paddle_tensorrt",
            "paddle_tensorrt_fp16",
        ), pseudo_backend
        if pseudo_backend == "paddle_tensorrt":
            suggested_backend_config.update({"run_mode": "trt_fp32"})
        elif pseudo_backend == "paddle_tensorrt_fp16":
            # TODO: Check if the target device supports FP16.
            suggested_backend_config.update({"run_mode": "trt_fp16"})
    elif suggested_backend == "tensorrt":
        pseudo_backend = backend_to_pseudo_backend["tensorrt"]
        assert pseudo_backend in ("tensorrt", "tensorrt_fp16"), pseudo_backend
        if pseudo_backend == "tensorrt_fp16":
            suggested_backend_config.update({"precision": "fp16"})

    if hpi_config.backend_config is not None:
        suggested_backend_config.update(hpi_config.backend_config)

    return suggested_backend, suggested_backend_config