hpi.py

# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import importlib.resources
import json
import platform
from functools import lru_cache
from os import PathLike
from pathlib import Path
from typing import Any, Dict, Final, List, Literal, Optional, Tuple, TypedDict, Union

from pydantic import BaseModel, Field
from typing_extensions import Annotated, TypeAlias

from ...utils.flags import FLAGS_json_format_model


class PaddleInferenceInfo(BaseModel):
    trt_dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None
    trt_dynamic_shape_input_data: Optional[Dict[str, List[List[float]]]] = None


class TensorRTInfo(BaseModel):
    dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None


class InferenceBackendInfoCollection(BaseModel):
    paddle_infer: Optional[PaddleInferenceInfo] = None
    tensorrt: Optional[TensorRTInfo] = None


# Does using `TypedDict` make things more convenient?
class HPIInfo(BaseModel):
    backend_configs: Optional[InferenceBackendInfoCollection] = None


# For multi-backend inference only
InferenceBackend: TypeAlias = Literal[
    "paddle", "openvino", "onnxruntime", "tensorrt", "om"
]


class OpenVINOConfig(BaseModel):
    cpu_num_threads: int = 8


class ONNXRuntimeConfig(BaseModel):
    cpu_num_threads: int = 8


class TensorRTConfig(BaseModel):
    precision: Literal["fp32", "fp16"] = "fp32"
    use_dynamic_shapes: bool = True
    dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None
    # TODO: Control caching behavior


class OMConfig(BaseModel):
    pass


class HPIConfig(BaseModel):
    pdx_model_name: Annotated[str, Field(alias="model_name")]
    device_type: str
    device_id: Optional[int] = None
    auto_config: bool = True
    backend: Optional[InferenceBackend] = None
    backend_config: Optional[Dict[str, Any]] = None
    hpi_info: Optional[HPIInfo] = None
    auto_paddle2onnx: bool = True
    # TODO: Add more validation logic here
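

# Illustrative example (the model name below is hypothetical, not taken from
# this module): since `pdx_model_name` is declared with the alias "model_name",
# instances are normally constructed through the alias, e.g.
#
#     config = HPIConfig(model_name="SomeModel", device_type="gpu", device_id=0)
#
# Optional fields that are not passed keep their defaults (`auto_config=True`,
# `backend=None`, and so on).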


class ModelInfo(BaseModel):
    name: str
    hpi_info: Optional[HPIInfo] = None


ModelFormat: TypeAlias = Literal["paddle", "onnx", "om"]


class ModelPaths(TypedDict, total=False):
    paddle: Tuple[Path, Path]
    onnx: Path
    om: Path


def get_model_paths(
    model_dir: Union[str, PathLike], model_file_prefix: str
) -> ModelPaths:
    model_dir = Path(model_dir)
    model_paths: ModelPaths = {}
    pd_model_path = None
    if FLAGS_json_format_model:
        if (model_dir / f"{model_file_prefix}.json").exists():
            pd_model_path = model_dir / f"{model_file_prefix}.json"
    else:
        if (model_dir / f"{model_file_prefix}.json").exists():
            pd_model_path = model_dir / f"{model_file_prefix}.json"
        elif (model_dir / f"{model_file_prefix}.pdmodel").exists():
            pd_model_path = model_dir / f"{model_file_prefix}.pdmodel"
    if pd_model_path and (model_dir / f"{model_file_prefix}.pdiparams").exists():
        model_paths["paddle"] = (
            pd_model_path,
            model_dir / f"{model_file_prefix}.pdiparams",
        )
    if (model_dir / f"{model_file_prefix}.onnx").exists():
        model_paths["onnx"] = model_dir / f"{model_file_prefix}.onnx"
    if (model_dir / f"{model_file_prefix}.om").exists():
        model_paths["om"] = model_dir / f"{model_file_prefix}.om"
    return model_paths
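

# Illustrative example (directory contents are hypothetical): for a model_dir
# containing "inference.json", "inference.pdiparams", and "inference.onnx",
# `get_model_paths(model_dir, "inference")` would return a ModelPaths dict like
#
#     {
#         "paddle": (model_dir / "inference.json", model_dir / "inference.pdiparams"),
#         "onnx": model_dir / "inference.onnx",
#     }
#
# Formats whose files are absent are simply omitted from the result.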


@lru_cache(1)
def _get_hpi_model_info_collection():
    with importlib.resources.open_text(
        __package__, "hpi_model_info_collection.json", encoding="utf-8"
    ) as f:
        hpi_model_info_collection = json.load(f)
    return hpi_model_info_collection
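

# Sketch of the expected shape of "hpi_model_info_collection.json", inferred
# from how it is consumed below (the keys and model names shown here are
# hypothetical): each environment key maps model names to an ordered list of
# pseudo-backends, most preferred first, e.g.
#
#     {
#         "cpu_x64": {"SomeModel": ["paddle", "openvino", "onnxruntime"]},
#         "gpu_cuda118_cudnn86": {"SomeModel": ["paddle_tensorrt_fp16", "tensorrt_fp16"]}
#     }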


def suggest_inference_backend_and_config(
    hpi_config: HPIConfig,
    available_backends: Optional[List[InferenceBackend]] = None,
) -> Union[Tuple[InferenceBackend, Dict[str, Any]], Tuple[None, str]]:
    # TODO: The current strategy is naive. It would be better to consider
    # additional important factors, such as NVIDIA GPU compute capability and
    # device manufacturers. We should also allow users to provide hints.
    import lazy_paddle as paddle

    if available_backends is not None and not available_backends:
        return None, "No inference backends are available."
    paddle_version = paddle.__version__
    if paddle_version != "3.0.0-rc0":
        return None, f"{repr(paddle_version)} is not a supported Paddle version."
    if hpi_config.device_type == "cpu":
        uname = platform.uname()
        arch = uname.machine.lower()
        if arch == "x86_64":
            key = "cpu_x64"
        else:
            return None, f"{repr(arch)} is not a supported architecture."
    elif hpi_config.device_type == "gpu":
        # FIXME: We should not rely on the PaddlePaddle library to determine
        # CUDA and cuDNN versions.
        # Should we inject environment info from the outside?
        import lazy_paddle.version

        cuda_version = lazy_paddle.version.cuda()
        cuda_version = cuda_version.replace(".", "")
        cudnn_version = lazy_paddle.version.cudnn().rsplit(".", 1)[0]
        cudnn_version = cudnn_version.replace(".", "")
        key = f"gpu_cuda{cuda_version}_cudnn{cudnn_version}"
    else:
        return None, f"{repr(hpi_config.device_type)} is not a supported device type."
    hpi_model_info_collection = _get_hpi_model_info_collection()
    if key not in hpi_model_info_collection:
        return None, "No prior knowledge can be utilized."
    hpi_model_info_collection_for_env = hpi_model_info_collection[key]
    if hpi_config.pdx_model_name not in hpi_model_info_collection_for_env:
        return None, f"{repr(hpi_config.pdx_model_name)} is not a known model."
    supported_pseudo_backends = hpi_model_info_collection_for_env[
        hpi_config.pdx_model_name
    ]
    candidate_backends = []
    backend_to_pseudo_backend = {}
    for pb in supported_pseudo_backends:
        if pb.startswith("paddle"):
            backend = "paddle"
        elif pb.startswith("tensorrt"):
            backend = "tensorrt"
        else:
            backend = pb
        if available_backends is not None and backend not in available_backends:
            continue
        candidate_backends.append(backend)
        backend_to_pseudo_backend[backend] = pb
    if not candidate_backends:
        return None, "No inference backend can be selected."
    if hpi_config.backend is not None:
        if hpi_config.backend not in candidate_backends:
            return (
                None,
                f"{repr(hpi_config.backend)} is not a supported inference backend.",
            )
        suggested_backend = hpi_config.backend
    else:
        # The first backend is the preferred one.
        suggested_backend = candidate_backends[0]
    suggested_backend_config = {}
    if suggested_backend == "paddle":
        pseudo_backend = backend_to_pseudo_backend["paddle"]
        assert pseudo_backend in (
            "paddle",
            "paddle_tensorrt_fp32",
            "paddle_tensorrt_fp16",
        ), pseudo_backend
        if pseudo_backend == "paddle_tensorrt_fp32":
            suggested_backend_config.update({"run_mode": "trt_fp32"})
        elif pseudo_backend == "paddle_tensorrt_fp16":
            # TODO: Check if the target device supports FP16.
            suggested_backend_config.update({"run_mode": "trt_fp16"})
    elif suggested_backend == "tensorrt":
        pseudo_backend = backend_to_pseudo_backend["tensorrt"]
        assert pseudo_backend in ("tensorrt", "tensorrt_fp16"), pseudo_backend
        if pseudo_backend == "tensorrt_fp16":
            suggested_backend_config.update({"precision": "fp16"})
    if hpi_config.backend_config is not None:
        suggested_backend_config.update(hpi_config.backend_config)
    return suggested_backend, suggested_backend_config
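

# Illustrative usage sketch (the model name and runtime environment are
# hypothetical; the actual suggestion depends on the installed Paddle build and
# on the prior-knowledge JSON shipped with the package):
#
#     config = HPIConfig(model_name="SomeModel", device_type="gpu", device_id=0)
#     backend, extra = suggest_inference_backend_and_config(config)
#     if backend is None:
#         print("No suggestion:", extra)  # `extra` is the reason string
#     else:
#         print(backend, extra)  # `extra` is a backend config dict, e.g. {"run_mode": "trt_fp16"}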