# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
import logging
import numpy as np
from . import ModelFormat
from . import c_lib_wrap as C


class Runtime:
    """UltraInfer Runtime object."""

    def __init__(self, runtime_option):
        """Initialize a UltraInfer Runtime object.
        :param runtime_option: (ultra_infer.RuntimeOption)Options for UltraInfer Runtime
        """
        self._runtime = C.Runtime()
        self.runtime_option = runtime_option
        assert self._runtime.init(
            self.runtime_option._option
        ), "Initialize Runtime Failed!"

    def forward(self, *inputs):
        """[Only for Poros backend] Inference with input data for Poros
        :param inputs: (list[numpy.ndarray])The input data list
        :return list of numpy.ndarray
        """
        if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT:
            raise Exception(
                "The forward function is only used for Poros backend, please call infer function"
            )
        inputs_dict = dict()
        for i in range(len(inputs)):
            inputs_dict["x" + str(i)] = inputs[i]
        return self.infer(inputs_dict)

    def infer(self, data):
        """Inference with input data.
        :param data: (dict[str : numpy.ndarray])The input data dict, keys must match the input names of the loaded model
        :return list of numpy.ndarray
        """
        assert isinstance(data, dict) or isinstance(
            data, list
        ), "The input data should be type of dict or list."
        if isinstance(data, dict):
            for k, v in data.items():
                if isinstance(v, np.ndarray) and not v.data.contiguous:
                    data[k] = np.ascontiguousarray(data[k])
        return self._runtime.infer(data)
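
    # Illustrative usage of Runtime.infer (a hedged sketch, not executed by this
    # module): the model path, input name and shape below are hypothetical, and
    # it assumes Runtime/RuntimeOption/ModelFormat are exported at package level.
    #
    #   import numpy as np
    #   import ultra_infer as ui
    #
    #   opt = ui.RuntimeOption()
    #   opt.set_model_path("model.onnx", model_format=ui.ModelFormat.ONNX)
    #   rt = ui.Runtime(opt)
    #   x = np.random.rand(1, 3, 224, 224).astype("float32")
    #   outputs = rt.infer({"x": x})  # keys must match the loaded model's input names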

    def bind_input_tensor(self, name, fdtensor):
        """Bind FDTensor by name, no copy; shares input memory.
        :param name: (str)The name of input data.
        :param fdtensor: (ultra_infer.FDTensor)The input FDTensor.
        """
        self._runtime.bind_input_tensor(name, fdtensor)

    def bind_output_tensor(self, name, fdtensor):
        """Bind FDTensor by name, no copy; shares output memory.
        :param name: (str)The name of output data.
        :param fdtensor: (ultra_infer.FDTensor)The output FDTensor.
        """
        self._runtime.bind_output_tensor(name, fdtensor)

    def zero_copy_infer(self):
        """Run inference without passing input data as parameters.
        The input and output data must be exchanged through the bind_input_tensor and get_output_tensor interfaces.
        """
        self._runtime.infer()

    def get_output_tensor(self, name):
        """Get output FDTensor by name, no copy; shares backend output memory.
        :param name: (str)The name of output data.
        :return ultra_infer.FDTensor
        """
        return self._runtime.get_output_tensor(name)
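
    # Illustrative zero-copy flow (a hedged sketch): `in_tensor` stands for a
    # prepared ultra_infer.FDTensor, and "x"/"y" are hypothetical tensor names.
    #
    #   rt.bind_input_tensor("x", in_tensor)   # share input memory, no copy
    #   rt.zero_copy_infer()                   # run with the bound tensors
    #   out = rt.get_output_tensor("y")        # shares backend output memory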

    def compile(self, warm_datas):
        """[Only for Poros backend] Compile with prewarm data for Poros
        :param warm_datas: (list[list[numpy.ndarray]])The prewarm data list
        :return TorchScript Model
        """
        if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT:
            raise Exception(
                "The compile function is only used for Poros backend, please call infer function"
            )
        assert isinstance(warm_datas, list), "The prewarm data should be type of list."
        for i in range(len(warm_datas)):
            warm_data = warm_datas[i]
            if isinstance(warm_data[0], np.ndarray):
                warm_data = list(data for data in warm_data)
            else:
                warm_data = list(data.numpy() for data in warm_data)
            warm_datas[i] = warm_data
        return self._runtime.compile(warm_datas, self.runtime_option._option)

    def num_inputs(self):
        """Get number of inputs of the loaded model."""
        return self._runtime.num_inputs()

    def num_outputs(self):
        """Get number of outputs of the loaded model."""
        return self._runtime.num_outputs()

    def get_input_info(self, index):
        """Get input information of the loaded model.
        :param index: (int)Index of the input
        :return ultra_infer.TensorInfo
        """
        assert isinstance(
            index, int
        ), "The input parameter index should be type of int."
        assert (
            index < self.num_inputs()
        ), "The input parameter index:{} should be less than the number of inputs:{}.".format(
            index, self.num_inputs()
        )
        return self._runtime.get_input_info(index)

    def get_output_info(self, index):
        """Get output information of the loaded model.
        :param index: (int)Index of the output
        :return ultra_infer.TensorInfo
        """
        assert isinstance(
            index, int
        ), "The input parameter index should be type of int."
        assert (
            index < self.num_outputs()
        ), "The input parameter index:{} should be less than the number of outputs:{}.".format(
            index, self.num_outputs()
        )
        return self._runtime.get_output_info(index)

    def get_profile_time(self):
        """Get profile time of Runtime after the profile process is done."""
        return self._runtime.get_profile_time()


class RuntimeOption:
    """Options for UltraInfer Runtime."""

    __slots__ = ["_option"]

    def __init__(self):
        """Initialize a UltraInfer RuntimeOption object."""
        self._option = C.RuntimeOption()

    def set_model_path(
        self, model_path, params_path="", model_format=ModelFormat.PADDLE
    ):
        """Set path of model file and parameters file
        :param model_path: (str)Path of model file
        :param params_path: (str)Path of parameters file
        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT
        """
        return self._option.set_model_path(model_path, params_path, model_format)

    def set_model_buffer(
        self, model_buffer, params_buffer="", model_format=ModelFormat.PADDLE
    ):
        """Specify the memory buffers of model and parameters. Used when model and params are loaded directly from memory
        :param model_buffer: (bytes)The memory buffer of model
        :param params_buffer: (bytes)The memory buffer of the parameters
        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT
        """
        return self._option.set_model_buffer(model_buffer, params_buffer, model_format)
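
    # Illustrative model loading (a hedged sketch): the file names are
    # hypothetical; Paddle models take a separate parameters file, ONNX models
    # do not.
    #
    #   opt = RuntimeOption()
    #   opt.set_model_path("inference.pdmodel", "inference.pdiparams")
    #   # or, loading the same model from memory:
    #   with open("inference.pdmodel", "rb") as f:
    #       model_buf = f.read()
    #   with open("inference.pdiparams", "rb") as f:
    #       params_buf = f.read()
    #   opt.set_model_buffer(model_buf, params_buf, ModelFormat.PADDLE)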

    def use_gpu(self, device_id=0):
        """Inference with Nvidia GPU
        :param device_id: (int)The index of the GPU that will be used for inference, default 0
        """
        if not C.is_built_with_gpu():
            logging.warning(
                "The installed ultra_infer-python package is not built with GPU, will force to use CPU. To use GPU, follow the commands to install ultra_infer-gpu-python."
            )
            return
        return self._option.use_gpu(device_id)

    def use_kunlunxin(
        self,
        device_id=0,
        l3_workspace_size=16 * 1024 * 1024,
        locked=False,
        autotune=True,
        autotune_file="",
        precision="int16",
        adaptive_seqlen=False,
        enable_multi_stream=False,
        gm_default_size=0,
    ):
        """Inference with KunlunXin XPU
        :param device_id: (int)The index of the KunlunXin XPU that will be used for inference, default 0
        :param l3_workspace_size: (int)The size of the video memory allocated by the L3 cache, the maximum is 16M, default 16M
        :param locked: (bool)Whether the allocated L3 cache can be locked. If false, the L3 cache is not locked
            and can be shared by multiple models.
        :param autotune: (bool)Whether to autotune the conv operators in the model.
            If true, when a conv operator of a certain dimension is executed for the first time,
            it will automatically search for a better algorithm to improve the performance of subsequent conv operators of the same dimension.
        :param autotune_file: (str)Specify the path of the autotune file. If autotune_file is specified,
            the algorithm specified in the file will be used and autotune will not be performed again.
        :param precision: (str)Calculation accuracy of multi_encoder
        :param adaptive_seqlen: (bool)Whether the input of multi_encoder is variable length
        :param enable_multi_stream: (bool)Whether to enable the multi stream of KunlunXin XPU.
        :param gm_default_size: (int)The default size of context global memory of KunlunXin XPU.
        """
        return self._option.use_kunlunxin(
            device_id,
            l3_workspace_size,
            locked,
            autotune,
            autotune_file,
            precision,
            adaptive_seqlen,
            enable_multi_stream,
            gm_default_size,
        )

    def use_cpu(self):
        """Inference with CPU"""
        return self._option.use_cpu()

    def use_rknpu2(
        self, rknpu2_name=C.CpuName.RK356X, rknpu2_core=C.CoreMask.RKNN_NPU_CORE_AUTO
    ):
        """Inference with Rockchip RKNPU2"""
        return self._option.use_rknpu2(rknpu2_name, rknpu2_core)

    def use_sophgo(self):
        """Inference with SOPHGO TPU"""
        return self._option.use_sophgo()

    def use_ascend(self, device_id=0):
        """Inference with Huawei Ascend NPU"""
        return self._option.use_ascend(device_id)

    def disable_valid_backend_check(self):
        """Disable checking validity of backend during inference"""
        return self._option.disable_valid_backend_check()

    def enable_valid_backend_check(self):
        """Enable checking validity of backend during inference"""
        return self._option.enable_valid_backend_check()

    def set_cpu_thread_num(self, thread_num=-1):
        """Set number of threads if inference with CPU
        :param thread_num: (int)Number of threads; if not positive, the number of threads is decided by the backend, default -1
        """
        return self._option.set_cpu_thread_num(thread_num)
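
    # Illustrative CPU configuration (a hedged sketch): the thread count and the
    # chosen backend are arbitrary examples.
    #
    #   opt = RuntimeOption()
    #   opt.use_cpu()
    #   opt.set_cpu_thread_num(8)
    #   opt.use_ort_backend()  # or use_openvino_backend() / use_paddle_infer_backend()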

    def set_ort_graph_opt_level(self, level=-1):
        """Set graph optimization level for ONNX Runtime backend
        :param level: (int)Optimization level, -1 means the default setting
        """
        logging.warning(
            "`RuntimeOption.set_ort_graph_opt_level` will be deprecated in v1.2.0, please use `RuntimeOption.ort_option.graph_optimize_level = 99` instead."
        )
        self._option.ort_option.graph_optimize_level = level

    def use_paddle_backend(self):
        """Use Paddle Inference backend, support inference of Paddle models on CPU/Nvidia GPU."""
        return self._option.use_paddle_backend()

    def use_paddle_infer_backend(self):
        """Wrapper function of use_paddle_backend(), use Paddle Inference backend, support inference of Paddle models on CPU/Nvidia GPU."""
        return self.use_paddle_backend()

    def use_poros_backend(self):
        """Use Poros backend, support inference of TorchScript models on CPU/Nvidia GPU."""
        return self._option.use_poros_backend()

    def use_ort_backend(self):
        """Use ONNX Runtime backend, support inference of Paddle/ONNX models on CPU/Nvidia GPU."""
        return self._option.use_ort_backend()

    def use_tvm_backend(self):
        """Use TVM Runtime backend, support inference of TVM models on CPU."""
        return self._option.use_tvm_backend()

    def use_trt_backend(self):
        """Use TensorRT backend, support inference of Paddle/ONNX models on Nvidia GPU."""
        return self._option.use_trt_backend()

    def use_openvino_backend(self):
        """Use OpenVINO backend, support inference of Paddle/ONNX models on CPU."""
        return self._option.use_openvino_backend()

    def use_lite_backend(self):
        """Use Paddle Lite backend, support inference of Paddle models on ARM CPU."""
        return self._option.use_lite_backend()

    def use_paddle_lite_backend(self):
        """Wrapper function of use_lite_backend(), use Paddle Lite backend, support inference of Paddle models on ARM CPU."""
        return self.use_lite_backend()

    def use_om_backend(self):
        """Use OM backend, support inference of OM models on NPU"""
        return self._option.use_om_backend()

    def set_lite_context_properties(self, context_properties):
        """Set nnadapter context properties for Paddle Lite backend."""
        logging.warning(
            "`RuntimeOption.set_lite_context_properties` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_context_properties = ...` instead."
        )
        self._option.paddle_lite_option.nnadapter_context_properties = (
            context_properties
        )

    def set_lite_model_cache_dir(self, model_cache_dir):
        """Set nnadapter model cache dir for Paddle Lite backend."""
        logging.warning(
            "`RuntimeOption.set_lite_model_cache_dir` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_model_cache_dir = ...` instead."
        )
        self._option.paddle_lite_option.nnadapter_model_cache_dir = model_cache_dir

    def set_lite_dynamic_shape_info(self, dynamic_shape_info):
        """Set nnadapter dynamic shape info for Paddle Lite backend."""
        logging.warning(
            "`RuntimeOption.set_lite_dynamic_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_dynamic_shape_info = ...` instead."
        )
        self._option.paddle_lite_option.nnadapter_dynamic_shape_info = (
            dynamic_shape_info
        )

    def set_lite_subgraph_partition_path(self, subgraph_partition_path):
        """Set nnadapter subgraph partition path for Paddle Lite backend."""
        logging.warning(
            "`RuntimeOption.set_lite_subgraph_partition_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_path = ...` instead."
        )
        self._option.paddle_lite_option.nnadapter_subgraph_partition_config_path = (
            subgraph_partition_path
        )

    def set_lite_subgraph_partition_config_buffer(self, subgraph_partition_buffer):
        """Set nnadapter subgraph partition buffer for Paddle Lite backend."""
        logging.warning(
            "`RuntimeOption.set_lite_subgraph_partition_buffer` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_buffer = ...` instead."
        )
        self._option.paddle_lite_option.nnadapter_subgraph_partition_config_buffer = (
            subgraph_partition_buffer
        )

    def set_lite_mixed_precision_quantization_config_path(
        self, mixed_precision_quantization_config_path
    ):
        """Set nnadapter mixed precision quantization config path for Paddle Lite backend."""
        logging.warning(
            "`RuntimeOption.set_lite_mixed_precision_quantization_config_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = ...` instead."
        )
        self._option.paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = (
            mixed_precision_quantization_config_path
        )

    def set_paddle_mkldnn(self, use_mkldnn=True):
        """Enable/Disable MKLDNN while using Paddle Inference backend, MKLDNN is enabled by default."""
        logging.warning(
            "`RuntimeOption.set_paddle_mkldnn` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.enable_mkldnn = True` instead."
        )
        self._option.paddle_infer_option.enable_mkldnn = use_mkldnn

    def set_openvino_device(self, name="CPU"):
        """Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1', ...
        This interface is deprecated, please use `RuntimeOption.openvino_option.set_device` instead.
        """
        logging.warning(
            "`RuntimeOption.set_openvino_device` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_device` instead."
        )
        self._option.openvino_option.set_device(name)

    def set_openvino_shape_info(self, shape_info):
        """Set shape information of the model's inputs, used for GPU to fix the shape
        This interface is deprecated, please use `RuntimeOption.openvino_option.set_shape_info` instead.
        :param shape_info: (dict{str, list of int})Shape information of model's inputs, e.g {"image": [1, 3, 640, 640], "scale_factor": [1, 2]}
        """
        logging.warning(
            "`RuntimeOption.set_openvino_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_shape_info` instead."
        )
        self._option.openvino_option.set_shape_info(shape_info)

    def set_openvino_cpu_operators(self, operators):
        """While using OpenVINO backend and Intel GPU, this interface specifies unsupported operators to run on CPU
        This interface is deprecated, please use `RuntimeOption.openvino_option.set_cpu_operators` instead.
        :param operators: (list of string)List of operators' names, e.g ["MulticlassNms"]
        """
        logging.warning(
            "`RuntimeOption.set_openvino_cpu_operators` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_cpu_operators` instead."
        )
        self._option.openvino_option.set_cpu_operators(operators)
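
    # Illustrative OpenVINO configuration using the non-deprecated
    # `openvino_option` property (a hedged sketch; the input names and shapes
    # are hypothetical).
    #
    #   opt.use_openvino_backend()
    #   opt.openvino_option.set_device("CPU")
    #   opt.openvino_option.set_shape_info({"image": [1, 3, 640, 640], "scale_factor": [1, 2]})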

    def enable_paddle_log_info(self):
        """Enable printing the debug log information while using Paddle Inference backend, the log information is disabled by default."""
        logging.warning(
            "`RuntimeOption.enable_paddle_log_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.enable_log_info = True` instead."
        )
        self._option.paddle_infer_option.enable_log_info = True

    def disable_paddle_log_info(self):
        """Disable printing the debug log information while using Paddle Inference backend, the log information is disabled by default."""
        logging.warning(
            "`RuntimeOption.disable_paddle_log_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.enable_log_info = False` instead."
        )
        self._option.paddle_infer_option.enable_log_info = False

    def set_paddle_mkldnn_cache_size(self, cache_size):
        """Set size of the shape cache while using Paddle Inference backend with MKLDNN enabled; by default all dynamic shapes are cached."""
        logging.warning(
            "`RuntimeOption.set_paddle_mkldnn_cache_size` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.mkldnn_cache_size = {}` instead.".format(
                cache_size
            )
        )
        self._option.paddle_infer_option.mkldnn_cache_size = cache_size

    def enable_lite_fp16(self):
        """Enable half precision inference while using Paddle Lite backend on ARM CPU, fp16 is disabled by default."""
        logging.warning(
            "`RuntimeOption.enable_lite_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.enable_fp16 = True` instead."
        )
        self._option.paddle_lite_option.enable_fp16 = True

    def disable_lite_fp16(self):
        """Disable half precision inference while using Paddle Lite backend on ARM CPU, fp16 is disabled by default."""
        logging.warning(
            "`RuntimeOption.disable_lite_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.enable_fp16 = False` instead."
        )
        self._option.paddle_lite_option.enable_fp16 = False

    def set_lite_power_mode(self, mode):
        """Set POWER mode while using Paddle Lite backend on ARM CPU."""
        logging.warning(
            "`RuntimeOption.set_lite_power_mode` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.power_mode = {}` instead.".format(
                mode
            )
        )
        self._option.paddle_lite_option.power_mode = mode

    def set_trt_input_shape(
        self, tensor_name, min_shape, opt_shape=None, max_shape=None
    ):
        """Set shape range information while using TensorRT backend and loading a model that contains dynamic input shapes. When inference runs with a new input shape outside the set shape range, the TensorRT engine will be rebuilt to expand the shape range information.
        :param tensor_name: (str)Name of input which has dynamic shape
        :param min_shape: (list of int)Minimum shape of the input, e.g [1, 3, 224, 224]
        :param opt_shape: (list of int)Optimal shape of the input, this is often set to the most common input shape; if set to None, it will keep the same as min_shape
        :param max_shape: (list of int)Maximum shape of the input, e.g [8, 3, 224, 224]; if set to None, it will keep the same as min_shape
        """
        logging.warning(
            "`RuntimeOption.set_trt_input_shape` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.set_shape()` instead."
        )
        if opt_shape is None and max_shape is None:
            opt_shape = min_shape
            max_shape = min_shape
        else:
            assert (
                opt_shape is not None and max_shape is not None
            ), "Set min_shape only, or set min_shape, opt_shape and max_shape all."
        return self._option.trt_option.set_shape(
            tensor_name, min_shape, opt_shape, max_shape
        )

    def set_trt_input_data(
        self, tensor_name, min_input_data, opt_input_data=None, max_input_data=None
    ):
        """Set input data while using TensorRT backend and loading a model that contains dynamic input shapes.
        :param tensor_name: (str)Name of input which has dynamic shape
        :param min_input_data: (list of int)Input data for the minimum shape of the input.
        :param opt_input_data: (list of int)Input data for the optimal shape of the input; if set to None, it will keep the same as min_input_data
        :param max_input_data: (list of int)Input data for the maximum shape of the input; if set to None, it will keep the same as min_input_data
        """
        logging.warning(
            "`RuntimeOption.set_trt_input_data` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.set_input_data()` instead."
        )
        if opt_input_data is None and max_input_data is None:
            opt_input_data = min_input_data
            max_input_data = min_input_data
        else:
            assert (
                opt_input_data is not None and max_input_data is not None
            ), "Set min_input_data only, or set min_input_data, opt_input_data and max_input_data all."
        return self._option.trt_option.set_input_data(
            tensor_name, min_input_data, opt_input_data, max_input_data
        )

    def set_trt_cache_file(self, cache_file_path):
        """Set a cache file path while using TensorRT backend. When loading a Paddle/ONNX model with set_trt_cache_file("./tensorrt_cache/model.trt"), if the file `./tensorrt_cache/model.trt` exists, it will skip building the TensorRT engine and load the cache file directly; if the file doesn't exist, it will build the TensorRT engine and save the engine as a binary string to the cache file.
        :param cache_file_path: (str)Path of TensorRT cache file
        """
        logging.warning(
            "`RuntimeOption.set_trt_cache_file` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.serialize_file = {}` instead.".format(
                cache_file_path
            )
        )
        self._option.trt_option.serialize_file = cache_file_path

    def enable_trt_fp16(self):
        """Enable half precision inference while using TensorRT backend; notice that not all Nvidia GPUs support FP16, and in those cases it will fall back to FP32 inference."""
        logging.warning(
            "`RuntimeOption.enable_trt_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.enable_fp16 = True` instead."
        )
        self._option.trt_option.enable_fp16 = True

    def disable_trt_fp16(self):
        """Disable half precision inference while using TensorRT backend."""
        logging.warning(
            "`RuntimeOption.disable_trt_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.enable_fp16 = False` instead."
        )
        self._option.trt_option.enable_fp16 = False
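
    # Illustrative TensorRT configuration using the non-deprecated `trt_option`
    # property (a hedged sketch; the tensor name, shapes and cache path are
    # hypothetical).
    #
    #   opt.use_gpu(0)
    #   opt.use_trt_backend()
    #   opt.trt_option.set_shape("image", [1, 3, 224, 224], [4, 3, 224, 224], [8, 3, 224, 224])
    #   opt.trt_option.serialize_file = "./tensorrt_cache/model.trt"
    #   opt.trt_option.enable_fp16 = True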

    def enable_pinned_memory(self):
        """Enable pinned memory. Pinned memory can be utilized to speed up the data transfer between CPU and GPU. Currently it's only supported in the TRT backend and the Paddle Inference backend."""
        return self._option.enable_pinned_memory()

    def disable_pinned_memory(self):
        """Disable pinned memory."""
        return self._option.disable_pinned_memory()

    def enable_paddle_to_trt(self):
        """While using TensorRT backend, enable_paddle_to_trt() will change to use the Paddle Inference backend and its integrated TensorRT instead."""
        logging.warning(
            "`RuntimeOption.enable_paddle_to_trt` will be deprecated in v1.2.0, if you want to run TensorRT with the Paddle Inference backend, please use the following method:"
        )
        logging.warning(" ==============================================")
        logging.warning(" import ultra_infer as fd")
        logging.warning(" option = fd.RuntimeOption()")
        logging.warning(" option.use_gpu(0)")
        logging.warning(" option.use_paddle_infer_backend()")
        logging.warning(" option.paddle_infer_option.enable_trt = True")
        logging.warning(" ==============================================")
        self._option.use_paddle_backend()
        self._option.paddle_infer_option.enable_trt = True

    def set_trt_max_workspace_size(self, trt_max_workspace_size):
        """Set max workspace size while using TensorRT backend."""
        logging.warning(
            "`RuntimeOption.set_trt_max_workspace_size` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.max_workspace_size = {}` instead.".format(
                trt_max_workspace_size
            )
        )
        self._option.trt_option.max_workspace_size = trt_max_workspace_size

    def set_trt_max_batch_size(self, trt_max_batch_size):
        """Set max batch size while using TensorRT backend."""
        logging.warning(
            "`RuntimeOption.set_trt_max_batch_size` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.max_batch_size = {}` instead.".format(
                trt_max_batch_size
            )
        )
        self._option.trt_option.max_batch_size = trt_max_batch_size

    def enable_paddle_trt_collect_shape(self):
        """Enable collecting subgraph shape information while using Paddle Inference with TensorRT"""
        logging.warning(
            "`RuntimeOption.enable_paddle_trt_collect_shape` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.collect_trt_shape = True` instead."
        )
        self._option.paddle_infer_option.collect_trt_shape = True

    def disable_paddle_trt_collect_shape(self):
        """Disable collecting subgraph shape information while using Paddle Inference with TensorRT"""
        logging.warning(
            "`RuntimeOption.disable_paddle_trt_collect_shape` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.collect_trt_shape = False` instead."
        )
        self._option.paddle_infer_option.collect_trt_shape = False

    def delete_paddle_backend_pass(self, pass_name):
        """Delete pass by name in Paddle Inference backend"""
        logging.warning(
            "`RuntimeOption.delete_paddle_backend_pass` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.delete_pass` instead."
        )
        self._option.paddle_infer_option.delete_pass(pass_name)

    def disable_paddle_trt_ops(self, ops):
        """Disable some ops in the Paddle Inference backend when TensorRT is used"""
        logging.warning(
            "`RuntimeOption.disable_paddle_trt_ops` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.disable_trt_ops()` instead."
        )
        self._option.paddle_infer_option.disable_trt_ops(ops)

    def use_ipu(
        self,
        device_num=1,
        micro_batch_size=1,
        enable_pipelining=False,
        batches_per_step=1,
    ):
        """Inference with IPU"""
        return self._option.use_ipu(
            device_num, micro_batch_size, enable_pipelining, batches_per_step
        )

    def set_ipu_config(
        self,
        enable_fp16=False,
        replica_num=1,
        available_memory_proportion=1.0,
        enable_half_partial=False,
    ):
        """Configure IPU options while using Paddle Inference backend"""
        logging.warning(
            "`RuntimeOption.set_ipu_config` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.set_ipu_config()` instead."
        )
        self._option.paddle_infer_option.set_ipu_config(
            enable_fp16, replica_num, available_memory_proportion, enable_half_partial
        )

    @property
    def poros_option(self):
        """Get PorosBackendOption object to configure Poros backend
        :return PorosBackendOption
        """
        return self._option.poros_option

    @property
    def paddle_lite_option(self):
        """Get LiteBackendOption object to configure Paddle Lite backend
        :return LiteBackendOption
        """
        return self._option.paddle_lite_option

    @property
    def openvino_option(self):
        """Get OpenVINOOption object to configure OpenVINO backend
        :return OpenVINOOption
        """
        return self._option.openvino_option

    @property
    def ort_option(self):
        """Get OrtBackendOption object to configure ONNX Runtime backend
        :return OrtBackendOption
        """
        return self._option.ort_option

    @property
    def trt_option(self):
        """Get TrtBackendOption object to configure TensorRT backend
        :return TrtBackendOption
        """
        return self._option.trt_option

    @property
    def paddle_infer_option(self):
        """Get PaddleBackendOption object to configure Paddle Inference backend
        :return PaddleBackendOption
        """
        return self._option.paddle_infer_option

    def enable_profiling(self, inclue_h2d_d2h=False, repeat=100, warmup=50):
        """Set the profile mode as 'true'.
        :param inclue_h2d_d2h: (bool)Whether to include the H2D/D2H transfer time in the runtime time.
        :param repeat: (int)Repeat times for runtime inference.
        :param warmup: (int)Warmup times for runtime inference.
        """
        return self._option.enable_profiling(inclue_h2d_d2h, repeat, warmup)

    def disable_profiling(self):
        """Set the profile mode as 'false'."""
        return self._option.disable_profiling()
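
    # Illustrative profiling flow (a hedged sketch): the repeat/warmup counts
    # are arbitrary, and `feed` stands for the model's input dict.
    #
    #   opt.enable_profiling(inclue_h2d_d2h=False, repeat=100, warmup=50)
    #   rt = Runtime(opt)
    #   rt.infer(feed)               # profiled internally by the runtime
    #   t = rt.get_profile_time()    # available after the profile process is done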

    def set_external_raw_stream(self, cuda_stream):
        """Set the external raw stream used by ultra_infer runtime."""
        self._option.set_external_raw_stream(cuda_stream)

    def __repr__(self):
        attrs = dir(self._option)
        message = "RuntimeOption(\n"
        for attr in attrs:
            if attr.startswith("__"):
                continue
            if hasattr(getattr(self._option, attr), "__call__"):
                continue
            message += " {} : {}\t\n".format(attr, getattr(self._option, attr))
        message = message.strip("\n")
        message += ")"
        return message