runtime_option.h

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*! \file runtime_option.h
    \brief Options used to configure the UltraInfer Runtime: model path/format,
    target device, inference backend, and backend-specific settings.
*/
#pragma once

#include "ultra_infer/benchmark/option.h"
#include "ultra_infer/runtime/backends/lite/option.h"
#include "ultra_infer/runtime/backends/openvino/option.h"
#include "ultra_infer/runtime/backends/ort/option.h"
#include "ultra_infer/runtime/backends/paddle/option.h"
#include "ultra_infer/runtime/backends/poros/option.h"
#include "ultra_infer/runtime/backends/rknpu2/option.h"
#include "ultra_infer/runtime/backends/sophgo/option.h"
#include "ultra_infer/runtime/backends/tensorrt/option.h"
#include "ultra_infer/runtime/backends/tvm/option.h"
#include "ultra_infer/runtime/enum_variables.h"
#include <algorithm>
#include <map>
#include <vector>
namespace ultra_infer {

/*! @brief Option object used when creating a new Runtime object
 */
struct ULTRAINFER_DECL RuntimeOption {
  /** \brief Set the path of the model file and the parameter file
   *
   * \param[in] model_path Path of the model file, e.g. ResNet50/model.pdmodel
   *            for a Paddle format model, or ResNet50/model.onnx for an ONNX
   *            format model
   * \param[in] params_path Path of the parameter file; only used when the
   *            model format is Paddle, e.g. ResNet50/model.pdiparams
   * \param[in] format Format of the loaded model
   */
  void SetModelPath(const std::string &model_path,
                    const std::string &params_path = "",
                    const ModelFormat &format = ModelFormat::PADDLE);
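  /* A minimal usage sketch (not part of the original header): load a model by
   * path. The "ResNet50/..." paths are illustrative only, and ModelFormat::ONNX
   * is assumed to be defined in enum_variables.h for ONNX models.
   *
   *   ultra_infer::RuntimeOption option;
   *   option.SetModelPath("ResNet50/model.pdmodel", "ResNet50/model.pdiparams",
   *                       ultra_infer::ModelFormat::PADDLE);
   *   // For an ONNX model the parameter path stays empty:
   *   // option.SetModelPath("ResNet50/model.onnx", "",
   *   //                     ultra_infer::ModelFormat::ONNX);
   */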
  /** \brief Specify the memory buffers of the model and parameters. Used when
   * the model and parameters are loaded directly from memory
   *
   * \param[in] model_buffer The string holding the model memory buffer
   * \param[in] params_buffer The string holding the parameters memory buffer
   * \param[in] format Format of the loaded model
   */
  void SetModelBuffer(const std::string &model_buffer,
                      const std::string &params_buffer = "",
                      const ModelFormat &format = ModelFormat::PADDLE);
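  /* A minimal sketch of loading from memory (not part of the original header),
   * assuming the caller reads the files into std::string buffers itself; the
   * ReadFile helper below is hypothetical.
   *
   *   #include <fstream>
   *   #include <sstream>
   *
   *   std::string ReadFile(const std::string &path) {
   *     std::ifstream fin(path, std::ios::binary);
   *     std::ostringstream ss;
   *     ss << fin.rdbuf();
   *     return ss.str();
   *   }
   *
   *   ultra_infer::RuntimeOption option;
   *   option.SetModelBuffer(ReadFile("ResNet50/model.pdmodel"),
   *                         ReadFile("ResNet50/model.pdiparams"),
   *                         ultra_infer::ModelFormat::PADDLE);
   */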
  /// Use CPU for inference; the runtime runs on CPU by default
  void UseCpu();
  /// Use NVIDIA GPU for inference
  void UseGpu(int gpu_id = 0);
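  /* A minimal sketch of device selection (not part of the original header):
   *
   *   ultra_infer::RuntimeOption option;
   *   option.UseGpu(0);           // run on the NVIDIA GPU with id 0
   *   // option.UseCpu();         // or stay on CPU (the default)
   *   option.SetCpuThreadNum(8);  // CPU thread count, used by CPU backends
   */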
  /// Use RKNPU2, e.g. RK3588/RK356X, for inference
  void UseRKNPU2(ultra_infer::rknpu2::CpuName rknpu2_name =
                     ultra_infer::rknpu2::CpuName::RK356X,
                 ultra_infer::rknpu2::CoreMask rknpu2_core =
                     ultra_infer::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO);
  /// Use Horizon NPU for inference
  void UseHorizon();
  /// Use TimVX, e.g. RV1126/A311D, for inference
  void UseTimVX();
  /// Use Huawei Ascend for inference
  void UseAscend(int npu_id);
  /// Use ONNX Runtime DirectML for inference
  void UseDirectML();
  /// Use Sophgo for inference
  void UseSophgo();
  /// \brief Turn on KunlunXin XPU.
  ///
  /// \param kunlunxin_id The KunlunXin XPU card to use (default is 0).
  /// \param l3_workspace_size The size of the video memory allocated by the L3
  ///        cache; the maximum is 16 MB.
  /// \param locked Whether the allocated L3 cache can be locked. If false,
  ///        the L3 cache is not locked and can be shared by multiple models,
  ///        and multiple models sharing the L3 cache will be executed
  ///        sequentially on the card.
  /// \param autotune Whether to autotune the conv operators in the model. If
  ///        true, when a conv operator of a certain dimension is executed for
  ///        the first time, the runtime automatically searches for a better
  ///        algorithm to improve the performance of subsequent conv operators
  ///        of the same dimension.
  /// \param autotune_file Path of the autotune file. If autotune_file is
  ///        specified, the algorithms recorded in the file are used and
  ///        autotune is not performed again.
  /// \param precision Calculation precision of multi_encoder.
  /// \param adaptive_seqlen Whether the input of multi_encoder is variable
  ///        length.
  /// \param enable_multi_stream Whether to enable the multi stream of
  ///        KunlunXin XPU.
  /// \param gm_default_size The default size of global memory of KunlunXin
  ///        XPU.
  ///
  void UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00,
                    bool locked = false, bool autotune = true,
                    const std::string &autotune_file = "",
                    const std::string &precision = "int16",
                    bool adaptive_seqlen = false,
                    bool enable_multi_stream = false,
                    int64_t gm_default_size = 0);
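  /* A minimal sketch (not part of the original header): run on KunlunXin XPU
   * card 0 with the default L3 workspace size and int16 precision.
   *
   *   ultra_infer::RuntimeOption option;
   *   option.UseKunlunXin(0,         // kunlunxin_id
   *                       0xfffc00,  // l3_workspace_size (about 16 MB max)
   *                       false,     // locked
   *                       true,      // autotune
   *                       "",        // autotune_file
   *                       "int16");  // precision
   */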
  /// Set the external stream used by the runtime
  void SetExternalStream(void *external_stream);
  /** @brief Set the number of CPU threads used while inferring on CPU; by
   * default it is decided by the backend in use
   */
  void SetCpuThreadNum(int thread_num);
  /// Set Paddle Inference as the inference backend, supports CPU/GPU
  void UsePaddleInferBackend() { return UsePaddleBackend(); }
  /// Set ONNX Runtime as the inference backend, supports CPU/GPU
  void UseOrtBackend();
  /// Set SOPHGO Runtime as the inference backend, supports SOPHGO
  void UseSophgoBackend();
  /// Set TensorRT as the inference backend, only supports GPU
  void UseTrtBackend();
  /// Set Poros backend as the inference backend, supports CPU/GPU
  void UsePorosBackend();
  /// Set OpenVINO as the inference backend, only supports CPU
  void UseOpenVINOBackend();
  /// Set Paddle Lite as the inference backend, only supports Arm CPU
  void UsePaddleLiteBackend() { return UseLiteBackend(); }
  /** \brief Use Graphcore IPU for inference.
   *
   * \param[in] device_num The number of IPUs.
   * \param[in] micro_batch_size The batch size in the graph; only takes effect
   *            when the graph has no batch shape info.
   * \param[in] enable_pipelining Enable pipelining.
   * \param[in] batches_per_step The number of batches per run in pipelining.
   */
  void UseIpu(int device_num = 1, int micro_batch_size = 1,
              bool enable_pipelining = false, int batches_per_step = 1);
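  /* A minimal sketch (not part of the original header): run on a single IPU
   * with pipelining disabled; the values simply restate the defaults.
   *
   *   ultra_infer::RuntimeOption option;
   *   option.UseIpu(1,      // device_num
   *                 1,      // micro_batch_size
   *                 false,  // enable_pipelining
   *                 1);     // batches_per_step
   */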
  /// Option to configure ONNX Runtime backend
  OrtBackendOption ort_option;
  /// Option to configure TensorRT backend
  TrtBackendOption trt_option;
  /// Option to configure Paddle Inference backend
  PaddleBackendOption paddle_infer_option;
  /// Option to configure Poros backend
  PorosBackendOption poros_option;
  /// Option to configure OpenVINO backend
  OpenVINOBackendOption openvino_option;
  /// Option to configure Paddle Lite backend
  LiteBackendOption paddle_lite_option;
  /// Option to configure RKNPU2 backend
  RKNPU2BackendOption rknpu2_option;
  /// Option to configure TVM backend
  TVMBackendOption tvm_option;
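  /* A minimal sketch (not part of the original header): choose a backend to
   * pair with the selected device; backend-specific settings live in the
   * corresponding *_option member above (e.g. trt_option for TensorRT,
   * ort_option for ONNX Runtime).
   *
   *   ultra_infer::RuntimeOption option;
   *   option.UseGpu(0);
   *   option.UseTrtBackend();          // TensorRT on GPU
   *   // option.UseOrtBackend();       // or ONNX Runtime (CPU/GPU)
   *   // option.UseOpenVINOBackend();  // or OpenVINO (CPU only)
   */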
  // \brief Set the profile mode as 'true'.
  //
  // \param[in] inclue_h2d_d2h Whether to include the time of H2D_D2H in the
  //            measured runtime time.
  // \param[in] repeat Repeat times for runtime inference.
  // \param[in] warmup Warmup times for runtime inference.
  //
  void EnableProfiling(bool inclue_h2d_d2h = false, int repeat = 100,
                       int warmup = 50) {
    benchmark_option.enable_profile = true;
    benchmark_option.warmup = warmup;
    benchmark_option.repeats = repeat;
    benchmark_option.include_h2d_d2h = inclue_h2d_d2h;
  }
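  /* A minimal sketch (not part of the original header): collect runtime timing
   * with 50 warmup runs and 100 timed repeats, excluding H2D/D2H copies from
   * the measured time.
   *
   *   ultra_infer::RuntimeOption option;
   *   option.EnableProfiling(false, 100, 50);  // inclue_h2d_d2h, repeat, warmup
   *   // ... create a Runtime with this option and run inference ...
   *   option.DisableProfiling();
   */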
  // \brief Set the profile mode as 'false'.
  //
  void DisableProfiling() { benchmark_option.enable_profile = false; }
  // \brief Enable checking whether the backend set by the user can be found
  // in valid_xxx_backend.
  //
  void EnableValidBackendCheck() { enable_valid_backend_check = true; }
  // \brief Disable checking whether the backend set by the user can be found
  // in valid_xxx_backend.
  //
  void DisableValidBackendCheck() { enable_valid_backend_check = false; }
  // Benchmark option
  benchmark::BenchmarkOption benchmark_option;
  // Enable the check for valid backend; default is true.
  bool enable_valid_backend_check = true;
  // If model_from_memory_ is true, model_file and params_file are binary
  // streams held in memory; otherwise they are file paths.
  std::string model_file = "";
  std::string params_file = "";
  bool model_from_memory_ = false;
  // Format of the input model
  ModelFormat model_format = ModelFormat::PADDLE;
  // For CPU inference; the default lets each backend choose its own value
  int cpu_thread_num = -1;
  int device_id = 0;
  Backend backend = Backend::UNKNOWN;
  Device device = Device::CPU;
  void *external_stream_ = nullptr;
  bool enable_pinned_memory = false;
  // *** The APIs below are deprecated and will be removed in v1.2.0
  // *** Do not use them anymore
  void SetPaddleMKLDNN(bool pd_mkldnn = true);
  void EnablePaddleToTrt();
  void DeletePaddleBackendPass(const std::string &delete_pass_name);
  void EnablePaddleLogInfo();
  void DisablePaddleLogInfo();
  void SetPaddleMKLDNNCacheSize(int size);
  void SetOpenVINODevice(const std::string &name = "CPU");
  void SetOpenVINOShapeInfo(
      const std::map<std::string, std::vector<int64_t>> &shape_info) {
    openvino_option.shape_infos = shape_info;
  }
  void SetOpenVINOCpuOperators(const std::vector<std::string> &operators) {
    openvino_option.SetCpuOperators(operators);
  }
  void SetLiteOptimizedModelDir(const std::string &optimized_model_dir);
  void SetLiteSubgraphPartitionPath(
      const std::string &nnadapter_subgraph_partition_config_path);
  void SetLiteSubgraphPartitionConfigBuffer(
      const std::string &nnadapter_subgraph_partition_config_buffer);
  void
  SetLiteContextProperties(const std::string &nnadapter_context_properties);
  void SetLiteModelCacheDir(const std::string &nnadapter_model_cache_dir);
  void SetLiteDynamicShapeInfo(
      const std::map<std::string, std::vector<std::vector<int64_t>>>
          &nnadapter_dynamic_shape_info);
  void SetLiteMixedPrecisionQuantizationConfigPath(
      const std::string &nnadapter_mixed_precision_quantization_config_path);
  void EnableLiteFP16();
  void DisableLiteFP16();
  void EnableLiteInt8();
  void DisableLiteInt8();
  void SetLitePowerMode(LitePowerMode mode);
  void SetTrtInputShape(
      const std::string &input_name, const std::vector<int32_t> &min_shape,
      const std::vector<int32_t> &opt_shape = std::vector<int32_t>(),
      const std::vector<int32_t> &max_shape = std::vector<int32_t>());
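  /* A minimal sketch (not part of the original header) of the deprecated
   * dynamic-shape API above: register min/opt/max shapes for an input named
   * "x" (the name and shapes are illustrative only).
   *
   *   ultra_infer::RuntimeOption option;
   *   option.UseGpu(0);
   *   option.UseTrtBackend();
   *   option.SetTrtInputShape("x", {1, 3, 224, 224},  // min
   *                           {4, 3, 224, 224},       // opt
   *                           {8, 3, 224, 224});      // max
   */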
  void SetTrtInputData(
      const std::string &input_name, const std::vector<float> &min_shape_data,
      const std::vector<float> &opt_shape_data = std::vector<float>(),
      const std::vector<float> &max_shape_data = std::vector<float>());
  void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);
  void SetTrtMaxBatchSize(size_t max_batch_size);
  void EnableTrtFP16();
  void DisableTrtFP16();
  void SetTrtCacheFile(const std::string &cache_file_path);
  void EnablePinnedMemory();
  void DisablePinnedMemory();
  void EnablePaddleTrtCollectShape();
  void DisablePaddleTrtCollectShape();
  void DisablePaddleTrtOPs(const std::vector<std::string> &ops);
  void SetOpenVINOStreams(int num_streams);
  void SetOrtGraphOptLevel(int level = -1);
  void UsePaddleBackend();
  void UseLiteBackend();
  void UseHorizonNPUBackend();
  void UseTVMBackend();
  void UseOMBackend();
};

}  // namespace ultra_infer