runtime.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "ultra_infer/runtime/runtime.h"
  15. #include <algorithm>
  16. #include <cassert>
  17. #include <chrono>
  18. #include <cstdlib>
  19. #include "ultra_infer/utils/unique_ptr.h"
  20. #include "ultra_infer/utils/utils.h"
  21. #include "yaml-cpp/yaml.h"
  22. #ifdef ENABLE_ORT_BACKEND
  23. #include "ultra_infer/runtime/backends/ort/ort_backend.h"
  24. #endif
  25. #ifdef ENABLE_TRT_BACKEND
  26. #include "ultra_infer/runtime/backends/tensorrt/trt_backend.h"
  27. #endif
  28. #ifdef ENABLE_PADDLE_BACKEND
  29. #include "ultra_infer/runtime/backends/paddle/paddle_backend.h"
  30. #endif
  31. #ifdef ENABLE_POROS_BACKEND
  32. #include "ultra_infer/runtime/backends/poros/poros_backend.h"
  33. #endif
  34. #ifdef ENABLE_OPENVINO_BACKEND
  35. #include "ultra_infer/runtime/backends/openvino/ov_backend.h"
  36. #endif
  37. #ifdef ENABLE_LITE_BACKEND
  38. #include "ultra_infer/runtime/backends/lite/lite_backend.h"
  39. #endif
  40. #ifdef ENABLE_RKNPU2_BACKEND
  41. #include "ultra_infer/runtime/backends/rknpu2/rknpu2_backend.h"
  42. #endif
  43. #ifdef ENABLE_SOPHGO_BACKEND
  44. #include "ultra_infer/runtime/backends/sophgo/sophgo_backend.h"
  45. #endif
  46. #ifdef ENABLE_HORIZON_BACKEND
  47. #include "ultra_infer/runtime/backends/horizon/horizon_backend.h"
  48. #endif
  49. #ifdef ENABLE_TVM_BACKEND
  50. #include "ultra_infer/runtime/backends/tvm/tvm_backend.h"
  51. #endif
  52. #ifdef ENABLE_OM_BACKEND
  53. #include "ultra_infer/runtime/backends/om/om_backend.h"
  54. #endif
  55. namespace ultra_infer {
  56. bool AutoSelectBackend(RuntimeOption &option) {
  57. auto iter0 = s_default_backends_by_format.find(option.model_format);
  58. if (iter0 == s_default_backends_by_format.end()) {
  59. FDERROR << "Cannot found a default backend for model format: "
  60. << option.model_format
  61. << ", please define the inference backend in RuntimeOption."
  62. << std::endl;
  63. return false;
  64. }
  65. auto iter1 = s_default_backends_by_device.find(option.device);
  66. if (iter1 == s_default_backends_by_device.end()) {
  67. FDERROR << "Cannot found a default backend for device: " << option.device
  68. << ", please define the inference backend in RuntimeOption."
  69. << std::endl;
  70. return false;
  71. }
  72. std::vector<Backend> candidates;
  73. for (const auto &b0 : iter0->second) {
  74. for (const auto &b1 : iter1->second) {
  75. if (b0 == b1) {
  76. candidates.push_back(b0);
  77. }
  78. }
  79. }
  80. if (candidates.size() == 0) {
  81. FDERROR << "Cannot found available inference backends by model format: "
  82. << option.model_format << " with device: " << option.device
  83. << std::endl;
  84. return false;
  85. }
  86. for (const auto &b : candidates) {
  87. if (IsBackendAvailable(b)) {
  88. option.backend = b;
  89. FDINFO << "UltraInfer will choose " << b << " to inference this model."
  90. << std::endl;
  91. return true;
  92. }
  93. }
  94. std::string debug_message = Str(candidates);
  95. FDERROR << "The candidate backends for " << option.model_format << " & "
  96. << option.device << " are " << debug_message
  97. << ", but both of them have not been compiled with current "
  98. "UltraInfer yet."
  99. << std::endl;
  100. return false;
  101. }
  102. bool Runtime::Init(const RuntimeOption &_option) {
  103. option = _option;
  104. // Choose default backend by model format and device if backend is not
  105. // specified
  106. if (option.backend == Backend::UNKNOWN) {
  107. if (!AutoSelectBackend(option)) {
  108. return false;
  109. }
  110. }
  111. if (option.backend == Backend::ORT) {
  112. CreateOrtBackend();
  113. } else if (option.backend == Backend::TRT) {
  114. CreateTrtBackend();
  115. } else if (option.backend == Backend::PDINFER) {
  116. CreatePaddleBackend();
  117. } else if (option.backend == Backend::OPENVINO) {
  118. CreateOpenVINOBackend();
  119. } else if (option.backend == Backend::LITE) {
  120. CreateLiteBackend();
  121. } else if (option.backend == Backend::RKNPU2) {
  122. CreateRKNPU2Backend();
  123. } else if (option.backend == Backend::SOPHGOTPU) {
  124. CreateSophgoNPUBackend();
  125. } else if (option.backend == Backend::POROS) {
  126. CreatePorosBackend();
  127. } else if (option.backend == Backend::HORIZONNPU) {
  128. CreateHorizonBackend();
  129. } else if (option.backend == Backend::TVM) {
  130. CreateTVMBackend();
  131. } else if (option.backend == Backend::OMONNPU) {
  132. CreateOMBackend();
  133. } else {
  134. std::string msg = Str(GetAvailableBackends());
  135. FDERROR << "The compiled UltraInfer only supports " << msg << ", "
  136. << option.backend << " is not supported now." << std::endl;
  137. return false;
  138. }
  139. backend_->benchmark_option_ = option.benchmark_option;
  140. return true;
  141. }
  142. TensorInfo Runtime::GetInputInfo(int index) {
  143. return backend_->GetInputInfo(index);
  144. }
  145. TensorInfo Runtime::GetOutputInfo(int index) {
  146. return backend_->GetOutputInfo(index);
  147. }
  148. std::vector<TensorInfo> Runtime::GetInputInfos() {
  149. return backend_->GetInputInfos();
  150. }
  151. std::vector<TensorInfo> Runtime::GetOutputInfos() {
  152. return backend_->GetOutputInfos();
  153. }
  154. bool Runtime::Infer(std::vector<FDTensor> &input_tensors,
  155. std::vector<FDTensor> *output_tensors) {
  156. for (auto &tensor : input_tensors) {
  157. FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id,
  158. "Device id of input tensor(%d) and runtime(%d) are not same.",
  159. tensor.device_id, option.device_id);
  160. }
  161. return backend_->Infer(input_tensors, output_tensors);
  162. }
  163. bool Runtime::Infer() {
  164. bool result = false;
  165. if (option.device == Device::KUNLUNXIN) {
  166. // FDTensor SetExternalData is not support for Device::KUNLUNXIN
  167. // now, so, we need to set copy_to_fd as 'true'.
  168. result = backend_->Infer(input_tensors_, &output_tensors_, true);
  169. } else {
  170. result = backend_->Infer(input_tensors_, &output_tensors_, false);
  171. }
  172. for (auto &tensor : output_tensors_) {
  173. tensor.device_id = option.device_id;
  174. }
  175. return result;
  176. }
  177. void Runtime::BindInputTensor(const std::string &name, FDTensor &input) {
  178. bool is_exist = false;
  179. for (auto &t : input_tensors_) {
  180. if (t.name == name) {
  181. is_exist = true;
  182. t.SetExternalData(input.shape, input.dtype, input.MutableData(),
  183. input.device, input.device_id);
  184. break;
  185. }
  186. }
  187. if (!is_exist) {
  188. FDTensor new_tensor(name);
  189. new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(),
  190. input.device, input.device_id);
  191. input_tensors_.emplace_back(std::move(new_tensor));
  192. }
  193. }
  194. void Runtime::BindOutputTensor(const std::string &name, FDTensor &output) {
  195. bool is_exist = false;
  196. for (auto &t : output_tensors_) {
  197. if (t.name == name) {
  198. is_exist = true;
  199. t.SetExternalData(output.shape, output.dtype, output.MutableData(),
  200. output.device, output.device_id);
  201. break;
  202. }
  203. }
  204. if (!is_exist) {
  205. FDTensor new_tensor(name);
  206. new_tensor.SetExternalData(output.shape, output.dtype, output.MutableData(),
  207. output.device, output.device_id);
  208. output_tensors_.emplace_back(std::move(new_tensor));
  209. }
  210. }
  211. FDTensor *Runtime::GetOutputTensor(const std::string &name) {
  212. for (auto &t : output_tensors_) {
  213. if (t.name == name) {
  214. return &t;
  215. }
  216. }
  217. FDWARNING << "The output name [" << name << "] don't exist." << std::endl;
  218. return nullptr;
  219. }
  220. void Runtime::ReleaseModelMemoryBuffer() {
  221. if (option.model_from_memory_) {
  222. option.model_file.clear();
  223. option.model_file.shrink_to_fit();
  224. option.params_file.clear();
  225. option.params_file.shrink_to_fit();
  226. }
  227. }
  228. void Runtime::CreatePaddleBackend() {
  229. #ifdef ENABLE_PADDLE_BACKEND
  230. backend_ = utils::make_unique<PaddleBackend>();
  231. FDASSERT(backend_->Init(option),
  232. "Failed to initialized Paddle Inference backend.");
  233. #else
  234. FDASSERT(false, "PaddleBackend is not available, please compiled with "
  235. "ENABLE_PADDLE_BACKEND=ON.");
  236. #endif
  237. FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
  238. << "." << std::endl;
  239. }
  240. void Runtime::CreateOpenVINOBackend() {
  241. #ifdef ENABLE_OPENVINO_BACKEND
  242. backend_ = utils::make_unique<OpenVINOBackend>();
  243. FDASSERT(backend_->Init(option), "Failed to initialize OpenVINOBackend.");
  244. #else
  245. FDASSERT(false, "OpenVINOBackend is not available, please compiled with "
  246. "ENABLE_OPENVINO_BACKEND=ON.");
  247. #endif
  248. FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
  249. << "." << std::endl;
  250. }
  251. void Runtime::CreateTVMBackend() {
  252. #ifdef ENABLE_TVM_BACKEND
  253. backend_ = utils::make_unique<TVMBackend>();
  254. FDASSERT(backend_->Init(option), "Failed to initialize TVM backend.");
  255. #else
  256. FDASSERT(false, "TVMBackend is not available, please compiled with "
  257. "ENABLE_TVM_BACKEND=ON.");
  258. #endif
  259. FDINFO << "Runtime initialized with Backend::TVM in " << option.device << "."
  260. << std::endl;
  261. }
  262. void Runtime::CreateOrtBackend() {
  263. #ifdef ENABLE_ORT_BACKEND
  264. backend_ = utils::make_unique<OrtBackend>();
  265. FDASSERT(backend_->Init(option), "Failed to initialize Backend::ORT.");
  266. #else
  267. FDASSERT(false, "OrtBackend is not available, please compiled with "
  268. "ENABLE_ORT_BACKEND=ON.");
  269. #endif
  270. FDINFO << "Runtime initialized with Backend::ORT in " << option.device << "."
  271. << std::endl;
  272. }
  273. void Runtime::CreateTrtBackend() {
  274. #ifdef ENABLE_TRT_BACKEND
  275. backend_ = utils::make_unique<TrtBackend>();
  276. FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend.");
  277. #else
  278. FDASSERT(false, "TrtBackend is not available, please compiled with "
  279. "ENABLE_TRT_BACKEND=ON.");
  280. #endif
  281. FDINFO << "Runtime initialized with Backend::TRT in " << option.device << "."
  282. << std::endl;
  283. }
  284. void Runtime::CreateLiteBackend() {
  285. #ifdef ENABLE_LITE_BACKEND
  286. backend_ = utils::make_unique<LiteBackend>();
  287. FDASSERT(backend_->Init(option),
  288. "Load model from nb file failed while initializing LiteBackend.");
  289. #else
  290. FDASSERT(false, "LiteBackend is not available, please compiled with "
  291. "ENABLE_LITE_BACKEND=ON.");
  292. #endif
  293. FDINFO << "Runtime initialized with Backend::PDLITE in " << option.device
  294. << "." << std::endl;
  295. }
  296. void Runtime::CreateRKNPU2Backend() {
  297. #ifdef ENABLE_RKNPU2_BACKEND
  298. backend_ = utils::make_unique<RKNPU2Backend>();
  299. FDASSERT(backend_->Init(option), "Failed to initialize RKNPU2 backend.");
  300. #else
  301. FDASSERT(false, "RKNPU2Backend is not available, please compiled with "
  302. "ENABLE_RKNPU2_BACKEND=ON.");
  303. #endif
  304. FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
  305. << "." << std::endl;
  306. }
  307. void Runtime::CreateHorizonBackend() {
  308. #ifdef ENABLE_HORIZON_BACKEND
  309. backend_ = utils::make_unique<HorizonBackend>();
  310. FDASSERT(backend_->Init(option), "Failed to initialize Horizon backend.");
  311. #else
  312. FDASSERT(false, "HorizonBackend is not available, please compiled with ",
  313. " ENABLE_HORIZON_BACKEND=ON.");
  314. #endif
  315. FDINFO << "Runtime initialized with Backend::HORIZONNPU in " << option.device
  316. << "." << std::endl;
  317. }
  318. void Runtime::CreateSophgoNPUBackend() {
  319. #ifdef ENABLE_SOPHGO_BACKEND
  320. backend_ = utils::make_unique<SophgoBackend>();
  321. FDASSERT(backend_->Init(option), "Failed to initialize Sophgo backend.");
  322. #else
  323. FDASSERT(false, "SophgoBackend is not available, please compiled with "
  324. "ENABLE_SOPHGO_BACKEND=ON.");
  325. #endif
  326. FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
  327. << "." << std::endl;
  328. }
  329. Runtime *Runtime::Clone(void *stream, int device_id) {
  330. Runtime *runtime = new Runtime();
  331. if (option.backend != Backend::OPENVINO &&
  332. option.backend != Backend::PDINFER) {
  333. runtime->Init(option);
  334. FDWARNING << "Only OpenVINO/Paddle Inference support \
  335. clone engine to reduce CPU/GPU memory usage now. For "
  336. << option.backend
  337. << ", UltraInfer will create a new engine which \
  338. will not share memory with the current runtime."
  339. << std::endl;
  340. return runtime;
  341. }
  342. FDINFO << "Runtime Clone with Backend:: " << option.backend << " in "
  343. << option.device << "." << std::endl;
  344. runtime->option = option;
  345. runtime->backend_ = backend_->Clone(option, stream, device_id);
  346. return runtime;
  347. }
  348. void Runtime::CreatePorosBackend() {
  349. #ifdef ENABLE_POROS_BACKEND
  350. backend_ = utils::make_unique<PorosBackend>();
  351. FDASSERT(backend_->Init(option), "Failed to initialize Poros backend.");
  352. #else
  353. FDASSERT(false, "PorosBackend is not available, please compiled with "
  354. "ENABLE_POROS_BACKEND=ON.");
  355. #endif
  356. FDINFO << "Runtime initialized with Backend::POROS in " << option.device
  357. << "." << std::endl;
  358. }
  359. void Runtime::CreateOMBackend() {
  360. #ifdef ENABLE_OM_BACKEND
  361. backend_ = utils::make_unique<OmBackend>();
  362. FDASSERT(backend_->Init(option), "Failed to initialize om backend.");
  363. #else
  364. FDASSERT(false, "OMBackend is not available, please compiled with ",
  365. " ENABLE_OM_BACKEND=ON.");
  366. #endif
  367. FDINFO << "Runtime initialized with Backend::OMONNPU in " << option.device
  368. << "." << std::endl;
  369. }
  370. // only for poros backend
  371. bool Runtime::Compile(std::vector<std::vector<FDTensor>> &prewarm_tensors) {
  372. #ifdef ENABLE_POROS_BACKEND
  373. option.poros_option.device = option.device;
  374. option.poros_option.device_id = option.device_id;
  375. option.poros_option.enable_fp16 = option.trt_option.enable_fp16;
  376. option.poros_option.max_batch_size = option.trt_option.max_batch_size;
  377. option.poros_option.max_workspace_size = option.trt_option.max_workspace_size;
  378. auto casted_backend = dynamic_cast<PorosBackend *>(backend_.get());
  379. FDASSERT(
  380. casted_backend->Compile(option.model_file, prewarm_tensors,
  381. option.poros_option),
  382. "Load model from Torchscript failed while initializing PorosBackend.");
  383. #else
  384. FDASSERT(false, "PorosBackend is not available, please compiled with "
  385. "ENABLE_POROS_BACKEND=ON.");
  386. #endif
  387. return true;
  388. }
  389. } // namespace ultra_infer