// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ultra_infer/runtime/runtime.h"

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdlib>

#include "ultra_infer/utils/unique_ptr.h"
#include "ultra_infer/utils/utils.h"
#include "yaml-cpp/yaml.h"

#ifdef ENABLE_ORT_BACKEND
#include "ultra_infer/runtime/backends/ort/ort_backend.h"
#endif
#ifdef ENABLE_TRT_BACKEND
#include "ultra_infer/runtime/backends/tensorrt/trt_backend.h"
#endif
#ifdef ENABLE_PADDLE_BACKEND
#include "ultra_infer/runtime/backends/paddle/paddle_backend.h"
#endif
#ifdef ENABLE_POROS_BACKEND
#include "ultra_infer/runtime/backends/poros/poros_backend.h"
#endif
#ifdef ENABLE_OPENVINO_BACKEND
#include "ultra_infer/runtime/backends/openvino/ov_backend.h"
#endif
#ifdef ENABLE_LITE_BACKEND
#include "ultra_infer/runtime/backends/lite/lite_backend.h"
#endif
#ifdef ENABLE_RKNPU2_BACKEND
#include "ultra_infer/runtime/backends/rknpu2/rknpu2_backend.h"
#endif
#ifdef ENABLE_SOPHGO_BACKEND
#include "ultra_infer/runtime/backends/sophgo/sophgo_backend.h"
#endif
#ifdef ENABLE_HORIZON_BACKEND
#include "ultra_infer/runtime/backends/horizon/horizon_backend.h"
#endif
#ifdef ENABLE_TVM_BACKEND
#include "ultra_infer/runtime/backends/tvm/tvm_backend.h"
#endif

namespace ultra_infer {

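// Select a default inference backend by intersecting the candidates
// registered for the model format with those registered for the target
// device, then taking the first one compiled into this build.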
bool AutoSelectBackend(RuntimeOption &option) {
  auto iter0 = s_default_backends_by_format.find(option.model_format);
  if (iter0 == s_default_backends_by_format.end()) {
    FDERROR << "Cannot find a default backend for model format: "
            << option.model_format
            << ", please define the inference backend in RuntimeOption."
            << std::endl;
    return false;
  }
  auto iter1 = s_default_backends_by_device.find(option.device);
  if (iter1 == s_default_backends_by_device.end()) {
    FDERROR << "Cannot find a default backend for device: " << option.device
            << ", please define the inference backend in RuntimeOption."
            << std::endl;
    return false;
  }
  std::vector<Backend> candidates;
  for (const auto &b0 : iter0->second) {
    for (const auto &b1 : iter1->second) {
      if (b0 == b1) {
        candidates.push_back(b0);
      }
    }
  }
  if (candidates.empty()) {
    FDERROR << "Cannot find an available inference backend for model format: "
            << option.model_format << " with device: " << option.device
            << std::endl;
    return false;
  }
  for (const auto &b : candidates) {
    if (IsBackendAvailable(b)) {
      option.backend = b;
      FDINFO << "UltraInfer will choose " << b << " for inference on this "
             << "model." << std::endl;
      return true;
    }
  }
  std::string debug_message = Str(candidates);
  FDERROR << "The candidate backends for " << option.model_format << " & "
          << option.device << " are " << debug_message
          << ", but none of them has been compiled into the current "
             "UltraInfer build."
          << std::endl;
  return false;
}

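// Initialize the runtime from a RuntimeOption: auto-select a backend when
// none was specified, then construct the backend matching option.backend.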
bool Runtime::Init(const RuntimeOption &_option) {
  option = _option;

  // Choose a default backend by model format and device if the backend is
  // not specified.
  if (option.backend == Backend::UNKNOWN) {
    if (!AutoSelectBackend(option)) {
      return false;
    }
  }

  if (option.backend == Backend::ORT) {
    CreateOrtBackend();
  } else if (option.backend == Backend::TRT) {
    CreateTrtBackend();
  } else if (option.backend == Backend::PDINFER) {
    CreatePaddleBackend();
  } else if (option.backend == Backend::OPENVINO) {
    CreateOpenVINOBackend();
  } else if (option.backend == Backend::LITE) {
    CreateLiteBackend();
  } else if (option.backend == Backend::RKNPU2) {
    CreateRKNPU2Backend();
  } else if (option.backend == Backend::SOPHGOTPU) {
    CreateSophgoNPUBackend();
  } else if (option.backend == Backend::POROS) {
    CreatePorosBackend();
  } else if (option.backend == Backend::HORIZONNPU) {
    CreateHorizonBackend();
  } else if (option.backend == Backend::TVM) {
    CreateTVMBackend();
  } else {
    std::string msg = Str(GetAvailableBackends());
    FDERROR << "The compiled UltraInfer only supports " << msg << ", "
            << option.backend << " is not supported now." << std::endl;
    return false;
  }
  backend_->benchmark_option_ = option.benchmark_option;
  return true;
}

TensorInfo Runtime::GetInputInfo(int index) {
  return backend_->GetInputInfo(index);
}

TensorInfo Runtime::GetOutputInfo(int index) {
  return backend_->GetOutputInfo(index);
}

std::vector<TensorInfo> Runtime::GetInputInfos() {
  return backend_->GetInputInfos();
}

std::vector<TensorInfo> Runtime::GetOutputInfos() {
  return backend_->GetOutputInfos();
}

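// Run inference on caller-provided tensors; every input tensor must reside
// on the same device id the runtime was configured with.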
bool Runtime::Infer(std::vector<FDTensor> &input_tensors,
                    std::vector<FDTensor> *output_tensors) {
  for (auto &tensor : input_tensors) {
    FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id,
             "Device id of input tensor(%d) and runtime(%d) are not the same.",
             tensor.device_id, option.device_id);
  }
  return backend_->Infer(input_tensors, output_tensors);
}

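// Run inference on the tensors previously bound with BindInputTensor();
// outputs are collected in output_tensors_ and tagged with the runtime's
// device id.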
bool Runtime::Infer() {
  bool result = false;
  if (option.device == Device::KUNLUNXIN) {
    // FDTensor::SetExternalData is not supported for Device::KUNLUNXIN yet,
    // so copy_to_fd must be set to true.
    result = backend_->Infer(input_tensors_, &output_tensors_, true);
  } else {
    result = backend_->Infer(input_tensors_, &output_tensors_, false);
  }
  for (auto &tensor : output_tensors_) {
    tensor.device_id = option.device_id;
  }
  return result;
}

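// Bind an external tensor as a named input without copying: an existing
// entry with the same name is updated in place, otherwise a new entry is
// appended.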
void Runtime::BindInputTensor(const std::string &name, FDTensor &input) {
  bool is_exist = false;
  for (auto &t : input_tensors_) {
    if (t.name == name) {
      is_exist = true;
      t.SetExternalData(input.shape, input.dtype, input.MutableData(),
                        input.device, input.device_id);
      break;
    }
  }
  if (!is_exist) {
    FDTensor new_tensor(name);
    new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(),
                               input.device, input.device_id);
    input_tensors_.emplace_back(std::move(new_tensor));
  }
}

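// Bind an external tensor as a named output so the backend writes results
// directly into caller-owned memory.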
void Runtime::BindOutputTensor(const std::string &name, FDTensor &output) {
  bool is_exist = false;
  for (auto &t : output_tensors_) {
    if (t.name == name) {
      is_exist = true;
      t.SetExternalData(output.shape, output.dtype, output.MutableData(),
                        output.device, output.device_id);
      break;
    }
  }
  if (!is_exist) {
    FDTensor new_tensor(name);
    new_tensor.SetExternalData(output.shape, output.dtype, output.MutableData(),
                               output.device, output.device_id);
    output_tensors_.emplace_back(std::move(new_tensor));
  }
}

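// Look up a bound output tensor by name; returns nullptr (with a warning)
// if no such output exists.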
FDTensor *Runtime::GetOutputTensor(const std::string &name) {
  for (auto &t : output_tensors_) {
    if (t.name == name) {
      return &t;
    }
  }
  FDWARNING << "The output name [" << name << "] doesn't exist." << std::endl;
  return nullptr;
}

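// Release the in-memory model/params buffers once the backend no longer
// needs them; a no-op unless the model was loaded from memory.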
void Runtime::ReleaseModelMemoryBuffer() {
  if (option.model_from_memory_) {
    option.model_file.clear();
    option.model_file.shrink_to_fit();
    option.params_file.clear();
    option.params_file.shrink_to_fit();
  }
}

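// Each Create*Backend() helper below instantiates its backend only when the
// matching ENABLE_*_BACKEND flag was set at compile time, and otherwise
// aborts with a hint about the required CMake option.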
void Runtime::CreatePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
  backend_ = utils::make_unique<PaddleBackend>();
  FDASSERT(backend_->Init(option),
           "Failed to initialize Paddle Inference backend.");
#else
  FDASSERT(false, "PaddleBackend is not available, please compile with "
                  "ENABLE_PADDLE_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
         << "." << std::endl;
  const char *pir_api_flag = "FLAGS_enable_pir_api";
  const char *env_value = getenv(pir_api_flag);
  if (env_value == nullptr ||
      (std::string(env_value) != "0" && std::string(env_value) != "False")) {
    FDWARNING
        << "To ensure the correct operation of the Paddle backend, please set "
           "the environment variable 'FLAGS_enable_pir_api' to 'False'."
        << std::endl;
  }
}

void Runtime::CreateOpenVINOBackend() {
#ifdef ENABLE_OPENVINO_BACKEND
  backend_ = utils::make_unique<OpenVINOBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize OpenVINOBackend.");
#else
  FDASSERT(false, "OpenVINOBackend is not available, please compile with "
                  "ENABLE_OPENVINO_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
         << "." << std::endl;
}

void Runtime::CreateTVMBackend() {
#ifdef ENABLE_TVM_BACKEND
  backend_ = utils::make_unique<TVMBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize TVM backend.");
#else
  FDASSERT(false, "TVMBackend is not available, please compile with "
                  "ENABLE_TVM_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::TVM in " << option.device << "."
         << std::endl;
}

void Runtime::CreateOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
  backend_ = utils::make_unique<OrtBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize Backend::ORT.");
#else
  FDASSERT(false, "OrtBackend is not available, please compile with "
                  "ENABLE_ORT_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::ORT in " << option.device << "."
         << std::endl;
}

void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
  backend_ = utils::make_unique<TrtBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend.");
#else
  FDASSERT(false, "TrtBackend is not available, please compile with "
                  "ENABLE_TRT_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::TRT in " << option.device << "."
         << std::endl;
}

void Runtime::CreateLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
  backend_ = utils::make_unique<LiteBackend>();
  FDASSERT(backend_->Init(option),
           "Load model from nb file failed while initializing LiteBackend.");
#else
  FDASSERT(false, "LiteBackend is not available, please compile with "
                  "ENABLE_LITE_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::PDLITE in " << option.device
         << "." << std::endl;
}

void Runtime::CreateRKNPU2Backend() {
#ifdef ENABLE_RKNPU2_BACKEND
  backend_ = utils::make_unique<RKNPU2Backend>();
  FDASSERT(backend_->Init(option), "Failed to initialize RKNPU2 backend.");
#else
  FDASSERT(false, "RKNPU2Backend is not available, please compile with "
                  "ENABLE_RKNPU2_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
         << "." << std::endl;
}

void Runtime::CreateHorizonBackend() {
#ifdef ENABLE_HORIZON_BACKEND
  backend_ = utils::make_unique<HorizonBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize Horizon backend.");
#else
  FDASSERT(false, "HorizonBackend is not available, please compile with "
                  "ENABLE_HORIZON_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::HORIZONNPU in " << option.device
         << "." << std::endl;
}

void Runtime::CreateSophgoNPUBackend() {
#ifdef ENABLE_SOPHGO_BACKEND
  backend_ = utils::make_unique<SophgoBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize Sophgo backend.");
#else
  FDASSERT(false, "SophgoBackend is not available, please compile with "
                  "ENABLE_SOPHGO_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
         << "." << std::endl;
}

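// Clone this runtime onto the given stream/device. Only the OpenVINO and
// Paddle Inference backends share engine memory with the original; other
// backends fall back to initializing an independent engine.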
Runtime *Runtime::Clone(void *stream, int device_id) {
  Runtime *runtime = new Runtime();
  if (option.backend != Backend::OPENVINO &&
      option.backend != Backend::PDINFER) {
    runtime->Init(option);
    FDWARNING << "Only OpenVINO/Paddle Inference support cloning an engine "
                 "to reduce CPU/GPU memory usage now. For "
              << option.backend
              << ", UltraInfer will create a new engine which will not "
                 "share memory with the current runtime."
              << std::endl;
    return runtime;
  }
  FDINFO << "Runtime Clone with " << option.backend << " in " << option.device
         << "." << std::endl;
  runtime->option = option;
  runtime->backend_ = backend_->Clone(option, stream, device_id);
  return runtime;
}

void Runtime::CreatePorosBackend() {
#ifdef ENABLE_POROS_BACKEND
  backend_ = utils::make_unique<PorosBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize Poros backend.");
#else
  FDASSERT(false, "PorosBackend is not available, please compile with "
                  "ENABLE_POROS_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::POROS in " << option.device
         << "." << std::endl;
}

// Only for the Poros backend: compile a TorchScript model with prewarm
// tensors before running inference.
bool Runtime::Compile(std::vector<std::vector<FDTensor>> &prewarm_tensors) {
#ifdef ENABLE_POROS_BACKEND
  option.poros_option.device = option.device;
  option.poros_option.device_id = option.device_id;
  option.poros_option.enable_fp16 = option.trt_option.enable_fp16;
  option.poros_option.max_batch_size = option.trt_option.max_batch_size;
  option.poros_option.max_workspace_size = option.trt_option.max_workspace_size;

  auto casted_backend = dynamic_cast<PorosBackend *>(backend_.get());
  FDASSERT(casted_backend->Compile(option.model_file, prewarm_tensors,
                                   option.poros_option),
           "Load model from TorchScript failed while initializing "
           "PorosBackend.");
#else
  FDASSERT(false, "PorosBackend is not available, please compile with "
                  "ENABLE_POROS_BACKEND=ON.");
#endif
  return true;
}

}  // namespace ultra_infer