ultra_infer_model.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "ultra_infer/ultra_infer_model.h"

#include "ultra_infer/utils/utils.h"

namespace ultra_infer {
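
// Formats a list of backends as a human-readable string, e.g.
// "[ Backend::ORT, Backend::TRT ]", for use in log messages.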
std::string Str(const std::vector<Backend> &backends) {
  std::ostringstream oss;
  if (backends.empty()) {
    oss << "[]";
    return oss.str();
  }
  oss << "[ " << backends[0];
  for (size_t i = 1; i < backends.size(); ++i) {
    oss << ", " << backends[i];
  }
  oss << " ]";
  return oss.str();
}
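
// Returns true if `backend` is present in `backends`.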
bool CheckBackendSupported(const std::vector<Backend> &backends,
                           Backend backend) {
  for (size_t i = 0; i < backends.size(); ++i) {
    if (backends[i] == backend) {
      return true;
    }
  }
  return false;
}
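
// Checks whether `backend` is one of the model's valid backends. The check
// is skipped (returning true unconditionally) while profiling in benchmark
// mode, or when the user has disabled the valid-backend check.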
bool UltraInferModel::IsSupported(const std::vector<Backend> &backends,
                                  Backend backend) {
#ifdef ENABLE_BENCHMARK
  if (runtime_option.benchmark_option.enable_profile) {
    FDWARNING << "In benchmark mode, we don't check whether the backend ["
              << backend << "] is supported for the current model!"
              << std::endl;
    return true;
  } else if (!runtime_option.enable_valid_backend_check) {
    FDWARNING << "Checking for a valid backend is disabled; we don't check "
              << "whether the backend [" << backend
              << "] is supported for the current model!" << std::endl;
    return true;
  }
  return CheckBackendSupported(backends, backend);
#else
  if (!runtime_option.enable_valid_backend_check) {
    FDWARNING << "Checking for a valid backend is disabled; we don't check "
              << "whether the backend [" << backend
              << "] is supported for the current model!" << std::endl;
    return true;
  }
  return CheckBackendSupported(backends, backend);
#endif
}
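
// Initializes the runtime with the backend explicitly requested through
// `runtime_option.backend`, after verifying that the backend is compiled
// into the library and is valid for this model on the target device.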
bool UltraInferModel::InitRuntimeWithSpecifiedBackend() {
  if (!IsBackendAvailable(runtime_option.backend)) {
    FDERROR << runtime_option.backend
            << " is not compiled into the current UltraInfer library."
            << std::endl;
    return false;
  }

  bool use_gpu = (runtime_option.device == Device::GPU);
  bool use_ipu = (runtime_option.device == Device::IPU);
  bool use_rknpu = (runtime_option.device == Device::RKNPU);
  bool use_horizon = (runtime_option.device == Device::SUNRISENPU);
  bool use_sophgotpu = (runtime_option.device == Device::SOPHGOTPUD);
  bool use_timvx = (runtime_option.device == Device::TIMVX);
  bool use_ascend = (runtime_option.device == Device::ASCEND);
  bool use_directml = (runtime_option.device == Device::DIRECTML);
  bool use_kunlunxin = (runtime_option.device == Device::KUNLUNXIN);

  if (use_gpu) {
    if (!IsSupported(valid_gpu_backends, runtime_option.backend)) {
      FDERROR << "The valid gpu backends of model " << ModelName() << " are "
              << Str(valid_gpu_backends) << ", " << runtime_option.backend
              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_rknpu) {
    if (!IsSupported(valid_rknpu_backends, runtime_option.backend)) {
      FDERROR << "The valid rknpu backends of model " << ModelName() << " are "
              << Str(valid_rknpu_backends) << ", " << runtime_option.backend
              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_horizon) {
    if (!IsSupported(valid_horizon_backends, runtime_option.backend)) {
      FDERROR << "The valid horizon backends of model " << ModelName()
              << " are " << Str(valid_horizon_backends) << ", "
              << runtime_option.backend << " is not supported." << std::endl;
      return false;
    }
  } else if (use_sophgotpu) {
    if (!IsSupported(valid_sophgonpu_backends, runtime_option.backend)) {
      FDERROR << "The valid sophgo backends of model " << ModelName()
              << " are " << Str(valid_sophgonpu_backends) << ", "
              << runtime_option.backend << " is not supported." << std::endl;
      return false;
    }
  } else if (use_timvx) {
    if (!IsSupported(valid_timvx_backends, runtime_option.backend)) {
      FDERROR << "The valid timvx backends of model " << ModelName() << " are "
              << Str(valid_timvx_backends) << ", " << runtime_option.backend
              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_ascend) {
    if (!IsSupported(valid_ascend_backends, runtime_option.backend)) {
      FDERROR << "The valid ascend backends of model " << ModelName()
              << " are " << Str(valid_ascend_backends) << ", "
              << runtime_option.backend << " is not supported." << std::endl;
      return false;
    }
  } else if (use_directml) {
    if (!IsSupported(valid_directml_backends, runtime_option.backend)) {
      FDERROR << "The valid directml backends of model " << ModelName()
              << " are " << Str(valid_directml_backends) << ", "
              << runtime_option.backend << " is not supported." << std::endl;
      return false;
    }
  } else if (use_kunlunxin) {
    if (!IsSupported(valid_kunlunxin_backends, runtime_option.backend)) {
      FDERROR << "The valid kunlunxin backends of model " << ModelName()
              << " are " << Str(valid_kunlunxin_backends) << ", "
              << runtime_option.backend << " is not supported." << std::endl;
      return false;
    }
  } else if (use_ipu) {
    if (!IsSupported(valid_ipu_backends, runtime_option.backend)) {
      FDERROR << "The valid ipu backends of model " << ModelName() << " are "
              << Str(valid_ipu_backends) << ", " << runtime_option.backend
              << " is not supported." << std::endl;
      return false;
    }
  } else {
    if (!IsSupported(valid_cpu_backends, runtime_option.backend)) {
      FDERROR << "The valid cpu backends of model " << ModelName() << " are "
              << Str(valid_cpu_backends) << ", " << runtime_option.backend
              << " is not supported." << std::endl;
      return false;
    }
  }

  runtime_ = std::make_shared<Runtime>();
  if (!runtime_->Init(runtime_option)) {
    return false;
  }
  runtime_initialized_ = true;
  return true;
}
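
// Selects a backend automatically from `runtime_option.device` by
// delegating to the matching per-device Create*Backend helper.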
bool UltraInferModel::InitRuntimeWithSpecifiedDevice() {
  if (runtime_option.device == Device::CPU) {
    return CreateCpuBackend();
  } else if (runtime_option.device == Device::GPU) {
#ifdef WITH_GPU
    return CreateGpuBackend();
#else
    FDERROR << "The compiled UltraInfer library doesn't support GPU now."
            << std::endl;
    return false;
#endif
  } else if (runtime_option.device == Device::RKNPU) {
    return CreateRKNPUBackend();
  } else if (runtime_option.device == Device::SUNRISENPU) {
    return CreateHorizonBackend();
  } else if (runtime_option.device == Device::TIMVX) {
    return CreateTimVXBackend();
  } else if (runtime_option.device == Device::ASCEND) {
    return CreateASCENDBackend();
  } else if (runtime_option.device == Device::DIRECTML) {
    return CreateDirectMLBackend();
  } else if (runtime_option.device == Device::KUNLUNXIN) {
    return CreateKunlunXinBackend();
  } else if (runtime_option.device == Device::SOPHGOTPUD) {
    return CreateSophgoNPUBackend();
  } else if (runtime_option.device == Device::IPU) {
#ifdef WITH_IPU
    return CreateIpuBackend();
#else
    FDERROR << "The compiled UltraInfer library doesn't support IPU now."
            << std::endl;
    return false;
#endif
  }
  FDERROR << "Only support CPU/GPU/IPU/RKNPU/HORIZONNPU/SOPHGOTPU/TIMVX/"
             "KunlunXin/ASCEND/DirectML now."
          << std::endl;
  return false;
}
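
// Entry point for runtime construction: honors an explicitly pinned backend
// if one was set, otherwise selects a backend based on the target device.
//
// A minimal usage sketch (hypothetical model subclass `MyModel`; concrete
// model constructors in this library typically invoke InitRuntime() as part
// of their own initialization):
//
//   ultra_infer::RuntimeOption option;
//   option.device = ultra_infer::Device::CPU;  // or pin option.backend
//   MyModel model("model_file_path", option);
//   model.Infer();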
bool UltraInferModel::InitRuntime() {
  if (runtime_initialized_) {
    FDERROR << "The model is already initialized, cannot be initialized again."
            << std::endl;
    return false;
  }
  if (runtime_option.backend != Backend::UNKNOWN) {
    return InitRuntimeWithSpecifiedBackend();
  }
  return InitRuntimeWithSpecifiedDevice();
}
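
// The Create*Backend helpers below share one pattern: walk the model's list
// of valid backends for the device in priority order, skip any backend that
// is not compiled into this build, and initialize a Runtime with the first
// available one.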
bool UltraInferModel::CreateCpuBackend() {
  if (valid_cpu_backends.empty()) {
    FDERROR << "There are no valid cpu backends for model: " << ModelName()
            << std::endl;
    return false;
  }
  for (size_t i = 0; i < valid_cpu_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_cpu_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_cpu_backends[i];
    runtime_ = std::make_shared<Runtime>();
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
  return false;
}

bool UltraInferModel::CreateGpuBackend() {
  if (valid_gpu_backends.empty()) {
    FDERROR << "There are no valid gpu backends for model: " << ModelName()
            << std::endl;
    return false;
  }
  for (size_t i = 0; i < valid_gpu_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_gpu_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_gpu_backends[i];
    runtime_ = std::make_shared<Runtime>();
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Cannot find an available gpu backend to load this model."
          << std::endl;
  return false;
}

bool UltraInferModel::CreateRKNPUBackend() {
  if (valid_rknpu_backends.empty()) {
    FDERROR << "There are no valid rknpu backends for model: " << ModelName()
            << std::endl;
    return false;
  }
  for (size_t i = 0; i < valid_rknpu_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_rknpu_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_rknpu_backends[i];
    runtime_ = std::make_shared<Runtime>();
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Cannot find an available rknpu backend to load this model."
          << std::endl;
  return false;
}

bool UltraInferModel::CreateHorizonBackend() {
  if (valid_horizon_backends.empty()) {
    FDERROR << "There are no valid horizon backends for model: " << ModelName()
            << std::endl;
    return false;
  }
  for (size_t i = 0; i < valid_horizon_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_horizon_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_horizon_backends[i];
    runtime_ = std::make_shared<Runtime>();
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Cannot find an available horizon backend to load this model."
          << std::endl;
  return false;
}

bool UltraInferModel::CreateSophgoNPUBackend() {
  if (valid_sophgonpu_backends.empty()) {
    FDERROR << "There are no valid sophgo backends for model: " << ModelName()
            << std::endl;
    return false;
  }
  for (size_t i = 0; i < valid_sophgonpu_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_sophgonpu_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_sophgonpu_backends[i];
    runtime_ = std::make_shared<Runtime>();
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Cannot find an available sophgo backend to load this model."
          << std::endl;
  return false;
}

bool UltraInferModel::CreateTimVXBackend() {
  if (valid_timvx_backends.empty()) {
    FDERROR << "There are no valid timvx backends for model: " << ModelName()
            << std::endl;
    return false;
  }
  for (size_t i = 0; i < valid_timvx_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_timvx_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_timvx_backends[i];
    runtime_ = std::make_shared<Runtime>();
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
  return false;
}

bool UltraInferModel::CreateKunlunXinBackend() {
  if (valid_kunlunxin_backends.empty()) {
    FDERROR << "There are no valid KunlunXin backends for model: "
            << ModelName() << std::endl;
    return false;
  }
  for (size_t i = 0; i < valid_kunlunxin_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_kunlunxin_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_kunlunxin_backends[i];
    runtime_ = std::make_shared<Runtime>();
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
  return false;
}

bool UltraInferModel::CreateASCENDBackend() {
  if (valid_ascend_backends.empty()) {
    FDERROR << "There are no valid ascend backends for model: " << ModelName()
            << std::endl;
    return false;
  }
  for (size_t i = 0; i < valid_ascend_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_ascend_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_ascend_backends[i];
    runtime_ = std::make_shared<Runtime>();
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
  return false;
}

bool UltraInferModel::CreateDirectMLBackend() {
  if (valid_directml_backends.empty()) {
    FDERROR << "There are no valid directml backends for model: "
            << ModelName() << std::endl;
    return false;
  }
  for (size_t i = 0; i < valid_directml_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_directml_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_directml_backends[i];
    runtime_ = std::make_shared<Runtime>();
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Found no valid directml backend for model: " << ModelName()
          << std::endl;
  return false;
}

bool UltraInferModel::CreateIpuBackend() {
  if (valid_ipu_backends.empty()) {
    FDERROR << "There are no valid ipu backends for model: " << ModelName()
            << std::endl;
    return false;
  }
  for (size_t i = 0; i < valid_ipu_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_ipu_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_ipu_backends[i];
    runtime_ = std::make_shared<Runtime>();
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
  return false;
}
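
// Runs inference through the underlying Runtime. When runtime-time recording
// is enabled, the duration of each call is appended to `time_of_runtime_`;
// recording is disabled automatically once more than 50000 samples have
// accumulated, to bound memory use.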
bool UltraInferModel::Infer(std::vector<FDTensor> &input_tensors,
                            std::vector<FDTensor> *output_tensors) {
  TimeCounter tc;
  if (enable_record_time_of_runtime_) {
    tc.Start();
  }
  auto ret = runtime_->Infer(input_tensors, output_tensors);
  if (enable_record_time_of_runtime_) {
    tc.End();
    if (time_of_runtime_.size() > 50000) {
      FDWARNING << "There are already more than 50000 records of runtime; "
                   "forcibly disabling the recording of runtime time now."
                << std::endl;
      enable_record_time_of_runtime_ = false;
    }
    time_of_runtime_.push_back(tc.Duration());
  }
  return ret;
}
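
// Convenience overload that runs inference on the model's reusable bound
// input/output tensors.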
bool UltraInferModel::Infer() {
  return Infer(reused_input_tensors_, &reused_output_tensors_);
}
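
// Prints and returns timing statistics over the recorded runtime durations.
// The first fifth of the records is treated as warmup and excluded from the
// average; durations are accumulated in seconds and the average is printed
// in milliseconds.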
std::map<std::string, float> UltraInferModel::PrintStatisInfoOfRuntime() {
  std::map<std::string, float> statis_info_of_runtime_dict;
  if (time_of_runtime_.size() < 10) {
    FDWARNING << "PrintStatisInfoOfRuntime requires the runtime to have run "
                 "at least 10 times, but it has only run "
              << time_of_runtime_.size() << " times." << std::endl;
  }
  if (time_of_runtime_.empty()) {
    // Guard against dividing by zero below when nothing has been recorded.
    return statis_info_of_runtime_dict;
  }
  double warmup_time = 0.0;
  double remain_time = 0.0;
  size_t warmup_iter = time_of_runtime_.size() / 5;
  for (size_t i = 0; i < time_of_runtime_.size(); ++i) {
    if (i < warmup_iter) {
      warmup_time += time_of_runtime_[i];
    } else {
      remain_time += time_of_runtime_[i];
    }
  }
  double avg_time = remain_time / (time_of_runtime_.size() - warmup_iter);
  std::cout << "============= Runtime Statistics Info(" << ModelName()
            << ") =============" << std::endl;
  std::cout << "Total iterations: " << time_of_runtime_.size() << std::endl;
  std::cout << "Total time of runtime: " << warmup_time + remain_time << "s."
            << std::endl;
  std::cout << "Warmup iterations: " << warmup_iter << std::endl;
  std::cout << "Total time of runtime in warmup step: " << warmup_time << "s."
            << std::endl;
  std::cout << "Average time of runtime excluding warmup step: "
            << avg_time * 1000 << "ms." << std::endl;
  statis_info_of_runtime_dict["total_time"] = warmup_time + remain_time;
  statis_info_of_runtime_dict["warmup_time"] = warmup_time;
  statis_info_of_runtime_dict["remain_time"] = remain_time;
  statis_info_of_runtime_dict["warmup_iter"] = warmup_iter;
  statis_info_of_runtime_dict["avg_time"] = avg_time;
  statis_info_of_runtime_dict["iterations"] = time_of_runtime_.size();
  return statis_info_of_runtime_dict;
}

} // namespace ultra_infer