// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "ultra_infer/runtime/backends/om/om_backend.h"

#include "acl/acl.h"

#include <chrono>
#include <sys/stat.h>

namespace ultra_infer {

bool OmBackend::aclInitFlag = false;
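
// The destructor releases every resource acquired in Init(): device buffers,
// the input/output datasets, and the ACL stream/context/device state.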
OmBackend::~OmBackend() {
  FreeInputBuffer();
  FreeOutputBuffer();
  DestroyInput();
  DestroyOutput();
  DestroyResource();
}

TensorInfo OmBackend::GetInputInfo(int index) {
  FDASSERT(index < NumInputs(),
           "The index: %d should be less than the number of inputs: %d.",
           index, NumInputs());
  return inputs_desc_[index];
}

std::vector<TensorInfo> OmBackend::GetInputInfos() { return inputs_desc_; }

TensorInfo OmBackend::GetOutputInfo(int index) {
  FDASSERT(index < NumOutputs(),
           "The index: %d should be less than the number of outputs: %d.",
           index, NumOutputs());
  return outputs_desc_[index];
}

std::vector<TensorInfo> OmBackend::GetOutputInfos() { return outputs_desc_; }
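
// Init sets up the Ascend ACL runtime, loads the OM model from
// runtime_option.model_file, and builds the input/output descriptions and
// device-side datasets that Infer() reuses on every call.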
bool OmBackend::Init(const RuntimeOption &runtime_option) {
  deviceId_ = runtime_option.device_id;

  // ACL init
  bool ret = InitResource();
  if (!ret) {
    FDERROR << "execute InitResource failed";
    return false;
  }

  // model init
  const char *omModelPath = runtime_option.model_file.c_str();
  FDINFO << "omModelPath = " << omModelPath;
  ret = LoadModel(omModelPath);
  if (!ret) {
    FDERROR << "execute LoadModel failed";
    return false;
  }

  // build input/output info
  ret = CreateModelDesc();
  if (!ret) {
    FDERROR << "execute CreateModelDesc failed";
    return false;
  }
  ret = CreateInput();
  if (!ret) {
    FDERROR << "execute CreateInput failed";
    FreeInputBuffer();
    return false;
  }
  ret = CreateOutput();
  if (!ret) {
    FDERROR << "execute CreateOutput failed";
    FreeInputBuffer();
    return false;
  }
  return true;
}
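
// Infer copies the caller's tensors into the pre-allocated device input
// buffers, runs the model with aclmdlExecute, and copies the device output
// buffers back into the FDTensor outputs.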
bool OmBackend::Infer(std::vector<FDTensor> &inputs,
                      std::vector<FDTensor> *outputs, bool copy_to_fd) {
  // set context
  aclError aclRet = aclrtSetCurrentContext(context_);
  if (aclRet != ACL_SUCCESS) {
    FDERROR << "aclrtSetCurrentContext failed"
            << ", errorCode is " << static_cast<int32_t>(aclRet);
    return false;
  }

  // Judge whether the number of inputs matches the model
  if (inputs.size() != inputs_desc_.size()) {
    FDERROR << "[OmBackend] Size of the inputs(" << inputs.size()
            << ") should keep same with the inputs of this model("
            << inputs_desc_.size() << ")." << std::endl;
    FreeInputBuffer();
    return false;
  }

  // copy input tensors to inputBuffer
  for (size_t i = 0; i < inputs.size(); ++i) {
    if (inputs[i].Data() == nullptr) {
      FDERROR << "inputs[" << i << "].Data() is NULL." << std::endl;
      return false;
    }
    size_t modelInputSize = aclmdlGetInputSizeByIndex(modelDesc_, i);
    aclRet = aclrtMemcpy(inputBuffer[i], modelInputSize, inputs[i].Data(),
                         inputs[i].Nbytes(), ACL_MEMCPY_DEVICE_TO_DEVICE);
    if (aclRet != ACL_SUCCESS) {
      FDERROR << "memcpy d2d failed. buffer size is " << modelInputSize
              << ", inputs[" << i << "].Nbytes() is " << inputs[i].Nbytes()
              << ", errorCode is " << static_cast<int32_t>(aclRet);
      return false;
    }
  }

  bool ret = Execute();
  if (!ret) {
    FDERROR << "execute inference failed";
    FreeInputBuffer();
    DestroyInput();
    DestroyOutput();
    return false;
  }

  // copy outputBuffer to outputs
  outputs->resize(outputs_desc_.size());
  std::vector<int64_t> temp_shape;
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    temp_shape.resize(outputs_desc_[i].shape.size());
    for (size_t j = 0; j < outputs_desc_[i].shape.size(); ++j) {
      temp_shape[j] = outputs_desc_[i].shape[j];
    }
    (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
                         outputs_desc_[i].name);
    size_t modelOutputSize = aclmdlGetOutputSizeByIndex(modelDesc_, i);
    if (modelOutputSize != (*outputs)[i].Nbytes()) {
      FDERROR << "output size does not match, index: " << i
              << ", modelOutputSize: " << modelOutputSize
              << ", (*outputs)[i].Nbytes(): " << (*outputs)[i].Nbytes();
      return false;
    }
    aclError aclRet = aclrtMemcpy(
        (*outputs)[i].MutableData(), (*outputs)[i].Nbytes(), outputBuffer[i],
        (*outputs)[i].Nbytes(), ACL_MEMCPY_DEVICE_TO_HOST);
    if (aclRet != ACL_SUCCESS) {
      FDERROR << "memcpy d2h failed. buffer size is " << (*outputs)[i].Nbytes()
              << ", errorCode is " << static_cast<int32_t>(aclRet);
      return false;
    }
  }
  return true;
}
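
// InitResource performs one-time ACL initialization (guarded by aclInitFlag),
// binds the configured device, and creates the context and stream used for
// model execution.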
bool OmBackend::InitResource() {
  // ACL init
  aclError ret;
  if (aclInitFlag == false) {
    ret = aclInit(NULL);
    if (ret != ACL_SUCCESS) {
      FDERROR << "acl init failed, errorCode = " << static_cast<int32_t>(ret);
      return false;
    }
    aclInitFlag = true;
  }

  // set device
  ret = aclrtSetDevice(deviceId_);
  if (ret != ACL_SUCCESS) {
    FDERROR << "acl set device " << deviceId_
            << " failed, errorCode = " << static_cast<int32_t>(ret);
    return false;
  }

  // create context (set current)
  ret = aclrtCreateContext(&context_, deviceId_);
  if (ret != ACL_SUCCESS) {
    FDERROR << "acl create context failed, deviceId = " << deviceId_
            << ", errorCode = " << static_cast<int32_t>(ret);
    return false;
  }

  // create stream
  ret = aclrtCreateStream(&stream_);
  if (ret != ACL_SUCCESS) {
    FDERROR << "acl create stream failed, deviceId = " << deviceId_
            << ", errorCode = " << static_cast<int32_t>(ret);
    return false;
  }

  // get run mode
  // runMode is ACL_HOST when the app runs on the host side,
  // ACL_DEVICE when the app runs on the device side
  aclrtRunMode runMode;
  ret = aclrtGetRunMode(&runMode);
  if (ret != ACL_SUCCESS) {
    FDERROR << "acl get run mode failed, errorCode = "
            << static_cast<int32_t>(ret);
    return false;
  }
  return true;
}
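
// LoadModel queries the workspace and weight memory the model needs,
// allocates both on the device, and loads the .om file into that memory.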
bool OmBackend::LoadModel(const char *modelPath) {
  if (loadFlag_) {
    FDERROR << "model has already been loaded";
    return false;
  }
  aclError ret = aclmdlQuerySize(modelPath, &modelWorkSize_, &modelWeightSize_);
  if (ret != ACL_SUCCESS) {
    FDERROR << "query model failed, model file is " << modelPath
            << ", errorCode is " << static_cast<int32_t>(ret);
    return false;
  }

  // Use ACL_MEM_MALLOC_HUGE_FIRST so huge pages are preferred; huge memory
  // can improve performance.
  ret = aclrtMalloc(&modelWorkPtr_, modelWorkSize_, ACL_MEM_MALLOC_HUGE_FIRST);
  if (ret != ACL_SUCCESS) {
    FDERROR << "malloc buffer for work failed, require size is "
            << modelWorkSize_ << ", errorCode is " << static_cast<int32_t>(ret);
    return false;
  }
  ret = aclrtMalloc(&modelWeightPtr_, modelWeightSize_,
                    ACL_MEM_MALLOC_HUGE_FIRST);
  if (ret != ACL_SUCCESS) {
    FDERROR << "malloc buffer for weight failed, require size is "
            << modelWeightSize_ << ", errorCode is "
            << static_cast<int32_t>(ret);
    return false;
  }

  ret = aclmdlLoadFromFileWithMem(modelPath, &modelId_, modelWorkPtr_,
                                  modelWorkSize_, modelWeightPtr_,
                                  modelWeightSize_);
  if (ret != ACL_SUCCESS) {
    FDERROR << "load model from file failed, model file is " << modelPath
            << ", errorCode is " << static_cast<int32_t>(ret);
    return false;
  }
  loadFlag_ = true;
  FDINFO << "load model " << modelPath << " success";
  return true;
}
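
// Execute runs one synchronous inference on the loaded model with the
// prepared input/output datasets.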
bool OmBackend::Execute() {
  aclError ret = aclmdlExecute(modelId_, input_, output_);
  if (ret != ACL_SUCCESS) {
    FDERROR << "execute model failed, modelId is " << modelId_
            << ", errorCode is " << static_cast<int32_t>(ret);
    return false;
  }
  return true;
}
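
// CreateModelDesc fetches the model description that later provides the
// input/output counts, sizes, shapes, and data types.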
bool OmBackend::CreateModelDesc() {
  modelDesc_ = aclmdlCreateDesc();
  if (modelDesc_ == nullptr) {
    FDERROR << "create model description failed";
    return false;
  }
  aclError ret = aclmdlGetDesc(modelDesc_, modelId_);
  if (ret != ACL_SUCCESS) {
    FDERROR << "get model description failed, modelId is " << modelId_
            << ", errorCode is " << static_cast<int32_t>(ret);
    return false;
  }
  return true;
}
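
// CreateInput allocates a device buffer for every model input, wraps each
// buffer in an aclDataBuffer attached to the input dataset, and records the
// name/shape/dtype of each input in inputs_desc_.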
bool OmBackend::CreateInput() {
  if (modelDesc_ == nullptr) {
    FDERROR << "no model description, create input failed";
    return false;
  }

  // input_: aclmdlDataset holding one aclDataBuffer per model input
  input_ = aclmdlCreateDataset();
  if (input_ == nullptr) {
    FDERROR << "can't create dataset, create input failed";
    return false;
  }

  // get input nums
  size_t inputNum = aclmdlGetNumInputs(modelDesc_);
  inputs_desc_.resize(inputNum);
  inputBuffer.resize(inputNum, nullptr);
  for (size_t i = 0; i < inputNum; ++i) {
    // get input size
    size_t modelInputSize = aclmdlGetInputSizeByIndex(modelDesc_, i);
    aclError ret = aclrtMalloc(&inputBuffer[i], modelInputSize,
                               ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != ACL_SUCCESS) {
      FDERROR << "can't malloc buffer, size is " << modelInputSize
              << ", errorCode is " << static_cast<int32_t>(ret);
      return false;
    }
    // inputData: aclDataBuffer
    aclDataBuffer *inputData =
        aclCreateDataBuffer(inputBuffer[i], modelInputSize);
    if (inputData == nullptr) {
      FDERROR << "can't create data buffer, create input failed";
      return false;
    }
    // add aclDataBuffer to input
    ret = aclmdlAddDatasetBuffer(input_, inputData);
    if (ret != ACL_SUCCESS) {
      FDERROR << "add input dataset buffer failed, errorCode is "
              << static_cast<int32_t>(ret);
      (void)aclDestroyDataBuffer(inputData);
      inputData = nullptr;
      return false;
    }
    // get name/shape/dtype of input to build inputs_desc_
    const char *name = aclmdlGetInputNameByIndex(modelDesc_, i);
    std::string temp_name = name;
    std::vector<int> temp_shape{};
    aclmdlIODims dims;
    ret = aclmdlGetInputDims(modelDesc_, i, &dims);
    if (ret != ACL_SUCCESS) {
      FDERROR << "get input tensor dims failed, ret = " << ret << std::endl;
      return false;
    }
    int n_dims = static_cast<int>(dims.dimCount);
    temp_shape.resize(n_dims);
    for (int j = 0; j < n_dims; j++) {
      temp_shape[j] = static_cast<int>(dims.dims[j]);
    }
    aclDataType dtype = aclmdlGetInputDataType(modelDesc_, i);
    FDDataType temp_dtype;
    switch (dtype) {
    case ACL_BOOL:
      temp_dtype = FDDataType::BOOL;
      break;
    case ACL_UINT8:
      temp_dtype = FDDataType::UINT8;
      break;
    case ACL_INT8:
      temp_dtype = FDDataType::INT8;
      break;
    case ACL_INT16:
      temp_dtype = FDDataType::INT16;
      break;
    case ACL_INT32:
      temp_dtype = FDDataType::INT32;
      break;
    case ACL_INT64:
      temp_dtype = FDDataType::INT64;
      break;
    case ACL_FLOAT16:
      temp_dtype = FDDataType::FP16;
      break;
    case ACL_FLOAT:
      temp_dtype = FDDataType::FP32;
      break;
    case ACL_DOUBLE:
      temp_dtype = FDDataType::FP64;
      break;
    default:
      FDERROR << "unsupported input tensor dtype: " << static_cast<int>(dtype);
      return false;
    }
    TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
    inputs_desc_[i] = temp_input_info;
  }
  return true;
}
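
// CreateOutput mirrors CreateInput for the model outputs: allocate a device
// buffer per output, attach it to the output dataset, and record each
// output's name/shape/dtype in outputs_desc_.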
bool OmBackend::CreateOutput() {
  if (modelDesc_ == nullptr) {
    FDERROR << "no model description, create output failed";
    return false;
  }
  output_ = aclmdlCreateDataset();
  if (output_ == nullptr) {
    FDERROR << "can't create dataset, create output failed";
    return false;
  }
  size_t outputNum = aclmdlGetNumOutputs(modelDesc_);
  outputs_desc_.resize(outputNum);
  outputBuffer.resize(outputNum, nullptr);
  for (size_t i = 0; i < outputNum; ++i) {
    size_t modelOutputSize = aclmdlGetOutputSizeByIndex(modelDesc_, i);
    aclError ret = aclrtMalloc(&outputBuffer[i], modelOutputSize,
                               ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != ACL_SUCCESS) {
      FDERROR << "can't malloc buffer, size is " << modelOutputSize
              << ", errorCode is " << static_cast<int32_t>(ret);
      return false;
    }
    aclDataBuffer *outputData =
        aclCreateDataBuffer(outputBuffer[i], modelOutputSize);
    if (outputData == nullptr) {
      FDERROR << "can't create data buffer, create output failed";
      return false;
    }
    ret = aclmdlAddDatasetBuffer(output_, outputData);
    if (ret != ACL_SUCCESS) {
      FDERROR << "add output dataset buffer failed, errorCode is "
              << static_cast<int32_t>(ret);
      (void)aclDestroyDataBuffer(outputData);
      return false;
    }
    // get name/shape/dtype of output to build outputs_desc_
    const char *name = aclmdlGetOutputNameByIndex(modelDesc_, i);
    std::string temp_name = name;
    std::vector<int> temp_shape{};
    aclmdlIODims dims;
    ret = aclmdlGetOutputDims(modelDesc_, i, &dims);
    if (ret != ACL_SUCCESS) {
      FDERROR << "get output tensor dims failed, ret = " << ret << std::endl;
      return false;
    }
    int n_dims = static_cast<int>(dims.dimCount);
    temp_shape.resize(n_dims);
    for (int j = 0; j < n_dims; j++) {
      temp_shape[j] = static_cast<int>(dims.dims[j]);
    }
    aclDataType dtype = aclmdlGetOutputDataType(modelDesc_, i);
    FDDataType temp_dtype;
    switch (dtype) {
    case ACL_BOOL:
      temp_dtype = FDDataType::BOOL;
      break;
    case ACL_UINT8:
      temp_dtype = FDDataType::UINT8;
      break;
    case ACL_INT8:
      temp_dtype = FDDataType::INT8;
      break;
    case ACL_INT16:
      temp_dtype = FDDataType::INT16;
      break;
    case ACL_INT32:
      temp_dtype = FDDataType::INT32;
      break;
    case ACL_INT64:
      temp_dtype = FDDataType::INT64;
      break;
    case ACL_FLOAT16:
      temp_dtype = FDDataType::FP16;
      break;
    case ACL_FLOAT:
      temp_dtype = FDDataType::FP32;
      break;
    case ACL_DOUBLE:
      temp_dtype = FDDataType::FP64;
      break;
    default:
      FDERROR << "unsupported output tensor dtype: " << static_cast<int>(dtype);
      return false;
    }
    TensorInfo temp_output_info = {temp_name, temp_shape, temp_dtype};
    outputs_desc_[i] = temp_output_info;
  }
  return true;
}
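
// FreeInputBuffer/FreeOutputBuffer release the device memory allocated in
// CreateInput/CreateOutput.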
void OmBackend::FreeInputBuffer() {
  for (size_t i = 0; i < inputBuffer.size(); ++i) {
    if (inputBuffer[i] != nullptr) {
      (void)aclrtFree(inputBuffer[i]);
      inputBuffer[i] = nullptr;
    }
  }
}

void OmBackend::FreeOutputBuffer() {
  for (size_t i = 0; i < outputBuffer.size(); ++i) {
    if (outputBuffer[i] != nullptr) {
      (void)aclrtFree(outputBuffer[i]);
      outputBuffer[i] = nullptr;
    }
  }
}
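
// DestroyInput/DestroyOutput tear down the aclmdlDataset objects; for the
// output dataset the underlying device buffers are freed here as well.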
void OmBackend::DestroyInput() {
  if (input_ == nullptr) {
    return;
  }
  for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(input_); ++i) {
    aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(input_, i);
    (void)aclDestroyDataBuffer(dataBuffer);
  }
  (void)aclmdlDestroyDataset(input_);
  input_ = nullptr;
}

void OmBackend::DestroyOutput() {
  if (output_ == nullptr) {
    return;
  }
  for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(output_); ++i) {
    aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(output_, i);
    void *data = aclGetDataBufferAddr(dataBuffer);
    (void)aclrtFree(data);
    (void)aclDestroyDataBuffer(dataBuffer);
  }
  (void)aclmdlDestroyDataset(output_);
  output_ = nullptr;
}
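
// DestroyResource destroys the stream and context, resets the device, and
// finalizes ACL if this backend performed the aclInit.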
void OmBackend::DestroyResource() {
  // set context
  aclError ret = aclrtSetCurrentContext(context_);
  if (ret != ACL_SUCCESS) {
    FDERROR << "aclrtSetCurrentContext failed"
            << ", errorCode is " << static_cast<int32_t>(ret);
    return;
  }
  if (stream_ != nullptr) {
    ret = aclrtDestroyStream(stream_);
    if (ret != ACL_SUCCESS) {
      FDERROR << "destroy stream failed, errorCode = "
              << static_cast<int32_t>(ret);
    }
    stream_ = nullptr;
  }
  if (context_ != nullptr) {
    ret = aclrtDestroyContext(context_);
    if (ret != ACL_SUCCESS) {
      FDERROR << "destroy context failed, errorCode = "
              << static_cast<int32_t>(ret);
    }
    context_ = nullptr;
  }
  ret = aclrtResetDevice(deviceId_);
  if (ret != ACL_SUCCESS) {
    FDERROR << "reset device " << deviceId_
            << " failed, errorCode = " << static_cast<int32_t>(ret);
  }
  if (aclInitFlag == true) {
    ret = aclFinalize();
    if (ret != ACL_SUCCESS) {
      FDERROR << "finalize acl failed, errorCode = "
              << static_cast<int32_t>(ret);
    }
    aclInitFlag = false;
  }
}

} // namespace ultra_infer