| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579 |
- // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- #include "ultra_infer/runtime/backends/om/om_backend.h"
- #include "acl/acl.h"
- #include <chrono>
- #include <sys/stat.h>
- namespace ultra_infer {
- bool OmBackend::aclInitFlag = false;
- OmBackend::~OmBackend() {
- FreeInputBuffer();
- FreeOutputBuffer();
- DestroyInput();
- DestroyOutput();
- DestroyResource();
- }
- TensorInfo OmBackend::GetInputInfo(int index) {
- FDASSERT(index < NumInputs(),
- "The index: %d should less than the number of inputs: %d.", index,
- NumInputs());
- return inputs_desc_[index];
- }
- std::vector<TensorInfo> OmBackend::GetInputInfos() { return inputs_desc_; }
- TensorInfo OmBackend::GetOutputInfo(int index) {
- FDASSERT(index < NumOutputs(),
- "The index: %d should less than the number of outputs %d.", index,
- NumOutputs());
- return outputs_desc_[index];
- }
- std::vector<TensorInfo> OmBackend::GetOutputInfos() { return outputs_desc_; }
- bool OmBackend::Init(const RuntimeOption &runtime_option) {
- deviceId_ = runtime_option.device_id;
- // ACL init
- aclError ret = InitResource();
- if (ret != true) {
- FDERROR << "execute InitResource failed, errorCode = "
- << static_cast<int32_t>(ret);
- return false;
- }
- // model init;
- const char *omModelPath = (char *)runtime_option.model_file.data();
- FDINFO << "omModelPath = " << omModelPath;
- ret = LoadModel(omModelPath);
- if (ret != true) {
- FDERROR << "execute LoadModel failed";
- return false;
- }
- // build input/output info
- ret = CreateModelDesc();
- if (ret != true) {
- FDERROR << "execute CreateModelDesc failed";
- return false;
- }
- ret = CreateInput();
- if (ret != true) {
- FDERROR << "execute CreateInput failed";
- FreeInputBuffer();
- return false;
- }
- ret = CreateOutput();
- if (ret != true) {
- FDERROR << "execute CreateOutput failed";
- FreeInputBuffer();
- return false;
- }
- return true;
- }
- bool OmBackend::Infer(std::vector<FDTensor> &inputs,
- std::vector<FDTensor> *outputs, bool copy_to_fd) {
- // set context
- aclError aclRet = aclrtSetCurrentContext(context_);
- if (aclRet != ACL_SUCCESS) {
- FDERROR << "aclrtSetCurrentContext failed"
- << ", errorCode is " << static_cast<int32_t>(aclRet);
- return false;
- }
- // Judge whether the input and output size are the same
- if (inputs.size() != inputs_desc_.size()) {
- FDERROR << "[OmBackend] Size of the inputs(" << inputs.size()
- << ") should keep same with the inputs of this model("
- << inputs_desc_.size() << ")." << std::endl;
- FreeInputBuffer();
- return false;
- }
- // cp input tensor to inputBuffer
- for (size_t i = 0; i < inputs.size(); ++i) {
- if (inputs[i].Data() == nullptr) {
- FDERROR << "inputs[i].Data is NULL." << std::endl;
- return false;
- }
- size_t modelInputSize = aclmdlGetInputSizeByIndex(modelDesc_, i);
- aclRet = aclrtMemcpy(inputBuffer[i], modelInputSize, inputs[i].Data(),
- inputs[i].Nbytes(), ACL_MEMCPY_DEVICE_TO_DEVICE);
- if (aclRet != ACL_SUCCESS) {
- FDERROR << "memcpy d2d failed. buffer size is " << modelInputSize
- << ", inputs[i].Nbytes() is " << inputs[i].Nbytes()
- << ", errorCode is " << static_cast<int32_t>(aclRet);
- return false;
- }
- }
- bool ret = Execute();
- if (ret != true) {
- FDERROR << "execute inference failed";
- FreeInputBuffer();
- DestroyInput();
- DestroyOutput();
- return false;
- }
- // cp outputbuffer to outputs
- outputs->resize(outputs_desc_.size());
- std::vector<int64_t> temp_shape(4);
- for (size_t i = 0; i < outputs_desc_.size(); ++i) {
- temp_shape.resize(outputs_desc_[i].shape.size());
- for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) {
- temp_shape[j] = outputs_desc_[i].shape[j];
- }
- (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
- outputs_desc_[i].name);
- size_t modelOutputSize = aclmdlGetOutputSizeByIndex(modelDesc_, i);
- if (modelOutputSize != (*outputs)[i].Nbytes()) {
- FDERROR << "output size is not match, index: " << i
- << ", modelOutputSize:" << modelOutputSize
- << ", (*outputs)[i].Nbytes():" << (*outputs)[i].Nbytes();
- return false;
- }
- aclError aclRet = aclrtMemcpy(
- (*outputs)[i].MutableData(), (*outputs)[i].Nbytes(), outputBuffer[i],
- (*outputs)[i].Nbytes(), ACL_MEMCPY_DEVICE_TO_HOST);
- if (aclRet != ACL_SUCCESS) {
- FDERROR << "memcpy h2d failed. buffer size is " << (*outputs)[i].Nbytes()
- << ", errorCode is " << static_cast<int32_t>(aclRet);
- return false;
- }
- }
- return true;
- }
- bool OmBackend::InitResource() {
- // ACL init
- aclError ret;
- if (aclInitFlag == false) {
- ret = aclInit(NULL);
- if (ret != ACL_SUCCESS) {
- FDERROR << "acl init failed, errorCode = " << static_cast<int32_t>(ret);
- return false;
- }
- aclInitFlag = true;
- }
- // set device
- ret = aclrtSetDevice(deviceId_);
- if (ret != ACL_SUCCESS) {
- FDERROR << "acl set device" << deviceId_
- << " failed, errorCode = " << static_cast<int32_t>(ret);
- return false;
- }
- // create context (set current)
- ret = aclrtCreateContext(&context_, deviceId_);
- if (ret != ACL_SUCCESS) {
- FDERROR << "acl create context failed, deviceId" << deviceId_
- << ", errorCode = " << static_cast<int32_t>(ret);
- return false;
- }
- // create stream
- ret = aclrtCreateStream(&stream_);
- if (ret != ACL_SUCCESS) {
- FDERROR << "acl create stream failed, deviceId" << deviceId_
- << ", errorCode = " << static_cast<int32_t>(ret);
- return false;
- }
- // get run mode
- // runMode is ACL_HOST which represents app is running in host
- // runMode is ACL_DEVICE which represents app is running in device
- aclrtRunMode runMode;
- ret = aclrtGetRunMode(&runMode);
- if (ret != ACL_SUCCESS) {
- FDERROR << "acl get run mode failed, errorCode = "
- << static_cast<int32_t>(ret);
- return false;
- }
- return true;
- }
- bool OmBackend::LoadModel(const char *modelPath) {
- if (loadFlag_) {
- FDERROR << "model has already been loaded";
- return false;
- }
- aclError ret = aclmdlQuerySize(modelPath, &modelWorkSize_, &modelWeightSize_);
- if (ret != ACL_SUCCESS) {
- FDERROR << "query model false, model file is" << modelPath
- << ", errorCode is " << static_cast<int32_t>(ret);
- return false;
- }
- // using ACL_MEM_MALLOC_HUGE_FIRST to malloc memory, huge memory is preferred
- // to use and huge memory can improve performance.
- ret = aclrtMalloc(&modelWorkPtr_, modelWorkSize_, ACL_MEM_MALLOC_HUGE_FIRST);
- if (ret != ACL_SUCCESS) {
- FDERROR << "malloc buffer for work failed, require size is "
- << modelWorkSize_ << ", errorCode is " << static_cast<int32_t>(ret);
- return false;
- }
- // using ACL_MEM_MALLOC_HUGE_FIRST to malloc memory, huge memory is preferred
- // to use and huge memory can improve performance.
- ret = aclrtMalloc(&modelWeightPtr_, modelWeightSize_,
- ACL_MEM_MALLOC_HUGE_FIRST);
- if (ret != ACL_SUCCESS) {
- FDERROR << "malloc buffer for weight failed, require size is "
- << modelWeightSize_ << ", errorCode is "
- << static_cast<int32_t>(ret);
- return false;
- }
- ret = aclmdlLoadFromFileWithMem(modelPath, &modelId_, modelWorkPtr_,
- modelWorkSize_, modelWeightPtr_,
- modelWeightSize_);
- if (ret != ACL_SUCCESS) {
- FDERROR << "load model from file failed, model file is " << modelPath
- << ", errorCode is " << static_cast<int32_t>(ret);
- return false;
- }
- loadFlag_ = true;
- FDINFO << "load model " << modelPath << " success";
- return true;
- }
- bool OmBackend::Execute() {
- aclError ret = aclmdlExecute(modelId_, input_, output_);
- if (ret != ACL_SUCCESS) {
- FDERROR << "execute model failed, modelId is " << modelId_
- << ", errorCode is " << static_cast<int32_t>(ret);
- return false;
- }
- return true;
- }
- bool OmBackend::CreateModelDesc() {
- modelDesc_ = aclmdlCreateDesc();
- if (modelDesc_ == nullptr) {
- FDERROR << "create model description failed";
- return false;
- }
- aclError ret = aclmdlGetDesc(modelDesc_, modelId_);
- if (ret != ACL_SUCCESS) {
- FDERROR << "get model description failed, modelId is " << modelId_
- << ", errorCode is " << static_cast<int32_t>(ret);
- return false;
- }
- return true;
- }
- bool OmBackend::CreateInput() {
- // om used in this sample has only one input
- if (modelDesc_ == nullptr) {
- FDERROR << "no model description, create input failed";
- return false;
- }
- // input:aclmdlDataset
- input_ = aclmdlCreateDataset();
- if (input_ == nullptr) {
- FDERROR << "can't create dataset, create input failed";
- return false;
- }
- // get input nums
- size_t inputNum = aclmdlGetNumInputs(modelDesc_);
- inputs_desc_.resize(inputNum);
- inputBuffer.resize(inputNum, nullptr);
- // inputBuffer = {nullptr};
- for (size_t i = 0; i < inputNum; ++i) {
- // get input size
- size_t modelInputSize = aclmdlGetInputSizeByIndex(modelDesc_, i);
- aclError ret =
- aclrtMalloc(&inputBuffer[i], modelInputSize, ACL_MEM_MALLOC_HUGE_FIRST);
- if (ret != ACL_SUCCESS) {
- FDERROR << "can't malloc buffer, size is " << modelInputSize
- << ", errorCode is " << static_cast<int32_t>(ret);
- return false;
- }
- // inputData:aclDataBuffer
- aclDataBuffer *inputData =
- aclCreateDataBuffer(inputBuffer[i], modelInputSize);
- if (inputData == nullptr) {
- FDERROR << "can't create data buffer, create input failed";
- return false;
- }
- // add aclDataBuffer to input
- ret = aclmdlAddDatasetBuffer(input_, inputData);
- if (ret != ACL_SUCCESS) {
- FDERROR << "add input dataset buffer failed, errorCode is "
- << static_cast<int32_t>(ret);
- (void)aclDestroyDataBuffer(inputData);
- inputData = nullptr;
- return false;
- }
- // get name/shape/dtype of input to build inputs_desc_
- const char *name;
- name = aclmdlGetInputNameByIndex(modelDesc_, i);
- std::string temp_name = name;
- std::vector<int> temp_shape{};
- aclmdlIODims dims;
- ret = aclmdlGetInputDims(modelDesc_, i, &dims);
- if (ret != ACL_SUCCESS) {
- FDERROR << "get input tensor dims fail! ret=" << ret << std::endl;
- return false;
- }
- int n_dims = (int)dims.dimCount;
- temp_shape.resize(n_dims);
- for (int j = 0; j < n_dims; j++) {
- temp_shape[j] = (int)dims.dims[j];
- }
- aclDataType dtype = aclmdlGetInputDataType(modelDesc_, i);
- FDDataType temp_dtype;
- switch (dtype) {
- case ACL_BOOL:
- temp_dtype = FDDataType::BOOL;
- break;
- case ACL_UINT8:
- temp_dtype = FDDataType::UINT8;
- break;
- case ACL_INT8:
- temp_dtype = FDDataType::INT8;
- break;
- case ACL_INT16:
- temp_dtype = FDDataType::INT16;
- break;
- case ACL_INT32:
- temp_dtype = FDDataType::INT32;
- break;
- case ACL_INT64:
- temp_dtype = FDDataType::INT64;
- break;
- case ACL_FLOAT16:
- temp_dtype = FDDataType::FP16;
- break;
- case ACL_FLOAT:
- temp_dtype = FDDataType::FP32;
- break;
- case ACL_DOUBLE:
- temp_dtype = FDDataType::FP64;
- break;
- default:
- FDERROR << "unsupported input tensor dtype: " << (int)dtype;
- return false;
- }
- TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
- inputs_desc_[i] = temp_input_info;
- }
- return true;
- }
- bool OmBackend::CreateOutput() {
- if (modelDesc_ == nullptr) {
- FDERROR << "no model description, create output failed";
- return false;
- }
- output_ = aclmdlCreateDataset();
- if (output_ == nullptr) {
- FDERROR << "can't create dataset, create output failed";
- return false;
- }
- size_t outputSize = aclmdlGetNumOutputs(modelDesc_);
- outputs_desc_.resize(outputSize);
- outputBuffer.resize(outputSize, nullptr);
- for (size_t i = 0; i < outputSize; ++i) {
- size_t modelOutputSize = aclmdlGetOutputSizeByIndex(modelDesc_, i);
- aclError ret = aclrtMalloc(&outputBuffer[i], modelOutputSize,
- ACL_MEM_MALLOC_HUGE_FIRST);
- if (ret != ACL_SUCCESS) {
- FDERROR << "can't malloc buffer, size is " << modelOutputSize
- << ", errorCode is " << static_cast<int32_t>(ret);
- return false;
- }
- aclDataBuffer *outputData =
- aclCreateDataBuffer(outputBuffer[i], modelOutputSize);
- if (outputData == nullptr) {
- FDERROR << "can't create data buffer, create output failed";
- return false;
- }
- ret = aclmdlAddDatasetBuffer(output_, outputData);
- if (ret != ACL_SUCCESS) {
- FDERROR << "add output dataset buffer failed, errorCode is "
- << static_cast<int32_t>(ret);
- (void)aclDestroyDataBuffer(outputData);
- return false;
- }
- const char *name;
- name = aclmdlGetOutputNameByIndex(modelDesc_, i);
- std::string temp_name = name;
- std::vector<int> temp_shape{};
- aclmdlIODims dims;
- ret = aclmdlGetOutputDims(modelDesc_, i, &dims);
- if (ret != ACL_SUCCESS) {
- FDERROR << "get output tensor dims fail! ret=" << ret << std::endl;
- return false;
- }
- int n_dims = (int)dims.dimCount;
- temp_shape.resize(n_dims);
- for (int j = 0; j < n_dims; j++) {
- temp_shape[j] = (int)dims.dims[j];
- }
- aclDataType dtype = aclmdlGetOutputDataType(modelDesc_, i);
- FDDataType temp_dtype;
- switch (dtype) {
- case ACL_BOOL:
- temp_dtype = FDDataType::BOOL;
- break;
- case ACL_UINT8:
- temp_dtype = FDDataType::UINT8;
- break;
- case ACL_INT8:
- temp_dtype = FDDataType::INT8;
- break;
- case ACL_INT16:
- temp_dtype = FDDataType::INT16;
- break;
- case ACL_INT32:
- temp_dtype = FDDataType::INT32;
- break;
- case ACL_INT64:
- temp_dtype = FDDataType::INT64;
- break;
- case ACL_FLOAT16:
- temp_dtype = FDDataType::FP16;
- break;
- case ACL_FLOAT:
- temp_dtype = FDDataType::FP32;
- break;
- case ACL_DOUBLE:
- temp_dtype = FDDataType::FP64;
- break;
- default:
- FDERROR << "unsupported output tensor dtype: " << (int)dtype;
- return false;
- }
- TensorInfo temp_output_info = {temp_name, temp_shape, temp_dtype};
- outputs_desc_[i] = temp_output_info;
- }
- return true;
- }
- void OmBackend::FreeInputBuffer() {
- for (int i = 0; i < (int)inputs_desc_.size(); ++i) {
- if (inputBuffer[i] != nullptr) {
- (void)aclrtFree(inputBuffer[i]);
- inputBuffer[i] = nullptr;
- }
- }
- }
- void OmBackend::FreeOutputBuffer() {
- for (int i = 0; i < (int)outputs_desc_.size(); ++i) {
- if (outputBuffer[i] != nullptr) {
- (void)aclrtFree(outputBuffer[i]);
- outputBuffer[i] = nullptr;
- }
- }
- }
- void OmBackend::DestroyInput() {
- if (input_ == nullptr) {
- return;
- }
- for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(input_); ++i) {
- aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(input_, i);
- (void)aclDestroyDataBuffer(dataBuffer);
- }
- (void)aclmdlDestroyDataset(input_);
- input_ = nullptr;
- }
- void OmBackend::DestroyOutput() {
- if (output_ == nullptr) {
- return;
- }
- for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(output_); ++i) {
- aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(output_, i);
- void *data = aclGetDataBufferAddr(dataBuffer);
- (void)aclrtFree(data);
- (void)aclDestroyDataBuffer(dataBuffer);
- }
- (void)aclmdlDestroyDataset(output_);
- output_ = nullptr;
- }
- void OmBackend::DestroyResource() {
- // set context
- aclError ret = aclrtSetCurrentContext(context_);
- if (ret != ACL_SUCCESS) {
- FDERROR << "aclrtSetCurrentContext failed"
- << ", errorCode is " << static_cast<int32_t>(ret);
- return;
- }
- if (stream_ != nullptr) {
- ret = aclrtDestroyStream(stream_);
- if (ret != ACL_SUCCESS) {
- FDERROR << "destroy stream failed, errorCode = "
- << static_cast<int32_t>(ret);
- }
- stream_ = nullptr;
- }
- if (context_ != nullptr) {
- ret = aclrtDestroyContext(context_);
- if (ret != ACL_SUCCESS) {
- FDERROR << "destroy context failed, errorCode = "
- << static_cast<int32_t>(ret);
- }
- context_ = nullptr;
- }
- ret = aclrtResetDevice(deviceId_);
- if (ret != ACL_SUCCESS) {
- FDERROR << "reset device " << deviceId_
- << " failed, errorCode = " << static_cast<int32_t>(ret);
- }
- if (aclInitFlag == true) {
- ret = aclFinalize();
- if (ret != ACL_SUCCESS) {
- FDERROR << "finalize acl failed, errorCode = "
- << static_cast<int32_t>(ret);
- }
- aclInitFlag = false;
- }
- }
- } // namespace ultra_infer
|