sophgo_backend.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "ultra_infer/runtime/backends/sophgo/sophgo_backend.h"

#include <assert.h>

namespace ultra_infer {

SophgoBackend::~SophgoBackend() { bm_dev_free(handle_); }
/***************************************************************
 * @name GetSDKAndDeviceVersion
 * @brief Get the Sophgo SDK and device version
 * @param None
 * @return bool
 * @note None
 ***************************************************************/
bool SophgoBackend::GetSDKAndDeviceVersion() { return true; }
/***************************************************************
 * @name Init
 * @brief Initialize the Sophgo backend from a RuntimeOption
 * @param option: runtime configuration holding the bmodel file path,
 *        the model format and the target device
 * @return bool
 * @note None
 ***************************************************************/
bool SophgoBackend::Init(const RuntimeOption &option) {
  if (option.model_from_memory_) {
    FDERROR << "SophgoBackend doesn't support loading a model from memory, "
               "please load the model from disk."
            << std::endl;
    return false;
  }
  if (option.model_format != ModelFormat::SOPHGO) {
    FDERROR << "SophgoBackend only supports model format SOPHGO, but now it's "
            << option.model_format << "." << std::endl;
    return false;
  }
  if (option.device != Device::SOPHGOTPUD) {
    FDERROR << "SophgoBackend only supports Device::SOPHGOTPUD, but now it's "
            << option.device << "." << std::endl;
    return false;
  }
  std::string model_file = option.model_file;

  // Load the bmodel from disk.
  if (!this->LoadModel((char *)model_file.data())) {
    FDERROR << "Load model failed." << std::endl;
    return false;
  }

  // Query the SDK and device version.
  if (!this->GetSDKAndDeviceVersion()) {
    FDERROR << "Get SDK and device version failed." << std::endl;
    return false;
  }

  // Collect the input/output tensor descriptions of the loaded network.
  if (!this->GetModelInputOutputInfos()) {
    FDERROR << "Get model input/output infos failed." << std::endl;
    return false;
  }
  return true;
}
/***************************************************************
 * @name LoadModel
 * @brief Read a Sophgo bmodel and query its network info
 * @param model: path of the Sophgo bmodel file
 * @return bool
 * @note None
 ***************************************************************/
bool SophgoBackend::LoadModel(void *model) {
  unsigned int card_num = 0;
  bm_status_t status = bm_get_card_num(&card_num);
  status = bm_dev_request(&handle_, 0);
  assert(BM_SUCCESS == status);
  p_bmrt_ = bmrt_create(handle_);
  assert(NULL != p_bmrt_);

  // `model` is the path of the bmodel file on disk.
  bool load_status = bmrt_load_bmodel(p_bmrt_, (char *)model);
  assert(load_status);

  // Use the first network contained in the bmodel.
  int network_num = bmrt_get_network_number(p_bmrt_);
  const char **net_names = NULL;
  bmrt_get_network_names(p_bmrt_, &net_names);
  net_name_ = net_names[0];
  free(net_names);

  net_info_ = bmrt_get_network_info(p_bmrt_, net_name_.c_str());
  assert(NULL != net_info_);
  return true;
}
/***************************************************************
 * @name GetModelInputOutputInfos
 * @brief Get the detailed input and output infos of the model
 * @param None
 * @return bool
 * @note None
 ***************************************************************/
bool SophgoBackend::GetModelInputOutputInfos() {
  inputs_desc_.resize(net_info_->input_num);
  bm_shape_t *input_shapes = net_info_->stages->input_shapes;
  bm_data_type_t *input_dtypes = net_info_->input_dtypes;
  for (int idx = 0; idx < net_info_->input_num; idx++) {
    std::string temp_name = (net_info_->input_names)[idx];
    std::vector<int> temp_shape{};
    temp_shape.resize(input_shapes[idx].num_dims);
    for (int i = 0; i < input_shapes[idx].num_dims; i++) {
      temp_shape[i] = input_shapes[idx].dims[i];
    }
    // Convert the Sophgo dtype of this input to FDDataType.
    FDDataType temp_dtype = SophgoTensorTypeToFDDataType(input_dtypes[idx]);
    TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
    inputs_desc_[idx] = temp_input_info;
  }

  outputs_desc_.resize(net_info_->output_num);
  bm_shape_t *output_shapes = net_info_->stages->output_shapes;
  bm_data_type_t *output_dtypes = net_info_->output_dtypes;
  for (int idx = 0; idx < net_info_->output_num; idx++) {
    std::string temp_name = (net_info_->output_names)[idx];
    std::vector<int> temp_shape{};
    temp_shape.resize(output_shapes[idx].num_dims);
    for (int i = 0; i < output_shapes[idx].num_dims; i++) {
      temp_shape[i] = output_shapes[idx].dims[i];
    }
    // Convert the Sophgo dtype of this output to FDDataType.
    FDDataType temp_dtype = SophgoTensorTypeToFDDataType(output_dtypes[idx]);
    TensorInfo temp_output_info = {temp_name, temp_shape, temp_dtype};
    outputs_desc_[idx] = temp_output_info;
  }
  return true;
}
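/***************************************************************
 * @name GetInputInfo / GetInputInfos / GetOutputInfo / GetOutputInfos
 * @brief Accessors for the tensor descriptions collected in
 *        GetModelInputOutputInfos
 * @param index: tensor index (single-tensor accessors only)
 * @return TensorInfo / std::vector<TensorInfo>
 * @note None
 ***************************************************************/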
TensorInfo SophgoBackend::GetInputInfo(int index) {
  FDASSERT(index < NumInputs(),
           "The index: %d should be less than the number of inputs: %d.",
           index, NumInputs())
  return inputs_desc_[index];
}

std::vector<TensorInfo> SophgoBackend::GetInputInfos() { return inputs_desc_; }

TensorInfo SophgoBackend::GetOutputInfo(int index) {
  FDASSERT(index < NumOutputs(),
           "The index: %d should be less than the number of outputs: %d.",
           index, NumOutputs())
  return outputs_desc_[index];
}

std::vector<TensorInfo> SophgoBackend::GetOutputInfos() {
  return outputs_desc_;
}
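/***************************************************************
 * @name Infer
 * @brief Run one inference on the Sophgo TPU: copy the inputs to
 *        device memory, launch the network, then copy the results
 *        back into the output FDTensors
 * @param inputs: host input tensors
 *        outputs: host output tensors to be filled
 *        copy_to_fd: not used by this backend; results are always
 *        copied back to host memory
 * @return bool
 * @note None
 ***************************************************************/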
bool SophgoBackend::Infer(std::vector<FDTensor> &inputs,
                          std::vector<FDTensor> *outputs, bool copy_to_fd) {
  int input_size = inputs.size();
  assert(input_size != 0);
  assert(input_size == NumInputs());
  std::vector<bm_tensor_t> input_tensors(input_size);
  bm_status_t status = BM_SUCCESS;

  RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  // Copy every input tensor from host memory to TPU device memory.
  bm_data_type_t *input_dtypes = net_info_->input_dtypes;
  for (int i = 0; i < input_size; i++) {
    status = bm_malloc_device_byte(handle_, &input_tensors[i].device_mem,
                                   net_info_->max_input_bytes[i]);
    assert(BM_SUCCESS == status);
    input_tensors[i].dtype = input_dtypes[i];
    input_tensors[i].st_mode = BM_STORE_1N;
    input_tensors[i].shape = net_info_->stages[0].input_shapes[i];
    unsigned int input_byte = bmrt_tensor_bytesize(&input_tensors[i]);
    bm_memcpy_s2d_partial(handle_, input_tensors[i].device_mem,
                          (void *)inputs[i].Data(), input_byte);
  }

  // Allocate device memory for every output tensor.
  int output_size = NumOutputs();
  std::vector<bm_tensor_t> output_tensors(output_size);
  for (int i = 0; i < output_size; i++) {
    status = bm_malloc_device_byte(handle_, &output_tensors[i].device_mem,
                                   net_info_->max_output_bytes[i]);
    assert(BM_SUCCESS == status);
  }

  RUNTIME_PROFILE_LOOP_BEGIN(1)
  // Launch the network and wait until the TPU finishes.
  bool launch_status = bmrt_launch_tensor_ex(
      p_bmrt_, net_name_.c_str(), input_tensors.data(), net_info_->input_num,
      output_tensors.data(), net_info_->output_num, true, false);
  assert(launch_status);
  status = bm_thread_sync(handle_);
  assert(status == BM_SUCCESS);
  RUNTIME_PROFILE_LOOP_END

  // Copy every output tensor from device memory back into the FDTensors.
  outputs->resize(outputs_desc_.size());
  for (int i = 0; i < output_size; i++) {
    size_t temp_bytesize = bmrt_tensor_bytesize(&output_tensors[i]); // bytes
    float *temp_out = (float *)malloc(temp_bytesize);
    bm_memcpy_d2s_partial(handle_, temp_out, output_tensors[i].device_mem,
                          temp_bytesize);
    std::vector<int64_t> temp_shape;
    temp_shape.resize(outputs_desc_[i].shape.size());
    for (size_t j = 0; j < outputs_desc_[i].shape.size(); ++j) {
      temp_shape[j] = outputs_desc_[i].shape[j];
    }
    (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
                         outputs_desc_[i].name);
    memcpy((*outputs)[i].MutableData(), temp_out, (*outputs)[i].Nbytes());
    free(temp_out);
  }

  // Release device memory.
  for (int i = 0; i < input_size; i++) {
    bm_free_device(handle_, input_tensors[i].device_mem);
  }
  for (int i = 0; i < output_size; i++) {
    bm_free_device(handle_, output_tensors[i].device_mem);
  }
  RUNTIME_PROFILE_LOOP_H2D_D2H_END
  return true;
}
/***************************************************************
 * @name SophgoTensorTypeToFDDataType
 * @brief Convert a Sophgo tensor type to the corresponding FDDataType
 * @param type: bm_data_type_t
 * @return FDDataType
 * @note None
 ***************************************************************/
FDDataType SophgoBackend::SophgoTensorTypeToFDDataType(bm_data_type_t type) {
  // BM_FLOAT16 is reported as FP32 on the FDTensor side.
  if (type == BM_FLOAT16) {
    return FDDataType::FP32;
  }
  if (type == BM_FLOAT32) {
    return FDDataType::FP32;
  }
  if (type == BM_INT8) {
    return FDDataType::INT8;
  }
  if (type == BM_INT16) {
    return FDDataType::INT16;
  }
  if (type == BM_INT32) {
    return FDDataType::INT32;
  }
  if (type == BM_UINT8) {
    return FDDataType::UINT8;
  }
  FDERROR << "FDDataType doesn't support this type." << std::endl;
  return FDDataType::UNKNOWN1;
}
/***************************************************************
 * @name FDDataTypeToSophgoTensorType
 * @brief Convert an FDDataType to the corresponding Sophgo tensor type
 * @param type: FDDataType
 * @return bm_data_type_t
 * @note None
 ***************************************************************/
bm_data_type_t
SophgoBackend::FDDataTypeToSophgoTensorType(ultra_infer::FDDataType type) {
  if (type == FDDataType::FP16) {
    return BM_FLOAT16;
  }
  if (type == FDDataType::FP32) {
    return BM_FLOAT32;
  }
  if (type == FDDataType::INT8) {
    return BM_INT8;
  }
  if (type == FDDataType::INT16) {
    return BM_INT16;
  }
  if (type == FDDataType::INT32) {
    return BM_INT32;
  }
  if (type == FDDataType::UINT8) {
    return BM_UINT8;
  }
  // Fall back to BM_FLOAT32 for unsupported types.
  FDERROR << "Sophgo tensor type doesn't support this type." << std::endl;
  return BM_FLOAT32;
}

} // namespace ultra_infer