// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "model_deploy/engine/include/tensorrt_engine.h"

namespace PaddleDeploy {
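
// Map a TensorRT data type onto the integer dtype codes used by DataBlob in
// this file: 0 = float32, 1 = int64, 2 = int32, 3 = uint8/bool. Returns -1
// for types that are not handled.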
int DtypeConver(const nvinfer1::DataType& dtype) {
  switch (dtype) {
    case nvinfer1::DataType::kINT32:
      return 2;
    case nvinfer1::DataType::kFLOAT:
      return 0;
    case nvinfer1::DataType::kBOOL:
      return 3;
    case nvinfer1::DataType::kINT8:
      return 3;
    default:
      break;
  }
  std::cerr << "Unsupported TensorRT dtype" << std::endl;
  return -1;
}
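
// Prepare a TensorRT inference engine for this model: read the input/output
// descriptions from the yaml config file, copy the engine configuration into
// an InferenceConfig, and delegate to TensorRTInferenceEngine::Init.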
bool Model::TensorRTInit(const TensorRTEngineConfig& engine_config) {
  infer_engine_ = std::make_shared<TensorRTInferenceEngine>();
  InferenceConfig config("tensorrt");
  YAML::Node node = YAML::LoadFile(engine_config.cfg_file_);
  if (!node["input"].IsDefined()) {
    std::cout << "Failed to find input in yaml file!" << std::endl;
    return false;
  }
  if (!node["output"].IsDefined()) {
    std::cout << "Failed to find output in yaml file!" << std::endl;
    return false;
  }
  *(config.tensorrt_config) = engine_config;
  config.tensorrt_config->yaml_config_ = node;
  return infer_engine_->Init(config);
}
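
// Initialize the TensorRT engine. If a serialized engine cache file already
// exists it is deserialized directly; otherwise the ONNX model is parsed, an
// engine is built with a fixed-shape optimization profile, and the result is
// optionally serialized back to the cache file.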
bool TensorRTInferenceEngine::Init(const InferenceConfig& engine_config) {
  const TensorRTEngineConfig& tensorrt_config = *engine_config.tensorrt_config;
  TensorRT::setCudaDevice(tensorrt_config.gpu_id_);
  std::ifstream engine_file(tensorrt_config.trt_cache_file_, std::ios::binary);
  if (engine_file) {
    std::cout << "Start loading cached optimized TensorRT file." << std::endl;
    engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
        LoadEngine(tensorrt_config.trt_cache_file_),
        InferDeleter());
    if (!engine_) {
      std::cerr << "Failed to load cached optimized TensorRT engine"
                << std::endl;
      return false;
    }
    // The execution context is required by Infer() in this path as well.
    context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
        engine_->createExecutionContext(),
        InferDeleter());
    return context_ != nullptr;
  }

  auto builder = InferUniquePtr<nvinfer1::IBuilder>(
      nvinfer1::createInferBuilder(logger_));
  if (!builder) {
    return false;
  }
  const auto explicitBatch = 1U << static_cast<uint32_t>(
      nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
  auto network = InferUniquePtr<nvinfer1::INetworkDefinition>(
      builder->createNetworkV2(explicitBatch));
  if (!network) {
    return false;
  }
  auto parser = InferUniquePtr<nvonnxparser::IParser>(
      nvonnxparser::createParser(*network, logger_));
  if (!parser) {
    return false;
  }
  if (!parser->parseFromFile(tensorrt_config.model_file_.c_str(),
                             static_cast<int>(logger_.mReportableSeverity))) {
    return false;
  }
  auto config = InferUniquePtr<nvinfer1::IBuilderConfig>(
      builder->createBuilderConfig());
  if (!config) {
    return false;
  }
  config->setMaxWorkspaceSize(tensorrt_config.max_workspace_size_);

  // Set input shapes. Dynamic shapes are currently not supported, so the
  // fixed dims from the yaml file are used for kMIN, kOPT and kMAX alike.
  yaml_config_ = tensorrt_config.yaml_config_["output"];
  auto profile = builder->createOptimizationProfile();
  for (const auto& input : tensorrt_config.yaml_config_["input"]) {
    nvinfer1::Dims input_dims;
    input_dims.nbDims = static_cast<int>(input["dims"].size());
    for (int i = 0; i < input_dims.nbDims; ++i) {
      input_dims.d[i] = input["dims"][i].as<int>();
      if (input_dims.d[i] < 0) {
        std::cerr << "Invalid input shape in yaml file" << std::endl;
        return false;
      }
    }
    profile->setDimensions(input["name"].as<std::string>().c_str(),
                           nvinfer1::OptProfileSelector::kMIN, input_dims);
    profile->setDimensions(input["name"].as<std::string>().c_str(),
                           nvinfer1::OptProfileSelector::kMAX, input_dims);
    profile->setDimensions(input["name"].as<std::string>().c_str(),
                           nvinfer1::OptProfileSelector::kOPT, input_dims);
  }
  config->addOptimizationProfile(profile);

  engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
      builder->buildEngineWithConfig(*network, *config),
      InferDeleter());
  if (!engine_) {
    std::cerr << "Failed to build TensorRT engine" << std::endl;
    return false;
  }
  context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
      engine_->createExecutionContext(),
      InferDeleter());
  if (!context_) {
    return false;
  }
  if (tensorrt_config.save_engine_) {
    if (!SaveEngine(*(engine_.get()), tensorrt_config.trt_cache_file_)) {
      std::cout << "Failed to save TensorRT engine to "
                << tensorrt_config.trt_cache_file_ << std::endl;
    }
  }
  return true;
}
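
// Copy each host-side input blob into the host staging buffer that the
// BufferManager allocated for the binding of the same name. dtype codes:
// 0 = float32, 1 = int64, 2 = int32, 3 = uint8.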
void TensorRTInferenceEngine::FeedInput(
    const std::vector<DataBlob>& input_blobs,
    const TensorRT::BufferManager& buffers) {
  for (const auto& input_blob : input_blobs) {
    int size = std::accumulate(input_blob.shape.begin(),
                               input_blob.shape.end(), 1,
                               std::multiplies<int>());
    if (input_blob.dtype == 0) {
      float* hostDataBuffer =
          reinterpret_cast<float*>(buffers.getHostBuffer(input_blob.name));
      memcpy(hostDataBuffer, input_blob.data.data(), size * sizeof(float));
    } else if (input_blob.dtype == 1) {
      int64_t* hostDataBuffer =
          reinterpret_cast<int64_t*>(buffers.getHostBuffer(input_blob.name));
      memcpy(hostDataBuffer, input_blob.data.data(), size * sizeof(int64_t));
    } else if (input_blob.dtype == 2) {
      int* hostDataBuffer =
          reinterpret_cast<int*>(buffers.getHostBuffer(input_blob.name));
      memcpy(hostDataBuffer, input_blob.data.data(), size * sizeof(int));
    } else if (input_blob.dtype == 3) {
      uint8_t* hostDataBuffer =
          reinterpret_cast<uint8_t*>(buffers.getHostBuffer(input_blob.name));
      memcpy(hostDataBuffer, input_blob.data.data(), size * sizeof(uint8_t));
    }
  }
}
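
// Deserialize a TensorRT engine from a file previously produced by
// SaveEngine. Returns nullptr if the file cannot be read or deserialized.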
nvinfer1::ICudaEngine* TensorRTInferenceEngine::LoadEngine(
    const std::string& engine, int DLACore) {
  std::ifstream engine_file(engine, std::ios::binary);
  if (!engine_file) {
    std::cerr << "Error opening engine file: " << engine << std::endl;
    return nullptr;
  }
  engine_file.seekg(0, engine_file.end);
  int64_t fsize = engine_file.tellg();
  engine_file.seekg(0, engine_file.beg);
  std::vector<char> engineData(fsize);
  engine_file.read(engineData.data(), fsize);
  if (!engine_file) {
    std::cerr << "Error loading engine file: " << engine << std::endl;
    return nullptr;
  }
  InferUniquePtr<nvinfer1::IRuntime> runtime{
      nvinfer1::createInferRuntime(logger_)};
  if (DLACore != -1) {
    runtime->setDLACore(DLACore);
  }
  return runtime->deserializeCudaEngine(engineData.data(), fsize, nullptr);
}
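
// Serialize the built engine to file_name so that later runs can skip the
// slow build step and load it through LoadEngine instead.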
bool TensorRTInferenceEngine::SaveEngine(const nvinfer1::ICudaEngine& engine,
                                         const std::string& file_name) {
  std::ofstream engine_file(file_name, std::ios::binary);
  if (!engine_file) {
    return false;
  }
  InferUniquePtr<nvinfer1::IHostMemory> serializedEngine{engine.serialize()};
  if (serializedEngine == nullptr) {
    return false;
  }
  engine_file.write(reinterpret_cast<char*>(serializedEngine->data()),
                    serializedEngine->size());
  return !engine_file.fail();
}
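
// Run one forward pass: stage the inputs on the host, copy them to the
// device, execute the engine, copy the outputs back, and repack them into
// DataBlob objects according to the "output" section of the yaml config.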
bool TensorRTInferenceEngine::Infer(const std::vector<DataBlob>& input_blobs,
                                    std::vector<DataBlob>* output_blobs) {
  TensorRT::BufferManager buffers(engine_);
  FeedInput(input_blobs, buffers);
  buffers.copyInputToDevice();
  bool status = context_->executeV2(buffers.getDeviceBindings().data());
  if (!status) {
    return false;
  }
  buffers.copyOutputToHost();
  for (const auto& output_config : yaml_config_) {
    std::string output_name = output_config["name"].as<std::string>();
    int index = engine_->getBindingIndex(output_name.c_str());
    nvinfer1::DataType dtype = engine_->getBindingDataType(index);
    DataBlob output_blob;
    output_blob.name = output_name;
    output_blob.dtype = DtypeConver(dtype);
    for (const auto& shape : output_config["dims"]) {
      output_blob.shape.push_back(shape.as<int>());
    }
    size_t size = std::accumulate(output_blob.shape.begin(),
                                  output_blob.shape.end(), 1,
                                  std::multiplies<size_t>());
    if (output_blob.dtype == 0) {
      assert(size * sizeof(float) == buffers.size(output_name));
      float* output = static_cast<float*>(buffers.getHostBuffer(output_name));
      output_blob.data.resize(size * sizeof(float));
      memcpy(output_blob.data.data(), output, size * sizeof(float));
    } else if (output_blob.dtype == 1) {
      assert(size * sizeof(int64_t) == buffers.size(output_name));
      int64_t* output =
          static_cast<int64_t*>(buffers.getHostBuffer(output_name));
      output_blob.data.resize(size * sizeof(int64_t));
      memcpy(output_blob.data.data(), output, size * sizeof(int64_t));
    } else if (output_blob.dtype == 2) {
      assert(size * sizeof(int) == buffers.size(output_name));
      int* output = static_cast<int*>(buffers.getHostBuffer(output_name));
      output_blob.data.resize(size * sizeof(int));
      memcpy(output_blob.data.data(), output, size * sizeof(int));
    } else if (output_blob.dtype == 3) {
      assert(size * sizeof(uint8_t) == buffers.size(output_name));
      uint8_t* output =
          static_cast<uint8_t*>(buffers.getHostBuffer(output_name));
      output_blob.data.resize(size * sizeof(uint8_t));
      memcpy(output_blob.data.data(), output, size * sizeof(uint8_t));
    }
    output_blobs->push_back(std::move(output_blob));
  }
  return true;
}
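
// Usage sketch (illustrative, assuming the surrounding PaddleDeploy Model
// API): after Model::TensorRTInit(engine_config) returns true, callers fill
// a std::vector<DataBlob> whose name/shape/dtype/data match the "input"
// entries of the yaml config, then call infer_engine_->Infer(inputs,
// &outputs) and read back one DataBlob per "output" entry.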

}  // namespace PaddleDeploy