tensorrt_engine.cpp

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "model_deploy/engine/include/tensorrt_engine.h"
namespace PaddleDeploy {
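
// Maps a TensorRT data type to the integer dtype code used by DataBlob in this
// file: 0 = float32, 1 = int64, 2 = int32, 3 = uint8 (kBOOL and kINT8 both map
// to 3). Returns -1 for unsupported types.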
int DtypeConver(const nvinfer1::DataType& dtype) {
  switch (dtype) {
    case nvinfer1::DataType::kINT32:
      return 2;
    case nvinfer1::DataType::kFLOAT:
      return 0;
    case nvinfer1::DataType::kBOOL:
      return 3;
    case nvinfer1::DataType::kINT8:
      return 3;
  }
  std::cerr << "Unsupported TensorRT dtype" << std::endl;
  return -1;
}
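
// Prepares the TensorRT backend for a Model: loads the deployment YAML,
// checks that it declares "input" and "output" nodes, and forwards the
// configuration to TensorRTInferenceEngine::Init.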
bool Model::TensorRTInit(const TensorRTEngineConfig& engine_config) {
  infer_engine_ = std::make_shared<TensorRTInferenceEngine>();
  InferenceConfig config("tensorrt");
  YAML::Node node = YAML::LoadFile(engine_config.cfg_file_);
  if (!node["input"].IsDefined()) {
    std::cout << "Fail to find input in yaml file!" << std::endl;
    return false;
  }
  if (!node["output"].IsDefined()) {
    std::cout << "Fail to find output in yaml file!" << std::endl;
    return false;
  }
  *(config.tensorrt_config) = engine_config;
  config.tensorrt_config->yaml_config_ = node;
  return infer_engine_->Init(config);
}
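
// Initializes the engine. If a serialized engine cache exists at
// trt_cache_file_, it is deserialized directly; otherwise the ONNX model in
// model_file_ is parsed and a new engine is built from it.
//
// Illustrative sketch of the YAML layout the code below expects (the names and
// dims here are hypothetical; only the "input"/"output", "name" and "dims"
// keys are taken from the code itself):
//
//   input:
//     - name: "image"
//       dims: [1, 3, 224, 224]
//   output:
//     - name: "save_infer_model/scale_0.tmp_0"
//       dims: [1, 1000]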
bool TensorRTInferenceEngine::Init(const InferenceConfig& engine_config) {
  const TensorRTEngineConfig& tensorrt_config = *engine_config.tensorrt_config;
  TensorRT::setCudaDevice(tensorrt_config.gpu_id_);
  std::ifstream engine_file(tensorrt_config.trt_cache_file_, std::ios::binary);
  if (engine_file) {
    std::cout << "Loading cached optimized TensorRT engine file." << std::endl;
    engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
        LoadEngine(tensorrt_config.trt_cache_file_),
        InferDeleter());
    if (!engine_) {
      std::cerr << "Failed to load cached optimized TensorRT engine"
                << std::endl;
      return false;
    }
    return true;
  }
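
  // No usable cache: build an engine from the ONNX model
  // (builder -> network -> ONNX parser -> builder config).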
  auto builder = InferUniquePtr<nvinfer1::IBuilder>(
      nvinfer1::createInferBuilder(logger_));
  if (!builder) {
    std::cerr << "TensorRT init builder error" << std::endl;
    return false;
  }
  const auto explicitBatch = 1U << static_cast<uint32_t>(
      nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
  auto network = InferUniquePtr<nvinfer1::INetworkDefinition>(
      builder->createNetworkV2(explicitBatch));
  if (!network) {
    std::cerr << "TensorRT init network error" << std::endl;
    return false;
  }
  auto parser = InferUniquePtr<nvonnxparser::IParser>(
      nvonnxparser::createParser(*network, logger_));
  if (!parser) {
    std::cerr << "TensorRT init parser error" << std::endl;
    return false;
  }
  if (!parser->parseFromFile(tensorrt_config.model_file_.c_str(),
                             static_cast<int>(logger_.mReportableSeverity))) {
    std::cerr << "TensorRT init model_file error" << std::endl;
    return false;
  }
  auto config = InferUniquePtr<nvinfer1::IBuilderConfig>(
      builder->createBuilderConfig());
  if (!config) {
    std::cerr << "TensorRT init config error" << std::endl;
    return false;
  }
  config->setMaxWorkspaceSize(tensorrt_config.max_workspace_size_);

  // Keep the "output" section for use in Infer(); input shapes are handled
  // below. Dynamic shapes are not supported yet, so the min/opt/max profile
  // dimensions are all fixed to the dims given in the YAML file.
  yaml_config_ = tensorrt_config.yaml_config_["output"];
  auto profile = builder->createOptimizationProfile();
  for (const auto& input : tensorrt_config.yaml_config_["input"]) {
    nvinfer1::Dims input_dims;
    input_dims.nbDims = static_cast<int>(input["dims"].size());
    for (int i = 0; i < input_dims.nbDims; ++i) {
      input_dims.d[i] = input["dims"][i].as<int>();
      if (input_dims.d[i] < 0) {
        std::cerr << "Invalid input shape in yaml file: "
                  << "dynamic (negative) dims are not supported" << std::endl;
        return false;
      }
    }
    profile->setDimensions(input["name"].as<std::string>().c_str(),
                           nvinfer1::OptProfileSelector::kMIN, input_dims);
    profile->setDimensions(input["name"].as<std::string>().c_str(),
                           nvinfer1::OptProfileSelector::kMAX, input_dims);
    profile->setDimensions(input["name"].as<std::string>().c_str(),
                           nvinfer1::OptProfileSelector::kOPT, input_dims);
  }
  config->addOptimizationProfile(profile);
  engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
      builder->buildEngineWithConfig(*network, *config),
      InferDeleter());
  if (!engine_) {
    std::cerr << "TensorRT build engine error" << std::endl;
    return false;
  }
  context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
      engine_->createExecutionContext(),
      InferDeleter());
  if (!context_) {
    std::cerr << "TensorRT init context error" << std::endl;
    return false;
  }
  if (tensorrt_config.save_engine_) {
    if (!SaveEngine(*engine_, tensorrt_config.trt_cache_file_)) {
      std::cout << "Failed to save TensorRT engine to "
                << tensorrt_config.trt_cache_file_ << std::endl;
    }
  }
  return true;
}
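
// Copies each input DataBlob from host memory into the matching host-side
// buffer of the BufferManager, dispatching on the DataBlob dtype code
// (0 = float32, 1 = int64, 2 = int32, 3 = uint8).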
void TensorRTInferenceEngine::FeedInput(
    const std::vector<DataBlob>& input_blobs,
    const TensorRT::BufferManager& buffers) {
  for (const auto& input_blob : input_blobs) {
    int size = std::accumulate(input_blob.shape.begin(),
                               input_blob.shape.end(), 1,
                               std::multiplies<int>());
    if (input_blob.dtype == 0) {
      float* hostDataBuffer =
          reinterpret_cast<float*>(buffers.getHostBuffer(input_blob.name));
      memcpy(hostDataBuffer, input_blob.data.data(), size * sizeof(float));
    } else if (input_blob.dtype == 1) {
      int64_t* hostDataBuffer =
          reinterpret_cast<int64_t*>(buffers.getHostBuffer(input_blob.name));
      memcpy(hostDataBuffer, input_blob.data.data(), size * sizeof(int64_t));
    } else if (input_blob.dtype == 2) {
      int* hostDataBuffer =
          reinterpret_cast<int*>(buffers.getHostBuffer(input_blob.name));
      memcpy(hostDataBuffer, input_blob.data.data(), size * sizeof(int));
    } else if (input_blob.dtype == 3) {
      uint8_t* hostDataBuffer =
          reinterpret_cast<uint8_t*>(buffers.getHostBuffer(input_blob.name));
      memcpy(hostDataBuffer, input_blob.data.data(), size * sizeof(uint8_t));
    }
  }
}
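
// Reads a serialized engine file into memory and deserializes it with a
// TensorRT runtime, optionally selecting a DLA core when DLACore is not -1.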
nvinfer1::ICudaEngine* TensorRTInferenceEngine::LoadEngine(
    const std::string& engine,
    int DLACore) {
  std::ifstream engine_file(engine, std::ios::binary);
  if (!engine_file) {
    std::cerr << "Error opening engine file: " << engine << std::endl;
    return nullptr;
  }
  engine_file.seekg(0, engine_file.end);
  int64_t fsize = engine_file.tellg();
  engine_file.seekg(0, engine_file.beg);
  std::vector<char> engineData(fsize);
  engine_file.read(engineData.data(), fsize);
  if (!engine_file) {
    std::cerr << "Error loading engine file: " << engine << std::endl;
    return nullptr;
  }
  InferUniquePtr<nvinfer1::IRuntime> runtime{
      nvinfer1::createInferRuntime(logger_)};
  if (DLACore != -1) {
    runtime->setDLACore(DLACore);
  }
  return runtime->deserializeCudaEngine(engineData.data(), fsize, nullptr);
}
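
// Serializes the engine and writes it to file_name so that later runs can skip
// the (slow) build step. Returns false if serialization or the write fails.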
bool TensorRTInferenceEngine::SaveEngine(const nvinfer1::ICudaEngine& engine,
                                         const std::string& file_name) {
  std::ofstream engine_file(file_name, std::ios::binary);
  if (!engine_file) {
    return false;
  }
  InferUniquePtr<nvinfer1::IHostMemory> serializedEngine{engine.serialize()};
  if (serializedEngine == nullptr) {
    return false;
  }
  engine_file.write(reinterpret_cast<char*>(serializedEngine->data()),
                    serializedEngine->size());
  return !engine_file.fail();
}
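
// Runs one inference pass: stages the inputs on the host, copies them to the
// device, executes the engine, then copies the outputs back and packs them
// into DataBlobs whose names and shapes come from the YAML "output" section.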
bool TensorRTInferenceEngine::Infer(const std::vector<DataBlob>& input_blobs,
                                    std::vector<DataBlob>* output_blobs) {
  TensorRT::BufferManager buffers(engine_);
  FeedInput(input_blobs, buffers);
  buffers.copyInputToDevice();
  bool status = context_->executeV2(buffers.getDeviceBindings().data());
  if (!status) {
    std::cerr << "TensorRT execute error" << std::endl;
    return false;
  }
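  // Copy results back to host and convert each configured output binding into
  // a DataBlob (name, dtype code, shape, raw bytes).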
  buffers.copyOutputToHost();
  for (const auto& output_config : yaml_config_) {
    std::string output_name = output_config["name"].as<std::string>();
    int index = engine_->getBindingIndex(output_name.c_str());
    nvinfer1::DataType dtype = engine_->getBindingDataType(index);
    DataBlob output_blob;
    output_blob.name = output_name;
    output_blob.dtype = DtypeConver(dtype);
    for (auto shape : output_config["dims"]) {
      output_blob.shape.push_back(shape.as<int>());
    }
    size_t size = std::accumulate(output_blob.shape.begin(),
                                  output_blob.shape.end(), 1,
                                  std::multiplies<size_t>());
    if (output_blob.dtype == 0) {
      assert(size * sizeof(float) == buffers.size(output_name));
      float* output = static_cast<float*>(buffers.getHostBuffer(output_name));
      output_blob.data.resize(size * sizeof(float));
      memcpy(output_blob.data.data(), output, size * sizeof(float));
    } else if (output_blob.dtype == 1) {
      assert(size * sizeof(int64_t) == buffers.size(output_name));
      int64_t* output = static_cast<int64_t*>(
          buffers.getHostBuffer(output_name));
      output_blob.data.resize(size * sizeof(int64_t));
      memcpy(output_blob.data.data(), output, size * sizeof(int64_t));
    } else if (output_blob.dtype == 2) {
      assert(size * sizeof(int) == buffers.size(output_name));
      int* output = static_cast<int*>(buffers.getHostBuffer(output_name));
      output_blob.data.resize(size * sizeof(int));
      memcpy(output_blob.data.data(), output, size * sizeof(int));
    } else if (output_blob.dtype == 3) {
      assert(size * sizeof(uint8_t) == buffers.size(output_name));
      uint8_t* output = static_cast<uint8_t*>(
          buffers.getHostBuffer(output_name));
      output_blob.data.resize(size * sizeof(uint8_t));
      memcpy(output_blob.data.data(), output, size * sizeof(uint8_t));
    }
    output_blobs->push_back(std::move(output_blob));
  }
  return true;
}
}  // namespace PaddleDeploy