// tensorrt_buffers.h
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <iterator>
#include <memory>
#include <new>
#include <numeric>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include "NvInfer.h"
#include "./cuda_runtime_api.h"
namespace PaddleDeploy {
namespace TensorRT {
  29. inline void setCudaDevice(int device) {
  30. cudaSetDevice(device);
  31. }
//!
//! \brief The GenericBuffer class is a templated class for buffers.
//!
//! \details This templated RAII (Resource Acquisition Is Initialization) class
//! handles the allocation,
//! deallocation, querying of buffers on both the device and the host.
//! It can handle data of arbitrary types because it stores byte
//! buffers. The template parameters AllocFunc and FreeFunc are used
//! for the allocation and deallocation of the buffer. AllocFunc must
//! be a functor that takes in (void** ptr, size_t size) and returns
//! bool. ptr is a pointer to where the allocated buffer address should
//! be stored. size is the amount of memory in bytes to allocate. The
//! boolean indicates whether or not the memory allocation was
//! successful. FreeFunc must be a functor that takes in (void* ptr)
//! and returns void. ptr is the allocated buffer address. It must work
//! with nullptr input.
//!
  49. template <typename A, typename B> inline A divUp(A x, B n) {
  50. return (x + n - 1) / n;
  51. }
  52. inline unsigned int getElementSize(nvinfer1::DataType t) {
  53. switch (t) {
  54. case nvinfer1::DataType::kINT32:
  55. return 4;
  56. case nvinfer1::DataType::kFLOAT:
  57. return 4;
  58. case nvinfer1::DataType::kHALF:
  59. return 2;
  60. case nvinfer1::DataType::kBOOL:
  61. case nvinfer1::DataType::kINT8:
  62. return 1;
  63. }
  64. throw std::runtime_error("Invalid DataType.");
  65. return 0;
  66. }
  67. inline int64_t volume(const nvinfer1::Dims &d) {
  68. return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
  69. }
  70. template <typename AllocFunc, typename FreeFunc> class GenericBuffer {
  71. public:
  72. //!
  73. //! \brief Construct an empty buffer.
  74. //!
  75. explicit GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
  76. : mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {}
  77. //!
  78. //! \brief Construct a buffer with the specified allocation size in bytes.
  79. //!
  80. GenericBuffer(size_t size, nvinfer1::DataType type)
  81. : mSize(size), mCapacity(size), mType(type) {
  82. if (!allocFn(&mBuffer, this->nbBytes())) {
  83. throw std::bad_alloc();
  84. }
  85. }
  86. GenericBuffer(GenericBuffer &&buf)
  87. : mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
  88. mBuffer(buf.mBuffer) {
  89. buf.mSize = 0;
  90. buf.mCapacity = 0;
  91. buf.mType = nvinfer1::DataType::kFLOAT;
  92. buf.mBuffer = nullptr;
  93. }
  94. GenericBuffer &operator=(GenericBuffer &&buf) {
  95. if (this != &buf) {
  96. freeFn(mBuffer);
  97. mSize = buf.mSize;
  98. mCapacity = buf.mCapacity;
  99. mType = buf.mType;
  100. mBuffer = buf.mBuffer;
  101. // Reset buf.
  102. buf.mSize = 0;
  103. buf.mCapacity = 0;
  104. buf.mBuffer = nullptr;
  105. }
  106. return *this;
  107. }
  108. //!
  109. //! \brief Returns pointer to underlying array.
  110. //!
  111. void *data() { return mBuffer; }
  112. //!
  113. //! \brief Returns pointer to underlying array.
  114. //!
  115. const void *data() const { return mBuffer; }
  116. //!
  117. //! \brief Returns the size (in number of elements) of the buffer.
  118. //!
  119. size_t size() const { return mSize; }
  120. //!
  121. //! \brief Returns the size (in bytes) of the buffer.
  122. //!
  123. size_t nbBytes() const { return this->size() * getElementSize(mType); }
  124. //!
  125. //! \brief Resizes the buffer. This is a no-op if the new size is smaller than
  126. //! or equal to the current capacity.
  127. //!
  128. void resize(size_t newSize) {
  129. mSize = newSize;
  130. if (mCapacity < newSize) {
  131. freeFn(mBuffer);
  132. if (!allocFn(&mBuffer, this->nbBytes())) {
  133. throw std::bad_alloc{};
  134. }
  135. mCapacity = newSize;
  136. }
  137. }
  138. //!
  139. //! \brief Overload of resize that accepts Dims
  140. //!
  141. void resize(const nvinfer1::Dims &dims) { return this->resize(volume(dims)); }
  142. ~GenericBuffer() { freeFn(mBuffer); }
  143. private:
  144. size_t mSize{0}, mCapacity{0};
  145. nvinfer1::DataType mType;
  146. void *mBuffer;
  147. AllocFunc allocFn;
  148. FreeFunc freeFn;
  149. };
  150. class DeviceAllocator {
  151. public:
  152. bool operator()(void **ptr, size_t size) const {
  153. return cudaMalloc(ptr, size) == cudaSuccess;
  154. }
  155. };
  156. class DeviceFree {
  157. public:
  158. void operator()(void *ptr) const { cudaFree(ptr); }
  159. };
  160. class HostAllocator {
  161. public:
  162. bool operator()(void **ptr, size_t size) const {
  163. *ptr = malloc(size);
  164. return *ptr != nullptr;
  165. }
  166. };
  167. class HostFree {
  168. public:
  169. void operator()(void *ptr) const { free(ptr); }
  170. };
// Concrete buffer types: device-resident (cudaMalloc/cudaFree) and
// host-resident (malloc/free).
using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>;
using HostBuffer = GenericBuffer<HostAllocator, HostFree>;
//!
//! \brief The ManagedBuffer class groups together a pair of corresponding
//! device and host buffers.
//!
class ManagedBuffer {
 public:
  DeviceBuffer deviceBuffer;  // device-memory half of the pair
  HostBuffer hostBuffer;      // host-memory half of the pair
};
  182. //!
  183. //! \brief The BufferManager class handles host and device buffer allocation
  184. //! and deallocation.
  185. //!
  186. //! \details This RAII class handles host and device buffer allocation and
  187. //! deallocation,
  188. //! memcpy between host and device buffers to aid with inference,
  189. //! and debugging dumps to validate inference. The BufferManager class
  190. //! is meant to be used to simplify buffer management and any
  191. //! interactions between buffers and the engine.
  192. //!
  193. class BufferManager {
  194. public:
  195. static const size_t kINVALID_SIZE_VALUE = ~size_t(0);
  196. //!
  197. //! \brief Create a BufferManager for handling buffer interactions with
  198. //! engine.
  199. //!
  200. BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine,
  201. const int batchSize = 0,
  202. const nvinfer1::IExecutionContext *context = nullptr)
  203. : mEngine(engine), mBatchSize(batchSize) {
  204. // Full Dims implies no batch size.
  205. assert(engine->hasImplicitBatchDimension() || mBatchSize == 0);
  206. // Create host and device buffers
  207. for (int i = 0; i < mEngine->getNbBindings(); i++) {
  208. auto dims = context ? context->getBindingDimensions(i)
  209. : mEngine->getBindingDimensions(i);
  210. size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
  211. nvinfer1::DataType type = mEngine->getBindingDataType(i);
  212. int vecDim = mEngine->getBindingVectorizedDim(i);
  213. if (-1 != vecDim) { // i.e., 0 != lgScalarsPerVector
  214. int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
  215. dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
  216. vol *= scalarsPerVec;
  217. }
  218. vol *= volume(dims);
  219. // std::cout << "input-" << i << " initial byteSize:" << vol << std::endl;
  220. std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
  221. manBuf->deviceBuffer = DeviceBuffer(vol, type);
  222. manBuf->hostBuffer = HostBuffer(vol, type);
  223. mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
  224. mManagedBuffers.emplace_back(std::move(manBuf));
  225. // std::cout << "buffer-"<< i << " initial byteSize:"
  226. // << manBuf->hostBuffer.nbBytes() << std::endl;
  227. }
  228. }
  229. //!
  230. //! \brief Returns a vector of device buffers that you can use directly as
  231. //! bindings for the execute and enqueue methods of IExecutionContext.
  232. //!
  233. std::vector<void *> &getDeviceBindings() { return mDeviceBindings; }
  234. //!
  235. //! \brief Returns a vector of device buffers.
  236. //!
  237. const std::vector<void *> &getDeviceBindings() const {
  238. return mDeviceBindings;
  239. }
  240. //!
  241. //! \brief Returns the device buffer corresponding to tensorName.
  242. //! Returns nullptr if no such tensor can be found.
  243. //!
  244. void *getDeviceBuffer(const std::string &tensorName) const {
  245. return getBuffer(false, tensorName);
  246. }
  247. //!
  248. //! \brief Returns the host buffer corresponding to tensorName.
  249. //! Returns nullptr if no such tensor can be found.
  250. //!
  251. void *getHostBuffer(const std::string &tensorName) const {
  252. return getBuffer(true, tensorName);
  253. }
  254. //!
  255. //! \brief Returns the size of the host and device buffers that correspond to
  256. //! tensorName.
  257. //! Returns kINVALID_SIZE_VALUE if no such tensor can be found.
  258. //!
  259. size_t size(const std::string &tensorName) const {
  260. int index = mEngine->getBindingIndex(tensorName.c_str());
  261. if (index == -1)
  262. return kINVALID_SIZE_VALUE;
  263. return mManagedBuffers[index]->hostBuffer.nbBytes();
  264. }
  265. //!
  266. //! \brief Copy the contents of input host buffers to input device buffers
  267. //! synchronously.
  268. //!
  269. void copyInputToDevice() { memcpyBuffers(true, false, false); }
  270. //!
  271. //! \brief Copy the contents of output device buffers to output host buffers
  272. //! synchronously.
  273. //!
  274. void copyOutputToHost() { memcpyBuffers(false, true, false); }
  275. //!
  276. //! \brief Copy the contents of input host buffers to input device buffers
  277. //! asynchronously.
  278. //!
  279. void copyInputToDeviceAsync(const cudaStream_t &stream = 0) {
  280. memcpyBuffers(true, false, true, stream);
  281. }
  282. //!
  283. //! \brief Copy the contents of output device buffers to output host buffers
  284. //! asynchronously.
  285. //!
  286. void copyOutputToHostAsync(const cudaStream_t &stream = 0) {
  287. memcpyBuffers(false, true, true, stream);
  288. }
  289. ~BufferManager() = default;
  290. private:
  291. void *getBuffer(const bool isHost, const std::string &tensorName) const {
  292. int index = mEngine->getBindingIndex(tensorName.c_str());
  293. if (index == -1)
  294. return nullptr;
  295. return (isHost ? mManagedBuffers[index]->hostBuffer.data()
  296. : mManagedBuffers[index]->deviceBuffer.data());
  297. }
  298. void memcpyBuffers(const bool copyInput, const bool deviceToHost,
  299. const bool async, const cudaStream_t &stream = 0) {
  300. for (int i = 0; i < mEngine->getNbBindings(); i++) {
  301. void *dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data()
  302. : mManagedBuffers[i]->deviceBuffer.data();
  303. const void *srcPtr = deviceToHost
  304. ? mManagedBuffers[i]->deviceBuffer.data()
  305. : mManagedBuffers[i]->hostBuffer.data();
  306. const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
  307. // std::cout << "input-host-" << i << " runtime byteSize:"
  308. // << mManagedBuffers[i]->hostBuffer.nbBytes() << std::endl;
  309. // std::cout << "input-device-" << i << " runtime byteSize:"
  310. // << mManagedBuffers[i]->deviceBuffer.nbBytes() << std::endl;
  311. const cudaMemcpyKind memcpyType =
  312. deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
  313. if ((copyInput && mEngine->bindingIsInput(i)) ||
  314. (!copyInput && !mEngine->bindingIsInput(i))) {
  315. if (async)
  316. cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream);
  317. else
  318. cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType);
  319. }
  320. }
  321. }
  322. // !< The pointer to the engine
  323. std::shared_ptr<nvinfer1::ICudaEngine> mEngine;
  324. // !< The batch size for legacy networks, 0 otherwise
  325. int mBatchSize;
  326. // !< The vector of pointers to managed buffers
  327. std::vector<std::unique_ptr<ManagedBuffer>> mManagedBuffers;
  328. // !< The vector of device buffers needed
  329. // !< for engine execution
  330. std::vector<void *> mDeviceBindings;
  331. };
}  // namespace TensorRT
}  // namespace PaddleDeploy