nvjpeg_decoder.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // Part of the following code in this file refs to
  16. // https://github.com/CVCUDA/CV-CUDA/blob/release_v0.2.x/samples/common/NvDecoder.cpp
  17. //
  18. // Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  19. // Licensed under the Apache-2.0 license
  20. // \brief
  21. // \author NVIDIA
  22. #ifdef ENABLE_NVJPEG
  23. #include "ultra_infer/vision/common/image_decoder/nvjpeg_decoder.h"
  24. namespace ultra_infer {
  25. namespace vision {
  26. namespace nvjpeg {
  // Evaluates a CUDA runtime call exactly once. If the returned status is not
  // cudaSuccess, prints the numeric status together with the file and line of
  // the call site and terminates the process with exit code 1.
  //
  // The body is wrapped in do { ... } while (0) so that `CHECK_CUDA(x);`
  // expands to a single statement and can be used safely as the body of an
  // unbraced if/else.
  #define CHECK_CUDA(call) \
    do { \
      const cudaError_t cuda_status_ = (call); \
      if (cuda_status_ != cudaSuccess) { \
        std::cout << "CUDA Runtime failure: '#" << cuda_status_ << "' at " \
                  << __FILE__ << ":" << __LINE__ << std::endl; \
        exit(1); \
      } \
    } while (0)
  // Evaluates an nvJPEG call exactly once. If the returned status is not
  // NVJPEG_STATUS_SUCCESS, prints the numeric status together with the file
  // and line of the call site and terminates the process with exit code 1.
  //
  // The body is wrapped in do { ... } while (0) so that `CHECK_NVJPEG(x);`
  // expands to a single statement and can be used safely as the body of an
  // unbraced if/else.
  #define CHECK_NVJPEG(call) \
    do { \
      const nvjpegStatus_t nvjpeg_status_ = (call); \
      if (nvjpeg_status_ != NVJPEG_STATUS_SUCCESS) { \
        std::cout << "NVJPEG failure: '#" << nvjpeg_status_ << "' at " \
                  << __FILE__ << ":" << __LINE__ << std::endl; \
        exit(1); \
      } \
    } while (0)
  45. static int dev_malloc(void **p, size_t s) { return (int)cudaMalloc(p, s); }
  46. static int dev_free(void *p) { return (int)cudaFree(p); }
  47. static int host_malloc(void **p, size_t s, unsigned int f) {
  48. return (int)cudaHostAlloc(p, s, f);
  49. }
  50. static int host_free(void *p) { return (int)cudaFreeHost(p); }
  51. static int read_images(const FileNames &image_names, FileData &raw_data,
  52. std::vector<size_t> &raw_len) {
  53. for (size_t i = 0; i < image_names.size(); ++i) {
  54. if (image_names.size() == 0) {
  55. std::cerr << "No valid images left in the input list, exit" << std::endl;
  56. return EXIT_FAILURE;
  57. }
  58. // Read an image from disk.
  59. std::ifstream input(image_names[i].c_str(),
  60. std::ios::in | std::ios::binary | std::ios::ate);
  61. if (!(input.is_open())) {
  62. std::cerr << "Cannot open image: " << image_names[i] << std::endl;
  63. FDASSERT(false, "Read file error.");
  64. continue;
  65. }
  66. // Get the size
  67. long unsigned int file_size = input.tellg();
  68. input.seekg(0, std::ios::beg);
  69. // resize if buffer is too small
  70. if (raw_data[i].size() < file_size) {
  71. raw_data[i].resize(file_size);
  72. }
  73. if (!input.read(raw_data[i].data(), file_size)) {
  74. std::cerr << "Cannot read from file: " << image_names[i] << std::endl;
  75. // image_names.erase(cur_iter);
  76. FDASSERT(false, "Read file error.");
  77. continue;
  78. }
  79. raw_len[i] = file_size;
  80. }
  81. return EXIT_SUCCESS;
  82. }
  83. // prepare buffers for RGBi output format
  84. static int prepare_buffers(FileData &file_data, std::vector<size_t> &file_len,
  85. std::vector<int> &img_width,
  86. std::vector<int> &img_height,
  87. std::vector<nvjpegImage_t> &ibuf,
  88. std::vector<nvjpegImage_t> &isz,
  89. std::vector<FDTensor *> &output_buffers,
  90. const FileNames &current_names,
  91. decode_params_t &params) {
  92. int widths[NVJPEG_MAX_COMPONENT];
  93. int heights[NVJPEG_MAX_COMPONENT];
  94. int channels;
  95. nvjpegChromaSubsampling_t subsampling;
  96. for (long unsigned int i = 0; i < file_data.size(); i++) {
  97. nvjpegStatus_t status = nvjpegGetImageInfo(
  98. params.nvjpeg_handle, (unsigned char *)file_data[i].data(), file_len[i],
  99. &channels, &subsampling, widths, heights);
  100. if (status != NVJPEG_STATUS_SUCCESS) {
  101. std::cout << "NVJPEG failure: #" << status << " in nvjpegGetImageInfo."
  102. << std::endl;
  103. return EXIT_FAILURE;
  104. }
  105. img_width[i] = widths[0];
  106. img_height[i] = heights[0];
  107. int mul = 1;
  108. // in the case of interleaved RGB output, write only to single channel, but
  109. // 3 samples at once
  110. if (params.fmt == NVJPEG_OUTPUT_RGBI || params.fmt == NVJPEG_OUTPUT_BGRI) {
  111. channels = 1;
  112. mul = 3;
  113. } else if (params.fmt == NVJPEG_OUTPUT_RGB ||
  114. params.fmt == NVJPEG_OUTPUT_BGR) {
  115. // in the case of rgb create 3 buffers with sizes of original image
  116. channels = 3;
  117. widths[1] = widths[2] = widths[0];
  118. heights[1] = heights[2] = heights[0];
  119. } else {
  120. FDASSERT(false, "Unsupport NVJPEG output format: %d", params.fmt);
  121. }
  122. output_buffers[i]->Resize({heights[0], widths[0], mul * channels},
  123. FDDataType::UINT8, "output_cache", Device::GPU);
  124. uint8_t *cur_buffer =
  125. reinterpret_cast<uint8_t *>(output_buffers[i]->Data());
  126. // realloc output buffer if required
  127. for (int c = 0; c < channels; c++) {
  128. int aw = mul * widths[c];
  129. int ah = heights[c];
  130. size_t sz = aw * ah;
  131. ibuf[i].pitch[c] = aw;
  132. if (sz > isz[i].pitch[c]) {
  133. ibuf[i].channel[c] = cur_buffer;
  134. cur_buffer = cur_buffer + sz;
  135. isz[i].pitch[c] = sz;
  136. }
  137. }
  138. }
  139. return EXIT_SUCCESS;
  140. }
  141. static void create_decoupled_api_handles(decode_params_t &params) {
  142. CHECK_NVJPEG(nvjpegDecoderCreate(params.nvjpeg_handle, NVJPEG_BACKEND_DEFAULT,
  143. &params.nvjpeg_decoder));
  144. CHECK_NVJPEG(nvjpegDecoderStateCreate(params.nvjpeg_handle,
  145. params.nvjpeg_decoder,
  146. &params.nvjpeg_decoupled_state));
  147. CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL,
  148. &params.pinned_buffers[0]));
  149. CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL,
  150. &params.pinned_buffers[1]));
  151. CHECK_NVJPEG(nvjpegBufferDeviceCreate(params.nvjpeg_handle, NULL,
  152. &params.device_buffer));
  153. CHECK_NVJPEG(
  154. nvjpegJpegStreamCreate(params.nvjpeg_handle, &params.jpeg_streams[0]));
  155. CHECK_NVJPEG(
  156. nvjpegJpegStreamCreate(params.nvjpeg_handle, &params.jpeg_streams[1]));
  157. CHECK_NVJPEG(nvjpegDecodeParamsCreate(params.nvjpeg_handle,
  158. &params.nvjpeg_decode_params));
  159. }
  160. static void destroy_decoupled_api_handles(decode_params_t &params) {
  161. CHECK_NVJPEG(nvjpegDecodeParamsDestroy(params.nvjpeg_decode_params));
  162. CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[0]));
  163. CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[1]));
  164. CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[0]));
  165. CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[1]));
  166. CHECK_NVJPEG(nvjpegBufferDeviceDestroy(params.device_buffer));
  167. CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_decoupled_state));
  168. CHECK_NVJPEG(nvjpegDecoderDestroy(params.nvjpeg_decoder));
  169. }
  170. int decode_images(const FileData &img_data, const std::vector<size_t> &img_len,
  171. std::vector<nvjpegImage_t> &out, decode_params_t &params,
  172. double &time) {
  173. CHECK_CUDA(cudaStreamSynchronize(params.stream));
  174. std::vector<const unsigned char *> batched_bitstreams;
  175. std::vector<size_t> batched_bitstreams_size;
  176. std::vector<nvjpegImage_t> batched_output;
  177. // bit-streams that batched decode cannot handle
  178. std::vector<const unsigned char *> otherdecode_bitstreams;
  179. std::vector<size_t> otherdecode_bitstreams_size;
  180. std::vector<nvjpegImage_t> otherdecode_output;
  181. if (params.hw_decode_available) {
  182. for (int i = 0; i < params.batch_size; i++) {
  183. // extract bitstream meta data to figure out whether a bit-stream can be
  184. // decoded
  185. nvjpegJpegStreamParseHeader(params.nvjpeg_handle,
  186. (const unsigned char *)img_data[i].data(),
  187. img_len[i], params.jpeg_streams[0]);
  188. int isSupported = -1;
  189. nvjpegDecodeBatchedSupported(params.nvjpeg_handle, params.jpeg_streams[0],
  190. &isSupported);
  191. if (isSupported == 0) {
  192. batched_bitstreams.push_back((const unsigned char *)img_data[i].data());
  193. batched_bitstreams_size.push_back(img_len[i]);
  194. batched_output.push_back(out[i]);
  195. } else {
  196. otherdecode_bitstreams.push_back(
  197. (const unsigned char *)img_data[i].data());
  198. otherdecode_bitstreams_size.push_back(img_len[i]);
  199. otherdecode_output.push_back(out[i]);
  200. }
  201. }
  202. } else {
  203. for (int i = 0; i < params.batch_size; i++) {
  204. otherdecode_bitstreams.push_back(
  205. (const unsigned char *)img_data[i].data());
  206. otherdecode_bitstreams_size.push_back(img_len[i]);
  207. otherdecode_output.push_back(out[i]);
  208. }
  209. }
  210. if (batched_bitstreams.size() > 0) {
  211. CHECK_NVJPEG(nvjpegDecodeBatchedInitialize(
  212. params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.size(), 1,
  213. params.fmt));
  214. CHECK_NVJPEG(nvjpegDecodeBatched(
  215. params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.data(),
  216. batched_bitstreams_size.data(), batched_output.data(), params.stream));
  217. }
  218. if (otherdecode_bitstreams.size() > 0) {
  219. CHECK_NVJPEG(nvjpegStateAttachDeviceBuffer(params.nvjpeg_decoupled_state,
  220. params.device_buffer));
  221. int buffer_index = 0;
  222. CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params,
  223. params.fmt));
  224. for (int i = 0; i < params.batch_size; i++) {
  225. CHECK_NVJPEG(nvjpegJpegStreamParse(params.nvjpeg_handle,
  226. otherdecode_bitstreams[i],
  227. otherdecode_bitstreams_size[i], 0, 0,
  228. params.jpeg_streams[buffer_index]));
  229. CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer(
  230. params.nvjpeg_decoupled_state, params.pinned_buffers[buffer_index]));
  231. CHECK_NVJPEG(nvjpegDecodeJpegHost(
  232. params.nvjpeg_handle, params.nvjpeg_decoder,
  233. params.nvjpeg_decoupled_state, params.nvjpeg_decode_params,
  234. params.jpeg_streams[buffer_index]));
  235. CHECK_CUDA(cudaStreamSynchronize(params.stream));
  236. CHECK_NVJPEG(nvjpegDecodeJpegTransferToDevice(
  237. params.nvjpeg_handle, params.nvjpeg_decoder,
  238. params.nvjpeg_decoupled_state, params.jpeg_streams[buffer_index],
  239. params.stream));
  240. buffer_index = 1 - buffer_index; // switch pinned buffer in pipeline mode
  241. // to avoid an extra sync
  242. CHECK_NVJPEG(
  243. nvjpegDecodeJpegDevice(params.nvjpeg_handle, params.nvjpeg_decoder,
  244. params.nvjpeg_decoupled_state,
  245. &otherdecode_output[i], params.stream));
  246. }
  247. }
  248. return EXIT_SUCCESS;
  249. }
  250. double process_images(const FileNames &image_names, decode_params_t &params,
  251. double &total, std::vector<nvjpegImage_t> &iout,
  252. std::vector<FDTensor *> &output_buffers,
  253. std::vector<int> &widths, std::vector<int> &heights) {
  254. FDASSERT(image_names.size() == params.batch_size,
  255. "Number of images and batch size must be equal.");
  256. // vector for storing raw files and file lengths
  257. FileData file_data(params.batch_size);
  258. std::vector<size_t> file_len(params.batch_size);
  259. FileNames current_names(params.batch_size);
  260. // we wrap over image files to process total_images of files
  261. auto file_iter = image_names.begin();
  262. // output buffer sizes, for convenience
  263. std::vector<nvjpegImage_t> isz(params.batch_size);
  264. for (long unsigned int i = 0; i < iout.size(); i++) {
  265. for (int c = 0; c < NVJPEG_MAX_COMPONENT; c++) {
  266. iout[i].channel[c] = NULL;
  267. iout[i].pitch[c] = 0;
  268. isz[i].pitch[c] = 0;
  269. }
  270. }
  271. if (read_images(image_names, file_data, file_len)) {
  272. return EXIT_FAILURE;
  273. }
  274. if (prepare_buffers(file_data, file_len, widths, heights, iout, isz,
  275. output_buffers, image_names, params)) {
  276. return EXIT_FAILURE;
  277. }
  278. double time;
  279. if (decode_images(file_data, file_len, iout, params, time)) {
  280. return EXIT_FAILURE;
  281. }
  282. return EXIT_SUCCESS;
  283. }
  284. void init_decoder(decode_params_t &params) {
  285. params.hw_decode_available = true;
  286. nvjpegDevAllocator_t dev_allocator = {&dev_malloc, &dev_free};
  287. nvjpegPinnedAllocator_t pinned_allocator = {&host_malloc, &host_free};
  288. nvjpegStatus_t status =
  289. nvjpegCreateEx(NVJPEG_BACKEND_HARDWARE, &dev_allocator, &pinned_allocator,
  290. NVJPEG_FLAGS_DEFAULT, &params.nvjpeg_handle);
  291. if (status == NVJPEG_STATUS_ARCH_MISMATCH) {
  292. std::cout << "Hardware Decoder not supported. "
  293. "Falling back to default backend"
  294. << std::endl;
  295. CHECK_NVJPEG(nvjpegCreateEx(NVJPEG_BACKEND_DEFAULT, &dev_allocator,
  296. &pinned_allocator, NVJPEG_FLAGS_DEFAULT,
  297. &params.nvjpeg_handle));
  298. params.hw_decode_available = false;
  299. } else {
  300. CHECK_NVJPEG(status);
  301. }
  302. CHECK_NVJPEG(
  303. nvjpegJpegStateCreate(params.nvjpeg_handle, &params.nvjpeg_state));
  304. create_decoupled_api_handles(params);
  305. }
  306. void destroy_decoder(decode_params_t &params) {
  307. destroy_decoupled_api_handles(params);
  308. CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_state));
  309. CHECK_NVJPEG(nvjpegDestroy(params.nvjpeg_handle));
  310. }
  311. } // namespace nvjpeg
  312. } // namespace vision
  313. } // namespace ultra_infer
  314. #endif // ENABLE_NVJPEG