| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364 |
- // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- //
- // Part of the following code in this file refs to
- // https://github.com/CVCUDA/CV-CUDA/blob/release_v0.2.x/samples/common/NvDecoder.cpp
- //
- // Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- // Licensed under the Apache-2.0 license
- // \brief
- // \author NVIDIA
- #ifdef ENABLE_NVJPEG
- #include "ultra_infer/vision/common/image_decoder/nvjpeg_decoder.h"
- namespace ultra_infer {
- namespace vision {
- namespace nvjpeg {
// Evaluates a CUDA runtime call exactly once. If the returned cudaError_t is
// not cudaSuccess, the numeric error code and the source location are printed
// to stdout and the process is terminated with exit(1).
//
// The body is wrapped in do { ... } while (0) so that `CHECK_CUDA(x);` expands
// to a single statement and can be used safely as the unbraced branch of an
// if/else.
#define CHECK_CUDA(call)                                                     \
  do {                                                                       \
    const cudaError_t _e = (call);                                           \
    if (_e != cudaSuccess) {                                                 \
      std::cout << "CUDA Runtime failure: '#" << _e << "' at " << __FILE__   \
                << ":" << __LINE__ << std::endl;                             \
      exit(1);                                                               \
    }                                                                        \
  } while (0)
// Evaluates an nvJPEG call exactly once. If the returned nvjpegStatus_t is
// not NVJPEG_STATUS_SUCCESS, the numeric status and the source location are
// printed to stdout and the process is terminated with exit(1).
//
// The body is wrapped in do { ... } while (0) so that `CHECK_NVJPEG(x);`
// expands to a single statement and can be used safely as the unbraced branch
// of an if/else.
#define CHECK_NVJPEG(call)                                                    \
  do {                                                                        \
    const nvjpegStatus_t _e = (call);                                         \
    if (_e != NVJPEG_STATUS_SUCCESS) {                                        \
      std::cout << "NVJPEG failure: '#" << _e << "' at " << __FILE__ << ":"   \
                << __LINE__ << std::endl;                                     \
      exit(1);                                                                \
    }                                                                         \
  } while (0)
- static int dev_malloc(void **p, size_t s) { return (int)cudaMalloc(p, s); }
- static int dev_free(void *p) { return (int)cudaFree(p); }
- static int host_malloc(void **p, size_t s, unsigned int f) {
- return (int)cudaHostAlloc(p, s, f);
- }
- static int host_free(void *p) { return (int)cudaFreeHost(p); }
- static int read_images(const FileNames &image_names, FileData &raw_data,
- std::vector<size_t> &raw_len) {
- for (size_t i = 0; i < image_names.size(); ++i) {
- if (image_names.size() == 0) {
- std::cerr << "No valid images left in the input list, exit" << std::endl;
- return EXIT_FAILURE;
- }
- // Read an image from disk.
- std::ifstream input(image_names[i].c_str(),
- std::ios::in | std::ios::binary | std::ios::ate);
- if (!(input.is_open())) {
- std::cerr << "Cannot open image: " << image_names[i] << std::endl;
- FDASSERT(false, "Read file error.");
- continue;
- }
- // Get the size
- long unsigned int file_size = input.tellg();
- input.seekg(0, std::ios::beg);
- // resize if buffer is too small
- if (raw_data[i].size() < file_size) {
- raw_data[i].resize(file_size);
- }
- if (!input.read(raw_data[i].data(), file_size)) {
- std::cerr << "Cannot read from file: " << image_names[i] << std::endl;
- // image_names.erase(cur_iter);
- FDASSERT(false, "Read file error.");
- continue;
- }
- raw_len[i] = file_size;
- }
- return EXIT_SUCCESS;
- }
- // prepare buffers for RGBi output format
- static int prepare_buffers(FileData &file_data, std::vector<size_t> &file_len,
- std::vector<int> &img_width,
- std::vector<int> &img_height,
- std::vector<nvjpegImage_t> &ibuf,
- std::vector<nvjpegImage_t> &isz,
- std::vector<FDTensor *> &output_buffers,
- const FileNames ¤t_names,
- decode_params_t ¶ms) {
- int widths[NVJPEG_MAX_COMPONENT];
- int heights[NVJPEG_MAX_COMPONENT];
- int channels;
- nvjpegChromaSubsampling_t subsampling;
- for (long unsigned int i = 0; i < file_data.size(); i++) {
- nvjpegStatus_t status = nvjpegGetImageInfo(
- params.nvjpeg_handle, (unsigned char *)file_data[i].data(), file_len[i],
- &channels, &subsampling, widths, heights);
- if (status != NVJPEG_STATUS_SUCCESS) {
- std::cout << "NVJPEG failure: #" << status << " in nvjpegGetImageInfo."
- << std::endl;
- return EXIT_FAILURE;
- }
- img_width[i] = widths[0];
- img_height[i] = heights[0];
- int mul = 1;
- // in the case of interleaved RGB output, write only to single channel, but
- // 3 samples at once
- if (params.fmt == NVJPEG_OUTPUT_RGBI || params.fmt == NVJPEG_OUTPUT_BGRI) {
- channels = 1;
- mul = 3;
- } else if (params.fmt == NVJPEG_OUTPUT_RGB ||
- params.fmt == NVJPEG_OUTPUT_BGR) {
- // in the case of rgb create 3 buffers with sizes of original image
- channels = 3;
- widths[1] = widths[2] = widths[0];
- heights[1] = heights[2] = heights[0];
- } else {
- FDASSERT(false, "Unsupport NVJPEG output format: %d", params.fmt);
- }
- output_buffers[i]->Resize({heights[0], widths[0], mul * channels},
- FDDataType::UINT8, "output_cache", Device::GPU);
- uint8_t *cur_buffer =
- reinterpret_cast<uint8_t *>(output_buffers[i]->Data());
- // realloc output buffer if required
- for (int c = 0; c < channels; c++) {
- int aw = mul * widths[c];
- int ah = heights[c];
- size_t sz = aw * ah;
- ibuf[i].pitch[c] = aw;
- if (sz > isz[i].pitch[c]) {
- ibuf[i].channel[c] = cur_buffer;
- cur_buffer = cur_buffer + sz;
- isz[i].pitch[c] = sz;
- }
- }
- }
- return EXIT_SUCCESS;
- }
- static void create_decoupled_api_handles(decode_params_t ¶ms) {
- CHECK_NVJPEG(nvjpegDecoderCreate(params.nvjpeg_handle, NVJPEG_BACKEND_DEFAULT,
- ¶ms.nvjpeg_decoder));
- CHECK_NVJPEG(nvjpegDecoderStateCreate(params.nvjpeg_handle,
- params.nvjpeg_decoder,
- ¶ms.nvjpeg_decoupled_state));
- CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL,
- ¶ms.pinned_buffers[0]));
- CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL,
- ¶ms.pinned_buffers[1]));
- CHECK_NVJPEG(nvjpegBufferDeviceCreate(params.nvjpeg_handle, NULL,
- ¶ms.device_buffer));
- CHECK_NVJPEG(
- nvjpegJpegStreamCreate(params.nvjpeg_handle, ¶ms.jpeg_streams[0]));
- CHECK_NVJPEG(
- nvjpegJpegStreamCreate(params.nvjpeg_handle, ¶ms.jpeg_streams[1]));
- CHECK_NVJPEG(nvjpegDecodeParamsCreate(params.nvjpeg_handle,
- ¶ms.nvjpeg_decode_params));
- }
- static void destroy_decoupled_api_handles(decode_params_t ¶ms) {
- CHECK_NVJPEG(nvjpegDecodeParamsDestroy(params.nvjpeg_decode_params));
- CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[0]));
- CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[1]));
- CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[0]));
- CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[1]));
- CHECK_NVJPEG(nvjpegBufferDeviceDestroy(params.device_buffer));
- CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_decoupled_state));
- CHECK_NVJPEG(nvjpegDecoderDestroy(params.nvjpeg_decoder));
- }
- int decode_images(const FileData &img_data, const std::vector<size_t> &img_len,
- std::vector<nvjpegImage_t> &out, decode_params_t ¶ms,
- double &time) {
- CHECK_CUDA(cudaStreamSynchronize(params.stream));
- std::vector<const unsigned char *> batched_bitstreams;
- std::vector<size_t> batched_bitstreams_size;
- std::vector<nvjpegImage_t> batched_output;
- // bit-streams that batched decode cannot handle
- std::vector<const unsigned char *> otherdecode_bitstreams;
- std::vector<size_t> otherdecode_bitstreams_size;
- std::vector<nvjpegImage_t> otherdecode_output;
- if (params.hw_decode_available) {
- for (int i = 0; i < params.batch_size; i++) {
- // extract bitstream meta data to figure out whether a bit-stream can be
- // decoded
- nvjpegJpegStreamParseHeader(params.nvjpeg_handle,
- (const unsigned char *)img_data[i].data(),
- img_len[i], params.jpeg_streams[0]);
- int isSupported = -1;
- nvjpegDecodeBatchedSupported(params.nvjpeg_handle, params.jpeg_streams[0],
- &isSupported);
- if (isSupported == 0) {
- batched_bitstreams.push_back((const unsigned char *)img_data[i].data());
- batched_bitstreams_size.push_back(img_len[i]);
- batched_output.push_back(out[i]);
- } else {
- otherdecode_bitstreams.push_back(
- (const unsigned char *)img_data[i].data());
- otherdecode_bitstreams_size.push_back(img_len[i]);
- otherdecode_output.push_back(out[i]);
- }
- }
- } else {
- for (int i = 0; i < params.batch_size; i++) {
- otherdecode_bitstreams.push_back(
- (const unsigned char *)img_data[i].data());
- otherdecode_bitstreams_size.push_back(img_len[i]);
- otherdecode_output.push_back(out[i]);
- }
- }
- if (batched_bitstreams.size() > 0) {
- CHECK_NVJPEG(nvjpegDecodeBatchedInitialize(
- params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.size(), 1,
- params.fmt));
- CHECK_NVJPEG(nvjpegDecodeBatched(
- params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.data(),
- batched_bitstreams_size.data(), batched_output.data(), params.stream));
- }
- if (otherdecode_bitstreams.size() > 0) {
- CHECK_NVJPEG(nvjpegStateAttachDeviceBuffer(params.nvjpeg_decoupled_state,
- params.device_buffer));
- int buffer_index = 0;
- CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params,
- params.fmt));
- for (int i = 0; i < params.batch_size; i++) {
- CHECK_NVJPEG(nvjpegJpegStreamParse(params.nvjpeg_handle,
- otherdecode_bitstreams[i],
- otherdecode_bitstreams_size[i], 0, 0,
- params.jpeg_streams[buffer_index]));
- CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer(
- params.nvjpeg_decoupled_state, params.pinned_buffers[buffer_index]));
- CHECK_NVJPEG(nvjpegDecodeJpegHost(
- params.nvjpeg_handle, params.nvjpeg_decoder,
- params.nvjpeg_decoupled_state, params.nvjpeg_decode_params,
- params.jpeg_streams[buffer_index]));
- CHECK_CUDA(cudaStreamSynchronize(params.stream));
- CHECK_NVJPEG(nvjpegDecodeJpegTransferToDevice(
- params.nvjpeg_handle, params.nvjpeg_decoder,
- params.nvjpeg_decoupled_state, params.jpeg_streams[buffer_index],
- params.stream));
- buffer_index = 1 - buffer_index; // switch pinned buffer in pipeline mode
- // to avoid an extra sync
- CHECK_NVJPEG(
- nvjpegDecodeJpegDevice(params.nvjpeg_handle, params.nvjpeg_decoder,
- params.nvjpeg_decoupled_state,
- &otherdecode_output[i], params.stream));
- }
- }
- return EXIT_SUCCESS;
- }
- double process_images(const FileNames &image_names, decode_params_t ¶ms,
- double &total, std::vector<nvjpegImage_t> &iout,
- std::vector<FDTensor *> &output_buffers,
- std::vector<int> &widths, std::vector<int> &heights) {
- FDASSERT(image_names.size() == params.batch_size,
- "Number of images and batch size must be equal.");
- // vector for storing raw files and file lengths
- FileData file_data(params.batch_size);
- std::vector<size_t> file_len(params.batch_size);
- FileNames current_names(params.batch_size);
- // we wrap over image files to process total_images of files
- auto file_iter = image_names.begin();
- // output buffer sizes, for convenience
- std::vector<nvjpegImage_t> isz(params.batch_size);
- for (long unsigned int i = 0; i < iout.size(); i++) {
- for (int c = 0; c < NVJPEG_MAX_COMPONENT; c++) {
- iout[i].channel[c] = NULL;
- iout[i].pitch[c] = 0;
- isz[i].pitch[c] = 0;
- }
- }
- if (read_images(image_names, file_data, file_len)) {
- return EXIT_FAILURE;
- }
- if (prepare_buffers(file_data, file_len, widths, heights, iout, isz,
- output_buffers, image_names, params)) {
- return EXIT_FAILURE;
- }
- double time;
- if (decode_images(file_data, file_len, iout, params, time)) {
- return EXIT_FAILURE;
- }
- return EXIT_SUCCESS;
- }
- void init_decoder(decode_params_t ¶ms) {
- params.hw_decode_available = true;
- nvjpegDevAllocator_t dev_allocator = {&dev_malloc, &dev_free};
- nvjpegPinnedAllocator_t pinned_allocator = {&host_malloc, &host_free};
- nvjpegStatus_t status =
- nvjpegCreateEx(NVJPEG_BACKEND_HARDWARE, &dev_allocator, &pinned_allocator,
- NVJPEG_FLAGS_DEFAULT, ¶ms.nvjpeg_handle);
- if (status == NVJPEG_STATUS_ARCH_MISMATCH) {
- std::cout << "Hardware Decoder not supported. "
- "Falling back to default backend"
- << std::endl;
- CHECK_NVJPEG(nvjpegCreateEx(NVJPEG_BACKEND_DEFAULT, &dev_allocator,
- &pinned_allocator, NVJPEG_FLAGS_DEFAULT,
- ¶ms.nvjpeg_handle));
- params.hw_decode_available = false;
- } else {
- CHECK_NVJPEG(status);
- }
- CHECK_NVJPEG(
- nvjpegJpegStateCreate(params.nvjpeg_handle, ¶ms.nvjpeg_state));
- create_decoupled_api_handles(params);
- }
- void destroy_decoder(decode_params_t ¶ms) {
- destroy_decoupled_api_handles(params);
- CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_state));
- CHECK_NVJPEG(nvjpegDestroy(params.nvjpeg_handle));
- }
- } // namespace nvjpeg
- } // namespace vision
- } // namespace ultra_infer
- #endif // ENABLE_NVJPEG
|