mat_batch.cc 3.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "ultra_infer/vision/common/processors/mat_batch.h"
  15. namespace ultra_infer {
  16. namespace vision {
  17. #ifdef WITH_GPU
  18. void FDMatBatch::SetStream(cudaStream_t s) {
  19. stream = s;
  20. for (size_t i = 0; i < mats->size(); ++i) {
  21. (*mats)[i].SetStream(s);
  22. }
  23. }
  24. #endif
  25. FDTensor *FDMatBatch::Tensor() {
  26. if (has_batched_tensor) {
  27. return fd_tensor.get();
  28. }
  29. FDASSERT(mats != nullptr, "Failed to get batched tensor, Mats are empty.");
  30. FDASSERT(CheckShapeConsistency(mats), "Mats shapes are not consistent.");
  31. // Each mat has its own tensor,
  32. // to get a batched tensor, we need copy these tensors to a batched tensor
  33. FDTensor *src = (*mats)[0].Tensor();
  34. device = src->device;
  35. auto new_shape = src->Shape();
  36. new_shape.insert(new_shape.begin(), mats->size());
  37. input_cache->Resize(new_shape, src->Dtype(), "batch_input_cache", device);
  38. for (size_t i = 0; i < mats->size(); ++i) {
  39. FDASSERT(device == (*mats)[i].Tensor()->device,
  40. "Mats and MatBatch are not on the same device");
  41. uint8_t *p = reinterpret_cast<uint8_t *>(input_cache->Data());
  42. int num_bytes = (*mats)[i].Tensor()->Nbytes();
  43. FDTensor::CopyBuffer(p + i * num_bytes, (*mats)[i].Tensor()->Data(),
  44. num_bytes, device, false);
  45. }
  46. SetTensor(input_cache);
  47. return fd_tensor.get();
  48. }
  49. void FDMatBatch::SetTensor(FDTensor *tensor) {
  50. fd_tensor->SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(),
  51. tensor->device, tensor->device_id);
  52. device = tensor->device;
  53. has_batched_tensor = true;
  54. }
  55. FDTensor *CreateCachedGpuInputTensor(FDMatBatch *mat_batch) {
  56. #ifdef WITH_GPU
  57. // Get the batched tensor
  58. FDTensor *src = mat_batch->Tensor();
  59. // Need to make sure the returned tensor is pointed to the input_cache.
  60. if (src->Data() == mat_batch->output_cache->Data()) {
  61. std::swap(mat_batch->input_cache, mat_batch->output_cache);
  62. std::swap(mat_batch->input_cache->name, mat_batch->output_cache->name);
  63. }
  64. if (src->device == Device::GPU) {
  65. return src;
  66. } else if (src->device == Device::CPU) {
  67. // Batched tensor on CPU, we need copy it to GPU
  68. mat_batch->output_cache->Resize(src->Shape(), src->Dtype(), "output_cache",
  69. Device::GPU);
  70. FDASSERT(cudaMemcpyAsync(mat_batch->output_cache->Data(), src->Data(),
  71. src->Nbytes(), cudaMemcpyHostToDevice,
  72. mat_batch->Stream()) == 0,
  73. "[ERROR] Error occurs while copy memory from CPU to GPU.");
  74. std::swap(mat_batch->input_cache, mat_batch->output_cache);
  75. std::swap(mat_batch->input_cache->name, mat_batch->output_cache->name);
  76. return mat_batch->input_cache;
  77. } else {
  78. FDASSERT(false, "FDMatBatch is on unsupported device: %d", src->device);
  79. }
  80. #else
  81. FDASSERT(false, "UltraInfer didn't compile with WITH_GPU.");
  82. #endif
  83. return nullptr;
  84. }
  85. } // namespace vision
  86. } // namespace ultra_infer