utils.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "ultra_infer/runtime/backends/poros/poros_backend.h"

#ifdef WITH_GPU
#include <cuda_runtime_api.h>
#endif

namespace ultra_infer {

std::string AtType2String(const at::ScalarType &dtype) {
  std::string out;
  switch (dtype) {
  case at::kByte:
    out = "at::kByte";
    break;
  case at::kChar:
    out = "at::kChar";
    break;
  case at::kShort:
    out = "at::kShort";
    break;
  case at::kInt:
    out = "at::kInt";
    break;
  case at::kLong:
    out = "at::kLong";
    break;
  case at::kHalf:
    out = "at::kHalf";
    break;
  case at::kFloat:
    out = "at::kFloat";
    break;
  case at::kDouble:
    out = "at::kDouble";
    break;
  default:
    out = "at::UNKNOWN";
  }
  return out;
}

// Map an FDDataType to the corresponding ATen scalar type.
// Falls back to at::kFloat (with an error log) for unsupported types.
at::ScalarType GetPorosDtype(const FDDataType &fd_dtype) {
  if (fd_dtype == FDDataType::FP32) {
    return at::kFloat;
  } else if (fd_dtype == FDDataType::FP64) {
    return at::kDouble;
  } else if (fd_dtype == FDDataType::INT32) {
    return at::kInt;
  } else if (fd_dtype == FDDataType::INT64) {
    return at::kLong;
  }
  FDERROR << "Unrecognized fastdeploy data type: " << Str(fd_dtype) << "."
          << std::endl;
  return at::kFloat;
}

// Map an ATen scalar type back to the corresponding FDDataType.
// Falls back to FDDataType::FP32 (with an error log) for unsupported types.
FDDataType GetFdDtype(const at::ScalarType &poros_dtype) {
  if (poros_dtype == at::kFloat) {
    return FDDataType::FP32;
  } else if (poros_dtype == at::kDouble) {
    return FDDataType::FP64;
  } else if (poros_dtype == at::kInt) {
    return FDDataType::INT32;
  } else if (poros_dtype == at::kLong) {
    return FDDataType::INT64;
  }
  FDERROR << "Unrecognized poros data type: " << AtType2String(poros_dtype)
          << "." << std::endl;
  return FDDataType::FP32;
}
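
// Illustrative only (not part of the backend): for the four supported types,
// the two mappings above are intended to be inverses of each other, e.g.
//
//   GetFdDtype(GetPorosDtype(FDDataType::INT64)) == FDDataType::INT64
//
// Any other type logs an error and falls back to the FP32 pairing.
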
// Allocate an at::Tensor matching `tensor`'s shape and dtype and copy the
// FDTensor's host data into it (host-to-device when the backend runs on CUDA).
at::Tensor CreatePorosValue(FDTensor &tensor, bool is_backend_cuda) {
  FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU,
           "Only support tensor which device is CPU or GPU for PorosBackend.");
  auto data_type = GetPorosDtype(tensor.dtype);
  size_t numel = tensor.Numel();
  at::Tensor poros_value;
  if (is_backend_cuda) {
    poros_value = std::move(
        at::empty(tensor.shape, {at::kCUDA}).to(data_type).contiguous());
  } else {
    poros_value = std::move(
        at::empty(tensor.shape, {at::kCPU}).to(data_type).contiguous());
  }
  if (data_type == at::kFloat) {
    if (is_backend_cuda) {
      cudaMemcpy(poros_value.data_ptr(), static_cast<void *>(tensor.Data()),
                 numel * sizeof(float), cudaMemcpyHostToDevice);
    } else {
      memcpy(poros_value.data_ptr(), static_cast<void *>(tensor.Data()),
             numel * sizeof(float));
    }
  } else if (data_type == at::kInt) {
    if (is_backend_cuda) {
      cudaMemcpy(poros_value.data_ptr(), static_cast<void *>(tensor.Data()),
                 numel * sizeof(int32_t), cudaMemcpyHostToDevice);
    } else {
      memcpy(poros_value.data_ptr(), static_cast<void *>(tensor.Data()),
             numel * sizeof(int32_t));
    }
  } else if (data_type == at::kLong) {
    if (is_backend_cuda) {
      cudaMemcpy(poros_value.data_ptr(), static_cast<void *>(tensor.Data()),
                 numel * sizeof(int64_t), cudaMemcpyHostToDevice);
    } else {
      memcpy(poros_value.data_ptr(), static_cast<void *>(tensor.Data()),
             numel * sizeof(int64_t));
    }
  } else if (data_type == at::kDouble) {
    if (is_backend_cuda) {
      cudaMemcpy(poros_value.data_ptr(), static_cast<void *>(tensor.Data()),
                 numel * sizeof(double), cudaMemcpyHostToDevice);
    } else {
      memcpy(poros_value.data_ptr(), static_cast<void *>(tensor.Data()),
             numel * sizeof(double));
    }
  } else {
    FDASSERT(false, "Unrecognized data type while calling "
                    "PorosBackend::CreatePorosValue().");
  }
  return poros_value;
}
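
// Illustrative only (not part of the backend): a minimal sketch of how
// CreatePorosValue might be used to hand host data to the Poros runtime,
// assuming `input` is an FDTensor already filled on the CPU:
//
//   FDTensor input;
//   input.Resize({1, 3, 224, 224}, FDDataType::FP32);
//   // ... fill input.Data() with host values ...
//   at::Tensor poros_in = CreatePorosValue(input, /*is_backend_cuda=*/false);
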
// Copy the contents of an at::Tensor into `fd_tensor`, resizing it to the
// tensor's shape and dtype (device-to-host when the backend runs on CUDA).
void CopyTensorToCpu(const at::Tensor &tensor, FDTensor *fd_tensor,
                     bool is_backend_cuda) {
  const auto data_type = tensor.scalar_type();
  std::vector<int64_t> shape;
  auto sizes = tensor.sizes();
  for (size_t i = 0; i < sizes.size(); i++) {
    shape.push_back(sizes[i]);
  }
  auto fd_dtype = GetFdDtype(data_type);
  fd_tensor->Resize(shape, fd_dtype);
  size_t numel = tensor.numel();
  // at::Tensor -> FDTensor
  if (data_type == at::kFloat) {
    if (is_backend_cuda) {
      cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float),
                 cudaMemcpyDeviceToHost);
    } else {
      memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float));
    }
    return;
  } else if (data_type == at::kInt) {
    if (is_backend_cuda) {
      cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t),
                 cudaMemcpyDeviceToHost);
    } else {
      memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t));
    }
    return;
  } else if (data_type == at::kLong) {
    if (is_backend_cuda) {
      cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t),
                 cudaMemcpyDeviceToHost);
    } else {
      memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t));
    }
    return;
  } else if (data_type == at::kDouble) {
    if (is_backend_cuda) {
      cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double),
                 cudaMemcpyDeviceToHost);
    } else {
      memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double));
    }
    return;
  }
}
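
// Illustrative only (not part of the backend): a minimal round-trip sketch,
// assuming `poros_out` is an at::Tensor produced by the Poros runtime:
//
//   FDTensor output;
//   CopyTensorToCpu(poros_out, &output, /*is_backend_cuda=*/false);
//   // `output` now holds a host copy with matching shape and dtype.
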
} // namespace ultra_infer