transpose.cc 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "ultra_infer/function/transpose.h"
  15. #include "ultra_infer/function/eigen.h"
  16. #include "ultra_infer/utils/utils.h"
  17. namespace ultra_infer {
  18. namespace function {
  19. template <typename T> struct TransposeNormalKernel {
  20. void operator()(const FDTensor &in, FDTensor *out,
  21. const std::vector<int64_t> &axis) {
  22. const int rank = axis.size();
  23. auto in_stride = GetStride(in.shape);
  24. auto out_stride = GetStride(out->shape);
  25. const T *in_ptr = reinterpret_cast<const T *>(in.Data());
  26. T *out_ptr = reinterpret_cast<T *>(out->Data());
  27. auto transpose_helper = [&](int64_t beg, int64_t end) {
  28. for (int64_t out_idx = beg; out_idx < end; ++out_idx) {
  29. int64_t in_idx = 0;
  30. int64_t tmp_idx = out_idx;
  31. // calculate the input index
  32. for (int i = 0; i < rank; ++i) {
  33. const int64_t coordinate = tmp_idx / out_stride[i];
  34. tmp_idx -= coordinate * out_stride[i];
  35. in_idx += coordinate * in_stride[axis[i]];
  36. }
  37. out_ptr[out_idx] = in_ptr[in_idx];
  38. }
  39. };
  40. transpose_helper(0, out->Numel());
  41. }
  42. };
  43. template <typename T, int Rank> struct TransposeKernelImpl {
  44. void operator()(const FDTensor &in, FDTensor *out,
  45. const std::vector<int64_t> &axis) {
  46. Eigen::array<int, Rank> permute;
  47. for (int i = 0; i < Rank; i++) {
  48. permute[i] = axis[i];
  49. }
  50. auto &place = *EigenDeviceWrapper::GetInstance()->GetDevice();
  51. auto eigen_in = EigenTensor<T, Rank>::From(in);
  52. auto eigen_out = EigenTensor<T, Rank>::From(*out);
  53. eigen_out.device(place) = eigen_in.shuffle(permute);
  54. }
  55. };
  56. template <typename T>
  57. void TransposeKernel(const FDTensor &x, FDTensor *out,
  58. const std::vector<int64_t> &axis) {
  59. int rank = axis.size();
  60. switch (rank) {
  61. case 1:
  62. TransposeKernelImpl<T, 1> trans1;
  63. trans1(x, out, axis);
  64. break;
  65. case 2:
  66. TransposeKernelImpl<T, 2> trans2;
  67. trans2(x, out, axis);
  68. break;
  69. case 3:
  70. TransposeKernelImpl<T, 3> trans3;
  71. trans3(x, out, axis);
  72. break;
  73. case 4:
  74. TransposeKernelImpl<T, 4> trans4;
  75. trans4(x, out, axis);
  76. break;
  77. default:
  78. // for rank >= 4 situation
  79. TransposeNormalKernel<T> trans_normal;
  80. trans_normal(x, out, axis);
  81. }
  82. }
  83. void Transpose(const FDTensor &x, FDTensor *out,
  84. const std::vector<int64_t> &dims) {
  85. size_t dims_size = dims.size();
  86. FDASSERT(dims_size == x.shape.size(),
  87. "The input tensor's dimension should be equal to the dims's size. "
  88. "Expect dims size is %lu, but receive %lu.",
  89. x.shape.size(), dims_size);
  90. std::vector<int> count(dims_size, 0);
  91. for (size_t i = 0; i < dims_size; i++) {
  92. FDASSERT(dims[i] >= 0,
  93. "The dims should be greater than or equal to 0, but receive %lld.",
  94. dims[i]);
  95. FDASSERT(dims[i] < static_cast<int>(dims_size) && ++count[dims[i]] == 1,
  96. "Each element of Attribute axis should be a unique value range "
  97. "from 0 to (dims - 1), where the dims is the axis's size, unique "
  98. "value means this axis value can appear only once. ");
  99. }
  100. std::vector<int64_t> out_dims(dims_size);
  101. for (size_t i = 0; i < dims_size; i++) {
  102. out_dims[i] = x.shape[dims[i]];
  103. }
  104. // Note(zhoushunjie): The FDTensor out may equal to FDTensor x, so firstly we
  105. // use out_temp to get the transposed result, then we move the out_temp to
  106. // out.
  107. FDTensor out_temp;
  108. out_temp.Allocate(out_dims, x.dtype);
  109. FD_VISIT_ALL_TYPES(x.dtype, "TransposeKernel",
  110. ([&] { TransposeKernel<data_t>(x, &out_temp, dims); }));
  111. *out = std::move(out_temp);
  112. }
  113. } // namespace function
  114. } // namespace ultra_infer