fd_tensor.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <dlpack/dlpack.h>

#include "ultra_infer/core/fd_type.h"
#include "ultra_infer/pybind/main.h"
#include "ultra_infer/ultra_infer_model.h"
#include "ultra_infer/utils/utils.h"

namespace ultra_infer {

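// Maps an FDDataType onto the equivalent DLPack DLDataType (type code plus
// bit width, always with lanes == 1), e.g. FDDataType::FP32 becomes
// {kDLFloat, 32, 1}. BOOL is exported as a 1-bit signed integer; any other
// type triggers an FDASSERT failure.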
DLDataType FDToDlpackType(FDDataType fd_dtype) {
  DLDataType dl_dtype;
  DLDataTypeCode dl_code;
  // Number of bits required for the data type.
  size_t dt_size = 0;

  dl_dtype.lanes = 1;
  switch (fd_dtype) {
  case FDDataType::BOOL:
    dl_code = DLDataTypeCode::kDLInt;
    dt_size = 1;
    break;
  case FDDataType::UINT8:
    dl_code = DLDataTypeCode::kDLUInt;
    dt_size = 8;
    break;
  case FDDataType::INT8:
    dl_code = DLDataTypeCode::kDLInt;
    dt_size = 8;
    break;
  case FDDataType::INT16:
    dl_code = DLDataTypeCode::kDLInt;
    dt_size = 16;
    break;
  case FDDataType::INT32:
    dl_code = DLDataTypeCode::kDLInt;
    dt_size = 32;
    break;
  case FDDataType::INT64:
    dl_code = DLDataTypeCode::kDLInt;
    dt_size = 64;
    break;
  case FDDataType::FP16:
    dl_code = DLDataTypeCode::kDLFloat;
    dt_size = 16;
    break;
  case FDDataType::FP32:
    dl_code = DLDataTypeCode::kDLFloat;
    dt_size = 32;
    break;
  case FDDataType::FP64:
    dl_code = DLDataTypeCode::kDLFloat;
    dt_size = 64;
    break;
  default:
    FDASSERT(false, "Convert to DlPack, FDType \"%s\" is not supported.",
             Str(fd_dtype).c_str());
  }

  dl_dtype.code = dl_code;
  dl_dtype.bits = dt_size;
  return dl_dtype;
}

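// Inverse mapping of FDToDlpackType: converts a DLPack DLDataType back to an
// FDDataType. Only lanes == 1 is accepted; unrecognized code/bits
// combinations fall through to FDDataType::UNKNOWN1.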
FDDataType DlpackToFDType(const DLDataType &data_type) {
  FDASSERT(data_type.lanes == 1,
           "FDTensor does not support dlpack lanes != 1");

  if (data_type.code == DLDataTypeCode::kDLFloat) {
    if (data_type.bits == 16) {
      return FDDataType::FP16;
    } else if (data_type.bits == 32) {
      return FDDataType::FP32;
    } else if (data_type.bits == 64) {
      return FDDataType::FP64;
    }
  }

  if (data_type.code == DLDataTypeCode::kDLInt) {
    if (data_type.bits == 8) {
      return FDDataType::INT8;
    } else if (data_type.bits == 16) {
      return FDDataType::INT16;
    } else if (data_type.bits == 32) {
      return FDDataType::INT32;
    } else if (data_type.bits == 64) {
      return FDDataType::INT64;
    } else if (data_type.bits == 1) {
      return FDDataType::BOOL;
    }
  }

  if (data_type.code == DLDataTypeCode::kDLUInt) {
    if (data_type.bits == 8) {
      return FDDataType::UINT8;
    }
  }

  return FDDataType::UNKNOWN1;
}

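// Capsule destructor used by FDTensorToDLPack. If the capsule is still named
// "dltensor" (i.e. it was never consumed by a DLPack importer), invoke the
// managed tensor's deleter here so its resources are released.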
void DeleteUnusedDltensor(PyObject *dlp) {
  if (PyCapsule_IsValid(dlp, "dltensor")) {
    DLManagedTensor *dl_managed_tensor =
        static_cast<DLManagedTensor *>(PyCapsule_GetPointer(dlp, "dltensor"));
    dl_managed_tensor->deleter(dl_managed_tensor);
  }
}

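// Wraps an FDTensor in a DLManagedTensor and returns it as a "dltensor"
// capsule. The FDTensor's Python reference count is incremented so the
// exported memory stays alive until the DLPack consumer runs the deleter.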
pybind11::capsule FDTensorToDLPack(FDTensor &fd_tensor) {
  DLManagedTensor *dlpack_tensor = new DLManagedTensor;
  dlpack_tensor->dl_tensor.ndim = fd_tensor.shape.size();
  dlpack_tensor->dl_tensor.byte_offset = 0;
  dlpack_tensor->dl_tensor.data = fd_tensor.MutableData();
  dlpack_tensor->dl_tensor.shape = &(fd_tensor.shape[0]);
  dlpack_tensor->dl_tensor.strides = nullptr;
  dlpack_tensor->manager_ctx = &fd_tensor;
  dlpack_tensor->deleter = [](DLManagedTensor *m) {
    if (m->manager_ctx == nullptr) {
      return;
    }

    FDTensor *tensor_ptr = reinterpret_cast<FDTensor *>(m->manager_ctx);
    pybind11::handle tensor_handle = pybind11::cast(tensor_ptr);
    tensor_handle.dec_ref();
    // The DLManagedTensor was allocated with new, so release it with delete.
    delete m;
  };

  pybind11::handle tensor_handle = pybind11::cast(&fd_tensor);
  // Increase the reference count by one to make sure that the DLPack
  // representation doesn't become invalid when the tensor object goes out of
  // scope.
  tensor_handle.inc_ref();

  dlpack_tensor->dl_tensor.dtype = FDToDlpackType(fd_tensor.dtype);
  dlpack_tensor->dl_tensor.device.device_id = fd_tensor.device_id;
  if (fd_tensor.device == Device::GPU) {
    if (fd_tensor.is_pinned_memory) {
      dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCUDAHost;
    } else {
      dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCUDA;
    }
  } else {
    dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCPU;
  }

  return pybind11::capsule(static_cast<void *>(dlpack_tensor), "dltensor",
                           &DeleteUnusedDltensor);
}

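// Builds an FDTensor named `name` that shares memory with the given DLPack
// capsule. Only C-contiguous tensors on CPU, CUDA, or CUDA pinned memory are
// accepted; the capsule is renamed to "used_dlpack" to mark it as consumed.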
FDTensor FDTensorFromDLPack(const std::string &name,
                            const pybind11::capsule &dlpack_tensor) {
  DLManagedTensor *dl_managed_tensor =
      static_cast<DLManagedTensor *>(dlpack_tensor.get_pointer());

  void *memory_ptr = dl_managed_tensor->dl_tensor.data;
  memory_ptr = reinterpret_cast<char *>(memory_ptr) +
               dl_managed_tensor->dl_tensor.byte_offset;

  int64_t *strides = dl_managed_tensor->dl_tensor.strides;
  int ndim = dl_managed_tensor->dl_tensor.ndim;
  std::vector<int64_t> dims(dl_managed_tensor->dl_tensor.shape,
                            dl_managed_tensor->dl_tensor.shape + ndim);

  // Check if the input is contiguous and in C order.
  if (strides != nullptr) {
    int64_t calculated_stride{1};
    bool is_contiguous_c_order = true;
    for (size_t i = 1; i < dims.size(); i++) {
      if (strides[ndim - i] != calculated_stride) {
        is_contiguous_c_order = false;
        break;
      }

      calculated_stride *= dims[ndim - i];
    }

    FDASSERT(is_contiguous_c_order,
             "DLPack tensor is not contiguous. Only contiguous DLPack "
             "tensors that are stored in C-Order are supported.");
  }

  Device device;
  int32_t device_id = -1;
  bool is_pinned_memory = false;
  switch (dl_managed_tensor->dl_tensor.device.device_type) {
  case DLDeviceType::kDLCUDA:
    device = Device::GPU;
    device_id = dl_managed_tensor->dl_tensor.device.device_id;
    break;
  case DLDeviceType::kDLCPU:
    device = Device::CPU;
    break;
  case DLDeviceType::kDLCUDAHost:
    device = Device::CPU;
    is_pinned_memory = true;
    break;
  default:
    FDASSERT(false,
             ("DLDevice type " +
              std::to_string(dl_managed_tensor->dl_tensor.device.device_type) +
              " is not supported by the Python backend.")
                 .c_str());
    break;
  }

  FDDataType dtype = DlpackToFDType(dl_managed_tensor->dl_tensor.dtype);

  // Rename the capsule so its destructor does not free the memory again.
  PyCapsule_SetName(dlpack_tensor.ptr(), "used_dlpack");

  FDTensor fd_tensor(name);
  fd_tensor.SetExternalData(dims, dtype, memory_ptr, device, device_id);
  fd_tensor.is_pinned_memory = is_pinned_memory;
  return fd_tensor;
}

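// Registers the FDTensor class and its numpy / DLPack interop methods with
// the Python module. A rough usage sketch from the Python side (assuming the
// class is exposed as `ultra_infer.FDTensor`; names here are illustrative):
//
//   t = ultra_infer.FDTensor()
//   t.from_numpy(np.zeros((1, 3, 224, 224), dtype=np.float32), True)
//   capsule = t.to_dlpack()  # hand the capsule to any DLPack consumer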
void BindFDTensor(pybind11::module &m) {
  pybind11::class_<FDTensor>(m, "FDTensor")
      .def(pybind11::init<>(), "Default Constructor")
      .def_readwrite("name", &FDTensor::name)
      .def_readonly("shape", &FDTensor::shape)
      .def_readonly("dtype", &FDTensor::dtype)
      .def_readonly("device", &FDTensor::device)
      .def("numpy", [](FDTensor &self) { return TensorToPyArray(self); })
      .def("data", &FDTensor::MutableData)
      .def("from_numpy",
           [](FDTensor &self, pybind11::array &pyarray,
              bool share_buffer = false) {
             PyArrayToTensor(pyarray, &self, share_buffer);
           })
      .def("from_external_data",
           [](const std::string &name, size_t data_addr,
              const std::vector<int64_t> &shape, const std::string &data_type,
              const std::string &data_place, int device_id) {
             auto fd_data_type = FDDataType::UNKNOWN1;
             if (data_type == "FP32") {
               fd_data_type = FDDataType::FP32;
             } else if (data_type == "FP16") {
               fd_data_type = FDDataType::FP16;
             } else if (data_type == "INT32") {
               fd_data_type = FDDataType::INT32;
             } else if (data_type == "INT64") {
               fd_data_type = FDDataType::INT64;
             } else {
               FDASSERT(false,
                        "FDTensor.from_external_data, datatype \"%s\" is not "
                        "supported.",
                        data_type.c_str());
             }

             Device fd_data_place;
             bool copy = false;
             if (data_place.find("gpu") != data_place.npos) {
               fd_data_place = Device::GPU;
             } else if (data_place.find("cpu") != data_place.npos) {
               copy = true;
               fd_data_place = Device::CPU;
             } else {
               FDASSERT(false, ("Device type " + data_place +
                                " is not supported by "
                                "FDTensor.from_external_data.")
                                   .c_str());
             }

             void *data_ptr = reinterpret_cast<void *>(data_addr);
             FDTensor fd_tensor(name);
             fd_tensor.SetData(shape, fd_data_type,
                               static_cast<void *>(data_ptr), copy,
                               fd_data_place, device_id);
             return fd_tensor;
           })
      .def("to_dlpack", &FDTensorToDLPack)
      .def("from_dlpack", &FDTensorFromDLPack)
      .def("print_info", &FDTensor::PrintInfo);
}

} // namespace ultra_infer