// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
  14. #include "ultra_infer/pybind/main.h"
  15. namespace ultra_infer {
  16. void BindOption(pybind11::module &m);
  17. void BindRuntime(pybind11::module &m) {
  18. BindOption(m);
  19. pybind11::class_<TensorInfo>(m, "TensorInfo")
  20. .def_readwrite("name", &TensorInfo::name)
  21. .def_readwrite("shape", &TensorInfo::shape)
  22. .def_readwrite("dtype", &TensorInfo::dtype);
  23. pybind11::class_<Runtime>(m, "Runtime")
  24. .def(pybind11::init())
  25. .def("init", &Runtime::Init)
  26. .def("compile",
  27. [](Runtime &self,
  28. std::vector<std::vector<pybind11::array>> &warm_datas,
  29. const RuntimeOption &_option) {
  30. size_t rows = warm_datas.size();
  31. size_t columns = warm_datas[0].size();
  32. std::vector<std::vector<FDTensor>> warm_tensors(
  33. rows, std::vector<FDTensor>(columns));
  34. for (size_t i = 0; i < rows; ++i) {
  35. for (size_t j = 0; j < columns; ++j) {
  36. auto dtype =
  37. NumpyDataTypeToFDDataType(warm_datas[i][j].dtype());
  38. std::vector<int64_t> data_shape;
  39. data_shape.insert(data_shape.begin(), warm_datas[i][j].shape(),
  40. warm_datas[i][j].shape() +
  41. warm_datas[i][j].ndim());
  42. warm_tensors[i][j].Resize(data_shape, dtype);
  43. memcpy(warm_tensors[i][j].MutableData(),
  44. warm_datas[i][j].mutable_data(),
  45. warm_datas[i][j].nbytes());
  46. }
  47. }
  48. return self.Compile(warm_tensors);
  49. })
  50. .def("infer",
  51. [](Runtime &self, std::map<std::string, pybind11::array> &data) {
  52. std::vector<FDTensor> inputs(data.size());
  53. int index = 0;
  54. for (auto iter = data.begin(); iter != data.end(); ++iter) {
  55. std::vector<int64_t> data_shape;
  56. data_shape.insert(data_shape.begin(), iter->second.shape(),
  57. iter->second.shape() + iter->second.ndim());
  58. auto dtype = NumpyDataTypeToFDDataType(iter->second.dtype());
  59. // TODO(jiangjiajun) Maybe skip memory copy is a better choice
  60. // use SetExternalData
  61. inputs[index].Resize(data_shape, dtype);
  62. memcpy(inputs[index].MutableData(), iter->second.mutable_data(),
  63. iter->second.nbytes());
  64. inputs[index].name = iter->first;
  65. index += 1;
  66. }
  67. std::vector<FDTensor> outputs(self.NumOutputs());
  68. self.Infer(inputs, &outputs);
  69. std::vector<pybind11::array> results;
  70. results.reserve(outputs.size());
  71. for (size_t i = 0; i < outputs.size(); ++i) {
  72. auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
  73. results.emplace_back(
  74. pybind11::array(numpy_dtype, outputs[i].shape));
  75. memcpy(results[i].mutable_data(), outputs[i].Data(),
  76. outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
  77. }
  78. return results;
  79. })
  80. .def("infer",
  81. [](Runtime &self, std::map<std::string, FDTensor> &data) {
  82. std::vector<FDTensor> inputs;
  83. inputs.reserve(data.size());
  84. for (auto iter = data.begin(); iter != data.end(); ++iter) {
  85. FDTensor tensor;
  86. tensor.SetExternalData(iter->second.Shape(),
  87. iter->second.Dtype(), iter->second.Data(),
  88. iter->second.device);
  89. tensor.name = iter->first;
  90. inputs.push_back(tensor);
  91. }
  92. std::vector<FDTensor> outputs;
  93. if (!self.Infer(inputs, &outputs)) {
  94. throw std::runtime_error("Failed to inference with Runtime.");
  95. }
  96. return outputs;
  97. })
  98. .def("infer",
  99. [](Runtime &self, std::vector<FDTensor> &inputs) {
  100. std::vector<FDTensor> outputs;
  101. self.Infer(inputs, &outputs);
  102. return outputs;
  103. })
  104. .def("bind_input_tensor", &Runtime::BindInputTensor)
  105. .def("bind_output_tensor", &Runtime::BindOutputTensor)
  106. .def("infer", [](Runtime &self) { self.Infer(); })
  107. .def("get_output_tensor",
  108. [](Runtime &self, const std::string &name) {
  109. FDTensor *output = self.GetOutputTensor(name);
  110. if (output == nullptr) {
  111. return pybind11::cast(nullptr);
  112. }
  113. return pybind11::cast(*output);
  114. })
  115. .def("num_inputs", &Runtime::NumInputs)
  116. .def("num_outputs", &Runtime::NumOutputs)
  117. .def("get_input_info", &Runtime::GetInputInfo)
  118. .def("get_output_info", &Runtime::GetOutputInfo)
  119. .def("get_profile_time", &Runtime::GetProfileTime)
  120. .def_readonly("option", &Runtime::option);
  121. pybind11::enum_<Backend>(m, "Backend", pybind11::arithmetic(),
  122. "Backend for inference.")
  123. .value("UNKOWN", Backend::UNKNOWN)
  124. .value("ORT", Backend::ORT)
  125. .value("TRT", Backend::TRT)
  126. .value("POROS", Backend::POROS)
  127. .value("PDINFER", Backend::PDINFER)
  128. .value("RKNPU2", Backend::RKNPU2)
  129. .value("SOPHGOTPU", Backend::SOPHGOTPU)
  130. .value("TVM", Backend::TVM)
  131. .value("LITE", Backend::LITE)
  132. .value("OMONNPU", Backend::OMONNPU);
  133. pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(),
  134. "ModelFormat for inference.")
  135. .value("PADDLE", ModelFormat::PADDLE)
  136. .value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT)
  137. .value("RKNN", ModelFormat::RKNN)
  138. .value("SOPHGO", ModelFormat::SOPHGO)
  139. .value("ONNX", ModelFormat::ONNX)
  140. .value("TVMFormat", ModelFormat::TVMFormat)
  141. .value("OM", ModelFormat::OM);
  142. pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
  143. "Device for inference.")
  144. .value("CPU", Device::CPU)
  145. .value("GPU", Device::GPU)
  146. .value("IPU", Device::IPU)
  147. .value("RKNPU", Device::RKNPU)
  148. .value("SOPHGOTPU", Device::SOPHGOTPUD);
  149. pybind11::enum_<FDDataType>(m, "FDDataType", pybind11::arithmetic(),
  150. "Data type of UltraInfer.")
  151. .value("BOOL", FDDataType::BOOL)
  152. .value("INT8", FDDataType::INT8)
  153. .value("INT16", FDDataType::INT16)
  154. .value("INT32", FDDataType::INT32)
  155. .value("INT64", FDDataType::INT64)
  156. .value("FP16", FDDataType::FP16)
  157. .value("FP32", FDDataType::FP32)
  158. .value("FP64", FDDataType::FP64)
  159. .value("UINT8", FDDataType::UINT8);
  160. m.def("get_available_backends", []() { return GetAvailableBackends(); });
  161. }
  162. } // namespace ultra_infer