runtime.h

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*! \file runtime.h
    \brief Declaration of the Runtime struct, UltraInfer's unified interface
    for running model inference across different backends and devices.
*/

#pragma once

#include "ultra_infer/core/fd_tensor.h"
#include "ultra_infer/runtime/backends/backend.h"
#include "ultra_infer/runtime/runtime_option.h"
#include "ultra_infer/utils/perf.h"

/** \brief All C++ UltraInfer APIs are defined inside this namespace
 *
 */
namespace ultra_infer {

/*! @brief Runtime object used to run inference on a loaded model across
 * different devices
 */
struct ULTRAINFER_DECL Runtime {
 public:
  /// Initialize a Runtime object with a RuntimeOption
  bool Init(const RuntimeOption &_option);

  /** \brief Run inference on the model with the given input data, and write
   * the results to the output
   *
   * \param[in] input_tensors Note that each FDTensor::name must match the
   * corresponding input name of the model
   * \param[out] output_tensors Inference results
   * \return true if the inference succeeded, otherwise false
   */
  bool Infer(std::vector<FDTensor> &input_tensors,
             std::vector<FDTensor> *output_tensors);
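
  // A minimal usage sketch (illustrative, not part of this header; the model
  // file names and the RuntimeOption setter used are assumptions based on
  // runtime_option.h):
  //
  //   ultra_infer::RuntimeOption opt;
  //   opt.SetModelPath("model.pdmodel", "model.pdiparams");  // hypothetical paths
  //   ultra_infer::Runtime runtime;
  //   if (!runtime.Init(opt)) { /* handle initialization failure */ }
  //   std::vector<ultra_infer::FDTensor> inputs(runtime.NumInputs());
  //   inputs[0].name = runtime.GetInputInfo(0).name;  // name must match the model
  //   // ... fill inputs[0] with data ...
  //   std::vector<ultra_infer::FDTensor> outputs;
  //   runtime.Infer(inputs, &outputs);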

  /** \brief Run inference on the model without passing input/output
   * parameters.
   *
   * The input and output data must instead be exchanged through the
   * BindInputTensor and GetOutputTensor interfaces.
   */
  bool Infer();
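
  // A minimal sketch of the zero-parameter flow, using the bind interfaces
  // declared below (the tensor names "x" and "y" are hypothetical and must
  // match the model's actual input/output names):
  //
  //   ultra_infer::FDTensor input;
  //   // ... fill input with data ...
  //   runtime.BindInputTensor("x", input);  // no copy, input memory is shared
  //   runtime.Infer();
  //   ultra_infer::FDTensor *out = runtime.GetOutputTensor("y");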

  /** \brief Get the number of inputs
   */
  int NumInputs() { return backend_->NumInputs(); }

  /** \brief Get the number of outputs
   */
  int NumOutputs() { return backend_->NumOutputs(); }

  /** \brief Get input information by index
   */
  TensorInfo GetInputInfo(int index);

  /** \brief Get output information by index
   */
  TensorInfo GetOutputInfo(int index);

  /** \brief Get all the input information
   */
  std::vector<TensorInfo> GetInputInfos();

  /** \brief Get all the output information
   */
  std::vector<TensorInfo> GetOutputInfos();

  /** \brief Bind an FDTensor as input by name; no copy, the input memory is
   * shared
   */
  void BindInputTensor(const std::string &name, FDTensor &input);

  /** \brief Bind an FDTensor as output by name; no copy, the output memory
   * is shared. Please make sure the tensor shape of the output is correct.
   */
  void BindOutputTensor(const std::string &name, FDTensor &output);

  /** \brief Get an output FDTensor by name; no copy, the backend output
   * memory is shared
   */
  FDTensor *GetOutputTensor(const std::string &name);

  /** \brief Clone a new Runtime when multiple instances of the same model
   * are created
   *
   * \param[in] stream CUDA stream, default is nullptr
   * \param[in] device_id Device id for the cloned Runtime, default is -1
   * \return the new Runtime* created by this clone
   */
  Runtime *Clone(void *stream = nullptr, int device_id = -1);
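
  // A minimal sketch of cloning, intended for serving several instances of
  // the same model (the stream and device id values here are assumptions):
  //
  //   ultra_infer::Runtime *worker =
  //       runtime.Clone(/* stream = */ nullptr, /* device_id = */ 0);
  //   // `worker` can now run inference independently of `runtime`,
  //   // e.g. from another thread.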

  void ReleaseModelMemoryBuffer();

  RuntimeOption option;

  /** \brief Compile a TorchScript module, only for the Poros backend
   *
   * \param[in] prewarm_tensors Prewarm data for the compilation
   * \return true if the compilation succeeded, otherwise false
   */
  bool Compile(std::vector<std::vector<FDTensor>> &prewarm_tensors);
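
  // A minimal sketch of prewarming for Poros (the shapes and the make_input
  // helper are hypothetical; each inner vector is one set of example inputs):
  //
  //   std::vector<std::vector<ultra_infer::FDTensor>> prewarm = {
  //       {make_input({1, 3, 224, 224})},
  //       {make_input({4, 3, 224, 224})}};
  //   runtime.Compile(prewarm);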

  /** \brief Get the running time of the Runtime after the profile process
   * is done.
   */
  double GetProfileTime() {
    return backend_->benchmark_result_.time_of_runtime;
  }

 private:
  void CreateOrtBackend();
  void CreatePaddleBackend();
  void CreateTrtBackend();
  void CreateOpenVINOBackend();
  void CreateLiteBackend();
  void CreateRKNPU2Backend();
  void CreateHorizonBackend();
  void CreateSophgoNPUBackend();
  void CreatePorosBackend();
  void CreateTVMBackend();
  void CreateOMBackend();

  std::unique_ptr<BaseBackend> backend_;
  std::vector<FDTensor> input_tensors_;
  std::vector<FDTensor> output_tensors_;
};
} // namespace ultra_infer