// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*! \file runtime.h
    \brief Declaration of the Runtime class, the unified inference entry of
    UltraInfer.
*/

#pragma once

#include "ultra_infer/core/fd_tensor.h"
#include "ultra_infer/runtime/backends/backend.h"
#include "ultra_infer/runtime/runtime_option.h"
#include "ultra_infer/utils/perf.h"

/** \brief All C++ UltraInfer APIs are defined inside this namespace
 *
 */
namespace ultra_infer {

/*! @brief Runtime object used to run inference with the loaded model on
 * different devices
 */
struct ULTRAINFER_DECL Runtime {
 public:
  /// Initialize a Runtime object with RuntimeOption
  bool Init(const RuntimeOption &_option);

  /** \brief Run inference with the given input data, and write the results to
   * the output tensors
   *
   * \param[in] input_tensors Notice the FDTensor::name should keep the same
   * with the model's inputs
   * \param[in] output_tensors Inference results
   * \return true if the inference succeeded, otherwise false
   */
  bool Infer(std::vector<FDTensor> &input_tensors,
             std::vector<FDTensor> *output_tensors);

  /** \brief Run inference without parameters.
   *
   * The input and output data need to be passed through the BindInputTensor
   * and GetOutputTensor interfaces.
   */
  bool Infer();

  /** \brief Get number of inputs
   */
  int NumInputs() { return backend_->NumInputs(); }
  /** \brief Get number of outputs
   */
  int NumOutputs() { return backend_->NumOutputs(); }
  /** \brief Get input information by index
   */
  TensorInfo GetInputInfo(int index);
  /** \brief Get output information by index
   */
  TensorInfo GetOutputInfo(int index);
  /** \brief Get all the input information
   */
  std::vector<TensorInfo> GetInputInfos();
  /** \brief Get all the output information
   */
  std::vector<TensorInfo> GetOutputInfos();
  /** \brief Bind FDTensor by name; shares the input memory without copying
   */
  void BindInputTensor(const std::string &name, FDTensor &input);

  /** \brief Bind FDTensor by name; shares the output memory without copying.
   * Please make sure the tensor shape of the output is correct.
   */
  void BindOutputTensor(const std::string &name, FDTensor &output);

  /** \brief Get output FDTensor by name; shares the backend output memory
   * without copying
   */
  FDTensor *GetOutputTensor(const std::string &name);

  /** \brief Clone a new Runtime when multiple instances of the same model are
   * created
   *
   * \param[in] stream CUDA stream, default is nullptr
   * \return new Runtime* created by this clone
   */
  Runtime *Clone(void *stream = nullptr, int device_id = -1);

  void ReleaseModelMemoryBuffer();

  RuntimeOption option;

  /** \brief Compile TorchScript Module, only for Poros backend
   *
   * \param[in] prewarm_tensors Prewarm data for compilation
   * \return true if the compilation succeeded, otherwise false
   */
  bool Compile(std::vector<std::vector<FDTensor>> &prewarm_tensors);

  /** \brief Get profile time of Runtime after the profiling process is done.
   */
  double GetProfileTime() {
    return backend_->benchmark_result_.time_of_runtime;
  }

 private:
  void CreateOrtBackend();
  void CreatePaddleBackend();
  void CreateTrtBackend();
  void CreateOpenVINOBackend();
  void CreateLiteBackend();
  void CreateRKNPU2Backend();
  void CreateHorizonBackend();
  void CreateSophgoNPUBackend();
  void CreatePorosBackend();
  void CreateTVMBackend();
  void CreateOMBackend();

  std::unique_ptr<BaseBackend> backend_;
  std::vector<FDTensor> input_tensors_;
  std::vector<FDTensor> output_tensors_;
};
}  // namespace ultra_infer
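
// A minimal usage sketch (illustrative only). It relies on the Runtime
// interface declared above; the RuntimeOption setters used here
// (SetModelPath, UseCpu) are assumed from the broader UltraInfer API and may
// differ in name or signature.
//
// \code
// #include "ultra_infer/runtime/runtime.h"
//
// int main() {
//   ultra_infer::RuntimeOption option;
//   option.SetModelPath("model.onnx");  // assumed setter, path is an example
//   option.UseCpu();                    // assumed setter
//
//   ultra_infer::Runtime runtime;
//   if (!runtime.Init(option)) return -1;
//
//   // Prepare inputs; FDTensor::name must match the model's input names.
//   std::vector<ultra_infer::FDTensor> inputs(runtime.NumInputs());
//   for (int i = 0; i < runtime.NumInputs(); ++i) {
//     inputs[i].name = runtime.GetInputInfo(i).name;
//     // ... fill inputs[i] with data ...
//   }
//
//   std::vector<ultra_infer::FDTensor> outputs;
//   if (!runtime.Infer(inputs, &outputs)) return -1;
//   return 0;
// }
// \endcode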