ultra_infer_model.h

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "ultra_infer/runtime.h"

namespace ultra_infer {
/*! @brief Base model object for all the vision models
 */
class ULTRAINFER_DECL UltraInferModel {
 public:
  /// Get model's name
  virtual std::string ModelName() const { return "NameUndefined"; }
  /** \brief Run inference with the runtime. This interface is called inside
   * the `Predict()` function, so in most cases there is no need to call
   * `Infer()` directly.
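   *
   * A minimal sketch of calling `Infer()` manually. The tensor setup assumes
   * the `FDTensor::SetExternalData()` API inherited from FastDeploy, and the
   * shape and data values are illustrative only: @code
   * std::vector<FDTensor> inputs(1), outputs;
   * std::vector<float> data(1 * 3 * 224 * 224, 0.f);
   * inputs[0].SetExternalData({1, 3, 224, 224}, FDDataType::FP32, data.data());
   * if (!model.Infer(inputs, &outputs)) {
   *   std::cerr << "Failed to inference." << std::endl;
   * }
   * @endcode
   */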
  virtual bool Infer(std::vector<FDTensor> &input_tensors,
                     std::vector<FDTensor> *output_tensors);
  /** \brief Run inference with the runtime. This overload uses the class
   * member `reused_input_tensors_` as input and writes the results to
   * `reused_output_tensors_`.
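   *
   * A sketch of the intended usage inside a subclass's `Predict()`
   * implementation; the pre-/post-processing steps are placeholders: @code
   * // Inside a subclass of UltraInferModel:
   * reused_input_tensors_.resize(NumInputsOfRuntime());
   * // ... fill reused_input_tensors_ during preprocessing ...
   * if (!Infer()) {
   *   return false;
   * }
   * // ... read reused_output_tensors_ during postprocessing ...
   * @endcode
   */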
  virtual bool Infer();
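  /** \brief Option of the bound runtime, e.g. device and backend; configure
   * it before the model is initialized.
   *
   * A minimal sketch; the `UseGpu()` / `UseOrtBackend()` setters follow the
   * RuntimeOption API inherited from FastDeploy: @code
   * auto option = ultra_infer::RuntimeOption();
   * option.UseGpu(0);
   * option.UseOrtBackend();
   * // Pass `option` as the RuntimeOption argument of a model's constructor.
   * @endcode
   */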
  RuntimeOption runtime_option;
  /** \brief Model's valid cpu backends. This member defines all the cpu
   * backends that have been successfully tested for the model
   */
  std::vector<Backend> valid_cpu_backends = {Backend::ORT};
  /** Model's valid gpu backends. This member defines all the gpu backends
   * that have been successfully tested for the model
   */
  std::vector<Backend> valid_gpu_backends = {Backend::ORT};
  /** Model's valid ipu backends. This member defines all the ipu backends
   * that have been successfully tested for the model
   */
  std::vector<Backend> valid_ipu_backends = {};
  /** Model's valid timvx backends. This member defines all the timvx backends
   * that have been successfully tested for the model
   */
  std::vector<Backend> valid_timvx_backends = {};
  /** Model's valid directml backends. This member defines all the onnxruntime
   * directml backends that have been successfully tested for the model
   */
  std::vector<Backend> valid_directml_backends = {};
  /** Model's valid ascend backends. This member defines all the cann backends
   * that have been successfully tested for the model
   */
  std::vector<Backend> valid_ascend_backends = {};
  /** Model's valid KunlunXin xpu backends. This member defines all the
   * KunlunXin xpu backends that have been successfully tested for the model
   */
  std::vector<Backend> valid_kunlunxin_backends = {};
  /** Model's valid rknpu backends. This member defines all the rknpu backends
   * that have been successfully tested for the model
   */
  std::vector<Backend> valid_rknpu_backends = {};
  /** Model's valid horizon backends. This member defines all the horizon npu
   * backends that have been successfully tested for the model
   */
  std::vector<Backend> valid_horizon_backends = {};
  /** Model's valid sophgo backends. This member defines all the sophgo npu
   * backends that have been successfully tested for the model
   */
  std::vector<Backend> valid_sophgonpu_backends = {};
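  // How these lists are typically used: a concrete model overrides them in
  // its constructor before the runtime is created, so backend selection is
  // restricted to tested backends. A sketch, with `MyModel` as a
  // hypothetical subclass:
  //
  //   MyModel::MyModel(const std::string &model_file,
  //                    const RuntimeOption &option) {
  //     valid_cpu_backends = {Backend::ORT};
  //     valid_gpu_backends = {Backend::ORT};
  //     runtime_option = option;
  //     runtime_option.model_file = model_file;
  //     initialized = InitRuntime();
  //   }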
  /// Get number of inputs for this model
  virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
  /// Get number of outputs for this model
  virtual int NumOutputsOfRuntime() { return runtime_->NumOutputs(); }
  /// Get input information for this model
  virtual TensorInfo InputInfoOfRuntime(int index) {
    return runtime_->GetInputInfo(index);
  }
  /// Get output information for this model
  virtual TensorInfo OutputInfoOfRuntime(int index) {
    return runtime_->GetOutputInfo(index);
  }
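  // A sketch of inspecting the model's I/O after initialization; it assumes
  // TensorInfo carries `name` and `shape` fields, as in FastDeploy:
  //
  //   for (int i = 0; i < model.NumInputsOfRuntime(); ++i) {
  //     auto info = model.InputInfoOfRuntime(i);
  //     std::cout << "input " << i << ": " << info.name << ", dims=";
  //     for (auto d : info.shape) std::cout << d << " ";
  //     std::cout << std::endl;
  //   }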
  /// Check if the model is initialized successfully
  virtual bool Initialized() const {
    return runtime_initialized_ && initialized;
  }
  /** \brief This is a debug interface, used to record the time of runtime
   * (backend + h2d + d2h)
   *
   * example code @code
   * auto model = ultra_infer::vision::PPYOLOE("model.pdmodel",
   *                                           "model.pdiparams",
   *                                           "infer_cfg.yml");
   * if (!model.Initialized()) {
   *   std::cerr << "Failed to initialize." << std::endl;
   *   return -1;
   * }
   * model.EnableRecordTimeOfRuntime();
   * cv::Mat im = cv::imread("test.jpg");
   * for (auto i = 0; i < 1000; ++i) {
   *   ultra_infer::vision::DetectionResult result;
   *   model.Predict(&im, &result);
   * }
   * model.PrintStatisInfoOfRuntime();
   * @endcode
   * After `PrintStatisInfoOfRuntime()` is called, the statistical information
   * of the runtime will be printed in the console
   */
  virtual void EnableRecordTimeOfRuntime() {
    // Release any previously recorded timings before starting a new record.
    std::vector<double>().swap(time_of_runtime_);
    enable_record_time_of_runtime_ = true;
  }
  /** \brief Disable recording the time of runtime, see
   * `EnableRecordTimeOfRuntime()` for more detail
   */
  virtual void DisableRecordTimeOfRuntime() {
    enable_record_time_of_runtime_ = false;
  }
  /** \brief Print the statistical information of runtime in the console, see
   * function `EnableRecordTimeOfRuntime()` for more detail
   */
  virtual std::map<std::string, float> PrintStatisInfoOfRuntime();
  /** \brief Check if `EnableRecordTimeOfRuntime()` has been enabled
   */
  virtual bool EnabledRecordTimeOfRuntime() {
    return enable_record_time_of_runtime_;
  }
  /** \brief Get profile time of runtime after the profile process is done
   */
  virtual double GetProfileTime() { return runtime_->GetProfileTime(); }
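  // A sketch of reading the profiled runtime latency. The `EnableProfiling()`
  // setter on RuntimeOption is an assumption carried over from FastDeploy,
  // where profiling is configured before the model is initialized:
  //
  //   option.EnableProfiling(true /* include h2d/d2h */, 100, 50);
  //   // ... construct the model with `option`, run Predict() repeatedly ...
  //   std::cout << "avg runtime: " << model.GetProfileTime() << std::endl;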
  /** \brief Release reused input/output buffers
   */
  virtual void ReleaseReusedBuffer() {
    std::vector<FDTensor>().swap(reused_input_tensors_);
    std::vector<FDTensor>().swap(reused_output_tensors_);
  }
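  // Illustrative usage: after processing large inputs, release the cached
  // tensor memory so a long-lived model does not keep peak-sized buffers:
  //
  //   model.Predict(&big_image, &result);
  //   model.ReleaseReusedBuffer();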
  virtual ultra_infer::Runtime *CloneRuntime() { return runtime_->Clone(); }
  virtual bool SetRuntime(ultra_infer::Runtime *clone_runtime) {
    runtime_ = std::unique_ptr<Runtime>(clone_runtime);
    return true;
  }
  virtual std::unique_ptr<UltraInferModel> Clone() {
    FDERROR << ModelName() << " doesn't support Clone() now." << std::endl;
    return nullptr;
  }
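  // A sketch of sharing one loaded model across threads by cloning its
  // runtime into a second instance (`model1` and `model2` are assumed to be
  // instances of the same model class):
  //
  //   auto *cloned = model1.CloneRuntime();
  //   model2.SetRuntime(cloned);  // model2 now owns the cloned runtime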

 protected:
  virtual bool InitRuntime();
  bool initialized = false;
  // Reused input tensors
  std::vector<FDTensor> reused_input_tensors_;
  // Reused output tensors
  std::vector<FDTensor> reused_output_tensors_;

 private:
  bool InitRuntimeWithSpecifiedBackend();
  bool InitRuntimeWithSpecifiedDevice();
  bool CreateCpuBackend();
  bool CreateGpuBackend();
  bool CreateIpuBackend();
  bool CreateRKNPUBackend();
  bool CreateHorizonBackend();
  bool CreateSophgoNPUBackend();
  bool CreateTimVXBackend();
  bool CreateKunlunXinBackend();
  bool CreateASCENDBackend();
  bool CreateDirectMLBackend();
  bool IsSupported(const std::vector<Backend> &backends, Backend backend);

  std::shared_ptr<Runtime> runtime_;
  bool runtime_initialized_ = false;
  // Whether to record inference time
  bool enable_record_time_of_runtime_ = false;
  std::vector<double> time_of_runtime_;
};
} // namespace ultra_infer