- // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- #pragma once
- #include <iostream>
- #include <memory>
- #include <string>
- #include <vector>
- #include "ultra_infer/benchmark/benchmark.h"
- #include "ultra_infer/core/fd_tensor.h"
- #include "ultra_infer/core/fd_type.h"
- #include "ultra_infer/runtime/runtime_option.h"
- namespace ultra_infer {
- /*! @brief Information of a tensor
-  */
- struct TensorInfo {
- std::string name; ///< Name of tensor
- std::vector<int> shape; ///< Shape of tensor
- FDDataType dtype; ///< Data type of tensor
- friend std::ostream &operator<<(std::ostream &output,
- const TensorInfo &info) {
- output << "TensorInfo(name: " << info.name << ", shape: [";
- for (size_t i = 0; i < info.shape.size(); ++i) {
- if (i == info.shape.size() - 1) {
- output << info.shape[i];
- } else {
- output << info.shape[i] << ", ";
- }
- }
- output << "], dtype: " << Str(info.dtype) << ")";
- return output;
- }
- };
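- // A minimal usage sketch (hypothetical values; the exact dtype string
- // printed depends on what Str(FDDataType) returns):
- //   TensorInfo info{"x", {1, 3, 224, 224}, FDDataType::FP32};
- //   std::cout << info << std::endl;
- //   // -> TensorInfo(name: x, shape: [1, 3, 224, 224], dtype: ...)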
- class BaseBackend {
- public:
- bool initialized_ = false;
- BaseBackend() = default;
- virtual ~BaseBackend() = default;
- virtual bool Initialized() const { return initialized_; }
- virtual bool Init(const RuntimeOption &option) {
- FDERROR << "Not Implement for " << option.backend << " in " << option.device
- << "." << std::endl;
- return false;
- }
- // Get number of inputs of the model
- virtual int NumInputs() const = 0;
- // Get number of outputs of the model
- virtual int NumOutputs() const = 0;
- // Get information of input tensor
- virtual TensorInfo GetInputInfo(int index) = 0;
- // Get information of output tensor
- virtual TensorInfo GetOutputInfo(int index) = 0;
- // Get information of all the input tensors
- virtual std::vector<TensorInfo> GetInputInfos() = 0;
- // Get information of all the output tensors
- virtual std::vector<TensorInfo> GetOutputInfos() = 0;
- // If copy_to_fd is true, copy the memory data to the FDTensor;
- // otherwise, share the memory with the FDTensor
- // (only Paddle, ORT, TRT and OpenVINO support sharing)
- virtual bool Infer(std::vector<FDTensor> &inputs,
- std::vector<FDTensor> *outputs,
- bool copy_to_fd = true) = 0;
- // Optional: for backends that can share memory while creating
- // multiple inference engines from the same model file
- virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
- void *stream = nullptr,
- int device_id = -1) {
- FDERROR << "Clone no support " << runtime_option.backend << " " << stream
- << " " << device_id << std::endl;
- return nullptr;
- }
- benchmark::BenchmarkOption benchmark_option_;
- benchmark::BenchmarkResult benchmark_result_;
- };
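- /** Example (a hedged sketch, not a backend that exists in this header):
- * a hypothetical 'DummyBackend' showing the minimal set of overrides a
- * concrete backend must provide; a real backend would build its engine
- * in Init() and execute it in Infer(). @code
- * class DummyBackend : public BaseBackend {
- * public:
- * bool Init(const RuntimeOption &option) override {
- * // A real backend would load/compile the model from 'option' here.
- * initialized_ = true;
- * return true;
- * }
- * int NumInputs() const override { return 1; }
- * int NumOutputs() const override { return 1; }
- * TensorInfo GetInputInfo(int index) override { return input_info_; }
- * TensorInfo GetOutputInfo(int index) override { return output_info_; }
- * std::vector<TensorInfo> GetInputInfos() override { return {input_info_}; }
- * std::vector<TensorInfo> GetOutputInfos() override { return {output_info_}; }
- * bool Infer(std::vector<FDTensor> &inputs, std::vector<FDTensor> *outputs,
- * bool copy_to_fd = true) override {
- * // Run the engine and fill 'outputs'; copy the result into the
- * // FDTensor when 'copy_to_fd' is true, otherwise share the buffer.
- * *outputs = inputs; // placeholder: echo the inputs back
- * return true;
- * }
- * private:
- * TensorInfo input_info_;
- * TensorInfo output_info_;
- * };
- * @endcode
- */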
- /** \brief Macros for Runtime benchmark profiling.
- * The param 'base_loop' of 'RUNTIME_PROFILE_LOOP_BEGIN'
- * indicates the number of times the wrapped code will run
- * when profiling mode is not enabled.
- * In most cases the value should be 1, i.e., when profiling
- * mode is turned off, the results are obtained by running
- * the inference process once. This applies to ONNX Runtime,
- * OpenVINO, TensorRT, Paddle Inference, Paddle Lite,
- * RKNPU2, SOPHGO, etc.
- *
- * example code @code
- * // OpenVINOBackend::Infer
- * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
- * // do something ....
- * RUNTIME_PROFILE_LOOP_BEGIN(1)
- * // The code wrapped by the 'BEGIN(1) ~ END' scope
- * // runs only once when profiling mode is not enabled.
- * request_.infer();
- * RUNTIME_PROFILE_LOOP_END
- * // do something ....
- * RUNTIME_PROFILE_LOOP_H2D_D2H_END
- *
- * @endcode In this case, no variable that is required by
- * subsequent code is created inside the 'BEGIN ~ END' scope.
- * Sometimes, however, 'base_loop' needs to be set to 0,
- * e.g., for POROS.
- *
- * example code @code
- * // PorosBackend::Infer
- * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
- * // do something ....
- * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' to 0
- * // The code wrapped by the 'BEGIN(0) ~ END' scope
- * // will not run when profiling mode is not enabled.
- * auto poros_outputs = _poros_module->forward(poros_inputs);
- * RUNTIME_PROFILE_LOOP_END
- * // Run another inference beyond the scope of 'BEGIN ~ END'
- * // to get valid outputs for subsequent tasks.
- * auto poros_outputs = _poros_module->forward(poros_inputs);
- * // do something .... will use 'poros_outputs' ...
- * if (poros_outputs.isTensor()) {
- * // ...
- * }
- * RUNTIME_PROFILE_LOOP_H2D_D2H_END
- *
- * @endcode In this case, 'poros_outputs' is created inside the
- * 'BEGIN ~ END' scope but is required by subsequent code, so we
- * set 'base_loop' to 0 and launch another inference beyond the
- * 'BEGIN ~ END' scope to obtain valid outputs for the
- * subsequent tasks.
- */
- #define RUNTIME_PROFILE_LOOP_BEGIN(base_loop) \
- __RUNTIME_PROFILE_LOOP_BEGIN(benchmark_option_, (base_loop))
- #define RUNTIME_PROFILE_LOOP_END __RUNTIME_PROFILE_LOOP_END(benchmark_result_)
- #define RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN \
- __RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN(benchmark_option_, 1)
- #define RUNTIME_PROFILE_LOOP_H2D_D2H_END \
- __RUNTIME_PROFILE_LOOP_H2D_D2H_END(benchmark_result_)
- } // namespace ultra_infer