// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ultra_infer/runtime/runtime.h"

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdlib>

#include "ultra_infer/utils/unique_ptr.h"
#include "ultra_infer/utils/utils.h"
#include "yaml-cpp/yaml.h"

#ifdef ENABLE_ORT_BACKEND
#include "ultra_infer/runtime/backends/ort/ort_backend.h"
#endif
#ifdef ENABLE_TRT_BACKEND
#include "ultra_infer/runtime/backends/tensorrt/trt_backend.h"
#endif
#ifdef ENABLE_PADDLE_BACKEND
#include "ultra_infer/runtime/backends/paddle/paddle_backend.h"
#endif
#ifdef ENABLE_POROS_BACKEND
#include "ultra_infer/runtime/backends/poros/poros_backend.h"
#endif
#ifdef ENABLE_OPENVINO_BACKEND
#include "ultra_infer/runtime/backends/openvino/ov_backend.h"
#endif
#ifdef ENABLE_LITE_BACKEND
#include "ultra_infer/runtime/backends/lite/lite_backend.h"
#endif
#ifdef ENABLE_RKNPU2_BACKEND
#include "ultra_infer/runtime/backends/rknpu2/rknpu2_backend.h"
#endif
#ifdef ENABLE_SOPHGO_BACKEND
#include "ultra_infer/runtime/backends/sophgo/sophgo_backend.h"
#endif
#ifdef ENABLE_HORIZON_BACKEND
#include "ultra_infer/runtime/backends/horizon/horizon_backend.h"
#endif
#ifdef ENABLE_TVM_BACKEND
#include "ultra_infer/runtime/backends/tvm/tvm_backend.h"
#endif

namespace ultra_infer {

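// Select a default inference backend by intersecting the candidates
// registered for the model format with those registered for the target
// device, then taking the first one compiled into this build.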
bool AutoSelectBackend(RuntimeOption &option) {
  auto iter0 = s_default_backends_by_format.find(option.model_format);
  if (iter0 == s_default_backends_by_format.end()) {
    FDERROR << "Cannot find a default backend for model format: "
            << option.model_format
            << ", please define the inference backend in RuntimeOption."
            << std::endl;
    return false;
  }
  auto iter1 = s_default_backends_by_device.find(option.device);
  if (iter1 == s_default_backends_by_device.end()) {
    FDERROR << "Cannot find a default backend for device: " << option.device
            << ", please define the inference backend in RuntimeOption."
            << std::endl;
    return false;
  }
  std::vector<Backend> candidates;
  for (const auto &b0 : iter0->second) {
    for (const auto &b1 : iter1->second) {
      if (b0 == b1) {
        candidates.push_back(b0);
      }
    }
  }
  if (candidates.empty()) {
    FDERROR << "Cannot find an available inference backend for model format: "
            << option.model_format << " with device: " << option.device
            << std::endl;
    return false;
  }
  for (const auto &b : candidates) {
    if (IsBackendAvailable(b)) {
      option.backend = b;
      FDINFO << "UltraInfer will choose " << b << " for inference on this "
             << "model." << std::endl;
      return true;
    }
  }
  std::string debug_message = Str(candidates);
  FDERROR << "The candidate backends for " << option.model_format << " & "
          << option.device << " are " << debug_message
          << ", but none of them has been compiled into the current "
             "UltraInfer build."
          << std::endl;
  return false;
}

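// Initialize the runtime from a RuntimeOption: auto-select a backend when
// none was specified, then construct the backend matching option.backend.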
bool Runtime::Init(const RuntimeOption &_option) {
  option = _option;

  // Choose a default backend by model format and device if the backend is
  // not specified.
  if (option.backend == Backend::UNKNOWN) {
    if (!AutoSelectBackend(option)) {
      return false;
    }
  }

  if (option.backend == Backend::ORT) {
    CreateOrtBackend();
  } else if (option.backend == Backend::TRT) {
    CreateTrtBackend();
  } else if (option.backend == Backend::PDINFER) {
    CreatePaddleBackend();
  } else if (option.backend == Backend::OPENVINO) {
    CreateOpenVINOBackend();
  } else if (option.backend == Backend::LITE) {
    CreateLiteBackend();
  } else if (option.backend == Backend::RKNPU2) {
    CreateRKNPU2Backend();
  } else if (option.backend == Backend::SOPHGOTPU) {
    CreateSophgoNPUBackend();
  } else if (option.backend == Backend::POROS) {
    CreatePorosBackend();
  } else if (option.backend == Backend::HORIZONNPU) {
    CreateHorizonBackend();
  } else if (option.backend == Backend::TVM) {
    CreateTVMBackend();
  } else {
    std::string msg = Str(GetAvailableBackends());
    FDERROR << "The compiled UltraInfer only supports " << msg << ", "
            << option.backend << " is not supported now." << std::endl;
    return false;
  }
  backend_->benchmark_option_ = option.benchmark_option;
  return true;
}

TensorInfo Runtime::GetInputInfo(int index) {
  return backend_->GetInputInfo(index);
}

TensorInfo Runtime::GetOutputInfo(int index) {
  return backend_->GetOutputInfo(index);
}

std::vector<TensorInfo> Runtime::GetInputInfos() {
  return backend_->GetInputInfos();
}

std::vector<TensorInfo> Runtime::GetOutputInfos() {
  return backend_->GetOutputInfos();
}

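// Run inference on caller-provided tensors; every input tensor must reside
// on the same device id the runtime was configured with.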
bool Runtime::Infer(std::vector<FDTensor> &input_tensors,
                    std::vector<FDTensor> *output_tensors) {
  for (auto &tensor : input_tensors) {
    FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id,
             "Device id of input tensor(%d) and runtime(%d) are not the same.",
             tensor.device_id, option.device_id);
  }
  return backend_->Infer(input_tensors, output_tensors);
}

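// Run inference on the tensors previously bound with BindInputTensor();
// outputs are collected in output_tensors_ and tagged with the runtime's
// device id.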
bool Runtime::Infer() {
  bool result = false;
  if (option.device == Device::KUNLUNXIN) {
    // FDTensor::SetExternalData is not supported for Device::KUNLUNXIN yet,
    // so copy_to_fd must be set to true.
    result = backend_->Infer(input_tensors_, &output_tensors_, true);
  } else {
    result = backend_->Infer(input_tensors_, &output_tensors_, false);
  }
  for (auto &tensor : output_tensors_) {
    tensor.device_id = option.device_id;
  }
  return result;
}

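// Bind an external tensor as a named input without copying: an existing
// entry with the same name is updated in place, otherwise a new entry is
// appended.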
void Runtime::BindInputTensor(const std::string &name, FDTensor &input) {
  bool is_exist = false;
  for (auto &t : input_tensors_) {
    if (t.name == name) {
      is_exist = true;
      t.SetExternalData(input.shape, input.dtype, input.MutableData(),
                        input.device, input.device_id);
      break;
    }
  }
  if (!is_exist) {
    FDTensor new_tensor(name);
    new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(),
                               input.device, input.device_id);
    input_tensors_.emplace_back(std::move(new_tensor));
  }
}

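// Bind an external tensor as a named output so the backend writes results
// directly into caller-owned memory.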
void Runtime::BindOutputTensor(const std::string &name, FDTensor &output) {
  bool is_exist = false;
  for (auto &t : output_tensors_) {
    if (t.name == name) {
      is_exist = true;
      t.SetExternalData(output.shape, output.dtype, output.MutableData(),
                        output.device, output.device_id);
      break;
    }
  }
  if (!is_exist) {
    FDTensor new_tensor(name);
    new_tensor.SetExternalData(output.shape, output.dtype, output.MutableData(),
                               output.device, output.device_id);
    output_tensors_.emplace_back(std::move(new_tensor));
  }
}

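// Look up a bound output tensor by name; returns nullptr (with a warning)
// if no such output exists.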
FDTensor *Runtime::GetOutputTensor(const std::string &name) {
  for (auto &t : output_tensors_) {
    if (t.name == name) {
      return &t;
    }
  }
  FDWARNING << "The output name [" << name << "] doesn't exist." << std::endl;
  return nullptr;
}

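// Release the in-memory model/params buffers once the backend no longer
// needs them; a no-op unless the model was loaded from memory.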
void Runtime::ReleaseModelMemoryBuffer() {
  if (option.model_from_memory_) {
    option.model_file.clear();
    option.model_file.shrink_to_fit();
    option.params_file.clear();
    option.params_file.shrink_to_fit();
  }
}

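// Each Create*Backend() helper below instantiates its backend only when the
// matching ENABLE_*_BACKEND flag was set at compile time, and otherwise
// aborts with a hint about the required CMake option.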
void Runtime::CreatePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
  backend_ = utils::make_unique<PaddleBackend>();
  FDASSERT(backend_->Init(option),
           "Failed to initialize Paddle Inference backend.");
#else
  FDASSERT(false, "PaddleBackend is not available, please compile with "
                  "ENABLE_PADDLE_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
         << "." << std::endl;
  const char *pir_api_flag = "FLAGS_enable_pir_api";
  const char *env_value = getenv(pir_api_flag);
  if (env_value == nullptr ||
      (std::string(env_value) != "0" && std::string(env_value) != "False")) {
    FDWARNING
        << "To ensure the correct operation of the Paddle backend, please set "
           "the environment variable 'FLAGS_enable_pir_api' to 'False'."
        << std::endl;
  }
}

void Runtime::CreateOpenVINOBackend() {
#ifdef ENABLE_OPENVINO_BACKEND
  backend_ = utils::make_unique<OpenVINOBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize OpenVINOBackend.");
#else
  FDASSERT(false, "OpenVINOBackend is not available, please compile with "
                  "ENABLE_OPENVINO_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
         << "." << std::endl;
}

void Runtime::CreateTVMBackend() {
#ifdef ENABLE_TVM_BACKEND
  backend_ = utils::make_unique<TVMBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize TVM backend.");
#else
  FDASSERT(false, "TVMBackend is not available, please compile with "
                  "ENABLE_TVM_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::TVM in " << option.device << "."
         << std::endl;
}

void Runtime::CreateOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
  backend_ = utils::make_unique<OrtBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize Backend::ORT.");
#else
  FDASSERT(false, "OrtBackend is not available, please compile with "
                  "ENABLE_ORT_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::ORT in " << option.device << "."
         << std::endl;
}

void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
  backend_ = utils::make_unique<TrtBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend.");
#else
  FDASSERT(false, "TrtBackend is not available, please compile with "
                  "ENABLE_TRT_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::TRT in " << option.device << "."
         << std::endl;
}

void Runtime::CreateLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
  backend_ = utils::make_unique<LiteBackend>();
  FDASSERT(backend_->Init(option),
           "Load model from nb file failed while initializing LiteBackend.");
#else
  FDASSERT(false, "LiteBackend is not available, please compile with "
                  "ENABLE_LITE_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::PDLITE in " << option.device
         << "." << std::endl;
}

void Runtime::CreateRKNPU2Backend() {
#ifdef ENABLE_RKNPU2_BACKEND
  backend_ = utils::make_unique<RKNPU2Backend>();
  FDASSERT(backend_->Init(option), "Failed to initialize RKNPU2 backend.");
#else
  FDASSERT(false, "RKNPU2Backend is not available, please compile with "
                  "ENABLE_RKNPU2_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
         << "." << std::endl;
}

void Runtime::CreateHorizonBackend() {
#ifdef ENABLE_HORIZON_BACKEND
  backend_ = utils::make_unique<HorizonBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize Horizon backend.");
#else
  FDASSERT(false, "HorizonBackend is not available, please compile with "
                  "ENABLE_HORIZON_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::HORIZONNPU in " << option.device
         << "." << std::endl;
}

void Runtime::CreateSophgoNPUBackend() {
#ifdef ENABLE_SOPHGO_BACKEND
  backend_ = utils::make_unique<SophgoBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize Sophgo backend.");
#else
  FDASSERT(false, "SophgoBackend is not available, please compile with "
                  "ENABLE_SOPHGO_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
         << "." << std::endl;
}

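// Clone this runtime onto the given stream/device. Only the OpenVINO and
// Paddle Inference backends share engine memory with the original; other
// backends fall back to initializing an independent engine.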
Runtime *Runtime::Clone(void *stream, int device_id) {
  Runtime *runtime = new Runtime();
  if (option.backend != Backend::OPENVINO &&
      option.backend != Backend::PDINFER) {
    runtime->Init(option);
    FDWARNING << "Only OpenVINO/Paddle Inference support cloning an engine "
                 "to reduce CPU/GPU memory usage now. For "
              << option.backend
              << ", UltraInfer will create a new engine which will not "
                 "share memory with the current runtime."
              << std::endl;
    return runtime;
  }
  FDINFO << "Runtime Clone with " << option.backend << " in " << option.device
         << "." << std::endl;
  runtime->option = option;
  runtime->backend_ = backend_->Clone(option, stream, device_id);
  return runtime;
}

void Runtime::CreatePorosBackend() {
#ifdef ENABLE_POROS_BACKEND
  backend_ = utils::make_unique<PorosBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize Poros backend.");
#else
  FDASSERT(false, "PorosBackend is not available, please compile with "
                  "ENABLE_POROS_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::POROS in " << option.device
         << "." << std::endl;
}

// Only for the Poros backend: compile a TorchScript model with prewarm
// tensors before running inference.
bool Runtime::Compile(std::vector<std::vector<FDTensor>> &prewarm_tensors) {
#ifdef ENABLE_POROS_BACKEND
  option.poros_option.device = option.device;
  option.poros_option.device_id = option.device_id;
  option.poros_option.enable_fp16 = option.trt_option.enable_fp16;
  option.poros_option.max_batch_size = option.trt_option.max_batch_size;
  option.poros_option.max_workspace_size = option.trt_option.max_workspace_size;

  auto casted_backend = dynamic_cast<PorosBackend *>(backend_.get());
  FDASSERT(casted_backend->Compile(option.model_file, prewarm_tensors,
                                   option.poros_option),
           "Load model from TorchScript failed while initializing "
           "PorosBackend.");
#else
  FDASSERT(false, "PorosBackend is not available, please compile with "
                  "ENABLE_POROS_BACKEND=ON.");
#endif
  return true;
}

}  // namespace ultra_infer