paddle_backend.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "ultra_infer/runtime/backends/paddle/paddle_backend.h"

#include <sstream>

#include "ultra_infer/utils/path.h"

namespace ultra_infer {

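// BuildOption translates a PaddleBackendOption into the underlying
// paddle_infer::Config: it selects the device (GPU / IPU / KUNLUNXIN / CPU),
// the mixed-precision mode, optional Paddle-TensorRT settings, MKLDNN, the
// CPU math-library thread count, logging, and the new-IR switches.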
void PaddleBackend::BuildOption(const PaddleBackendOption &option) {
  option_ = option;
  if (option.device == Device::GPU) {
    auto inference_precision = paddle_infer::PrecisionType::kFloat32;
    if (option_.inference_precision == "float32") {
      FDINFO << "Will inference_precision float32" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kFloat32;
    } else if (option_.inference_precision == "float16") {
      FDINFO << "Will inference_precision float16" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kHalf;
    } else if (option_.inference_precision == "bfloat16") {
      FDINFO << "Will inference_precision bfloat16" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kBf16;
    } else if (option_.inference_precision == "int8") {
      FDINFO << "Will inference_precision int8" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kInt8;
    } else {
      FDERROR << "Paddle Inference only supports precision in float32,"
              << " float16, bfloat16 and int8" << std::endl;
    }
    config_.Exp_DisableMixedPrecisionOps({"feed", "fetch"});
    config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id,
                         inference_precision);
    // config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id);
    if (option_.switch_ir_debug) {
      FDINFO << "Will enable ir_debug for Paddle Backend." << std::endl;
      config_.SwitchIrDebug();
    }
    if (option_.enable_inference_cutlass) {
#ifdef PADDLEINFERENCE_API_COMPAT_2_4_x
      FDWARNING
          << "You are using Paddle Inference 2.4.x, cutlass is not supported!"
          << std::endl;
#else
      FDINFO << "Will enable_inference_cutlass" << std::endl;
      config_.Exp_EnableUseCutlass();
#endif
    }
    if (option_.external_stream_) {
      FDINFO << "Will use external stream for Paddle Backend." << std::endl;
      config_.SetExecStream(option_.external_stream_);
    }
    if (option.enable_trt) {
      if (!option.trt_option.enable_fp16) {
        FDINFO << "Will try to use TensorRT inference with Paddle Backend."
               << std::endl;
      }
      config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_);
      auto precision = paddle_infer::PrecisionType::kFloat32;
      if (option.trt_option.enable_fp16) {
        FDINFO << "Will try to use TensorRT fp16 inference with Paddle Backend."
               << std::endl;
        precision = paddle_infer::PrecisionType::kHalf;
      }
      bool use_static = false;
      if (option.trt_option.serialize_file != "") {
        FDWARNING
            << "Detected that the TensorRT cache file has been set to "
            << option.trt_option.serialize_file
            << ", but when Paddle-TRT is enabled the cache file will be "
               "saved to the directory of the Paddle model."
            << std::endl;
        use_static = true;
        std::string opt_cache_dir =
            GetDirFromPath(option.trt_option.serialize_file);
        config_.SetOptimCacheDir(opt_cache_dir);
      }
      config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
                                   option.trt_option.max_batch_size,
                                   option.trt_min_subgraph_size, precision,
                                   use_static);
      SetTRTDynamicShapeToConfig(option);
      if (option_.enable_fixed_size_opt) {
        paddle_infer::experimental::InternalUtils::SetTransformerMaskid(
            &config_, "opt");
      }
    }
  } else if (option.device == Device::IPU) {
#ifdef WITH_IPU
    config_.EnableIpu(option.ipu_option.ipu_device_num,
                      option.ipu_option.ipu_micro_batch_size,
                      option.ipu_option.ipu_enable_pipelining,
                      option.ipu_option.ipu_batches_per_step);
    config_.SetIpuConfig(option.ipu_option.ipu_enable_fp16,
                         option.ipu_option.ipu_replica_num,
                         option.ipu_option.ipu_available_memory_proportion,
                         option.ipu_option.ipu_enable_half_partial);
#else
    FDWARNING << "UltraInfer is not compiled with the IPU device, so it will "
                 "fall back to CPU with the Paddle Inference backend."
              << std::endl;
#endif
  } else if (option.device == Device::KUNLUNXIN) {
#ifdef WITH_KUNLUNXIN
    // Note(qiuyanjun): For Paddle XPU L3 Cache, please set
    // export XPU_PADDLE_L3_SIZE=67104768 (XPU R200)
    // export FLAGS_fuse_multi_transformer_quant_type="float"
    config_.EnableXpu(option.xpu_option.kunlunxin_l3_workspace_size,
                      option.xpu_option.kunlunxin_locked,
                      option.xpu_option.kunlunxin_autotune,
                      option.xpu_option.kunlunxin_autotune_file,
                      option.xpu_option.kunlunxin_precision,
                      option.xpu_option.kunlunxin_adaptive_seqlen,
                      option.xpu_option.kunlunxin_enable_multi_stream);
    config_.SetXpuConfig(
        option.xpu_option.kunlunxin_quant_post_dynamic_weight_bits,
        option.xpu_option.kunlunxin_quant_post_dynamic_op_types);
    config_.SetXpuDeviceId(option.xpu_option.kunlunxin_device_id);
#else
    FDWARNING
        << "UltraInfer is not compiled with the KUNLUNXIN device, so it will "
           "fall back to CPU with the Paddle Inference backend."
        << std::endl;
#endif
  } else {
    config_.DisableGpu();
    if (option.enable_mkldnn) {
      config_.EnableMKLDNN();
      config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size);
    } else {
#if defined(PADDLEINFERENCE_API_COMPAT_2_6_x) || \
    (PADDLEINFERENCE_VERSION_MAJOR != 2)
      config_.DisableMKLDNN();
#endif
    }
  }
  if (!option.enable_log_info) {
    config_.DisableGlogInfo();
  }
  if (option.cpu_thread_num <= 0) {
    config_.SetCpuMathLibraryNumThreads(8);
  } else {
    config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
  }
  // Note: SwitchIrOptim is enabled by default for the Paddle Inference
  // backend, so we don't need to set it manually.
  // config_.SwitchIrOptim(option.switch_ir_optimize);
  if (option.enable_new_ir) {
#if PADDLEINFERENCE_VERSION_MAJOR == 2
    FDWARNING << "UltraInfer was compiled with Paddle Inference 2.x, "
                 "which does not support the new IR."
              << std::endl;
#else
    if (option.device == Device::GPU && option.enable_trt) {
      FDWARNING << "Currently, Paddle-TensorRT does not support the new IR, "
                   "so the old IR will be used."
                << std::endl;
    } else {
      config_.EnableNewIR();
      config_.EnableNewExecutor();
      if (option.device == Device::CPU || option.device == Device::GPU) {
        config_.SetOptimizationLevel(3);
      }
    }
#endif
  }
}

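// Init is the entry point used by the Runtime: it checks that the Paddle
// Inference backend supports the requested model format and device, copies
// the relevant fields from RuntimeOption into PaddleBackendOption, and then
// delegates to InitFromPaddle().
//
// Illustrative usage sketch (the RuntimeOption/Runtime helpers such as
// SetModelPath(), UseGpu() and UsePaddleInferBackend() are assumed from the
// public API and are not defined in this file):
//
//   ultra_infer::RuntimeOption opt;
//   opt.SetModelPath("model.pdmodel", "model.pdiparams");
//   opt.UseGpu(0);
//   opt.UsePaddleInferBackend();
//   ultra_infer::Runtime runtime;
//   runtime.Init(opt);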
bool PaddleBackend::Init(const RuntimeOption &runtime_option) {
  if (!(Supported(runtime_option.model_format, Backend::PDINFER) &&
        Supported(runtime_option.device, Backend::PDINFER))) {
    return false;
  }

  auto option = runtime_option;
  // Collect the basic Paddle Inference options and the TensorRT option.
  option.paddle_infer_option.model_file = runtime_option.model_file;
  option.paddle_infer_option.params_file = runtime_option.params_file;
  option.paddle_infer_option.model_from_memory_ =
      runtime_option.model_from_memory_;
  option.paddle_infer_option.device = runtime_option.device;
  option.paddle_infer_option.device_id = runtime_option.device_id;
  option.paddle_infer_option.enable_pinned_memory =
      runtime_option.enable_pinned_memory;
  option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
  option.paddle_infer_option.trt_option = runtime_option.trt_option;
  option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
  // Note(qiuyanjun): For the IPU option and the XPU option, please check the
  // details of RuntimeOption::UseIpu() and RuntimeOption::UseKunlunXin().
  // Furthermore, please check paddle_infer_option.SetIpuConfig() and
  // paddle_infer_option.SetXpuConfig() for more details of the extra configs.
  return InitFromPaddle(option.model_file, option.params_file,
                        option.model_from_memory_, option.paddle_infer_option);
}

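// InitFromPaddle builds the paddle_infer::Config from the given model (a file
// path or an in-memory buffer), optionally collects TensorRT dynamic-shape
// information by running a temporary predictor, creates the predictor, and
// records the input/output tensor descriptions.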
bool PaddleBackend::InitFromPaddle(const std::string &model,
                                   const std::string &params,
                                   bool model_from_memory,
                                   const PaddleBackendOption &option) {
  if (initialized_) {
    FDERROR << "PaddleBackend is already initialized, cannot initialize again."
            << std::endl;
    return false;
  }

  if (model_from_memory) {
    config_.SetModelBuffer(model.c_str(), model.size(), params.c_str(),
                           params.size());
  } else {
    config_.SetModel(model, params);
  }
  if (option.enable_memory_optimize) {
    config_.EnableMemoryOptim();
  }
  BuildOption(option);

  // The input/output information obtained from the predictor is not reliable;
  // use PaddleReader instead for now.
  std::string model_content = model;
  if (!model_from_memory) {
    FDASSERT(ReadBinaryFromFile(model, &model_content),
             "Failed to read file %s.", model.c_str());
  }
  if (option.is_quantize_model) {
    if (option.device == Device::GPU) {
      FDWARNING << "The loaded model is a quantized model. When running it on "
                   "GPU, please use the TensorRT backend to get better "
                   "performance."
                << std::endl;
      if (option.enable_trt) {
        bool use_static = false;
        if (option.trt_option.serialize_file != "") {
          FDWARNING
              << "Detected that the TensorRT cache file has been set to "
              << option.trt_option.serialize_file
              << ", but when Paddle-TRT is enabled the cache file will be "
                 "saved to the directory of the Paddle model."
              << std::endl;
          use_static = true;
        }
#if PADDLEINFERENCE_VERSION_MAJOR != 2
        config_.EnableTensorRtEngine(
            option.trt_option.max_workspace_size,
            option.trt_option.max_batch_size, option.trt_min_subgraph_size,
            paddle_infer::PrecisionType::kInt8, use_static, false, true);
#else
        config_.EnableTensorRtEngine(
            option.trt_option.max_workspace_size,
            option.trt_option.max_batch_size, option.trt_min_subgraph_size,
            paddle_infer::PrecisionType::kInt8, use_static, false);
#endif
        SetTRTDynamicShapeToConfig(option);
      }
    }
    if (option.enable_mkldnn) {
      config_.EnableMkldnnInt8();
    } else {
      FDWARNING << "The loaded model is a quantized model. When running it on "
                   "CPU, please enable MKLDNN to get better performance."
                << std::endl;
    }
  }
  if (option.collect_trt_shape) {
    // Set the shape info file.
    std::string curr_model_dir = "./";
    if (!option.model_from_memory_) {
      curr_model_dir = GetDirFromPath(option.model_file);
    }
    std::string shape_range_info =
        PathJoin(curr_model_dir, "shape_range_info.pbtxt");
    if (!CheckFileExists(shape_range_info)) {
      FDINFO << "Start generating shape range info file." << std::endl;
      paddle_infer::Config analysis_config;
      if (model_from_memory) {
        analysis_config.SetModelBuffer(model.c_str(), model.size(),
                                       params.c_str(), params.size());
      } else {
        analysis_config.SetModel(model, params);
      }
      if (option.collect_trt_shape_by_device) {
        if (option.device == Device::GPU) {
          analysis_config.EnableUseGpu(option.gpu_mem_init_size,
                                       option.device_id,
                                       paddle_infer::PrecisionType::kFloat32);
        }
      }
      analysis_config.CollectShapeRangeInfo(shape_range_info);
      auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config);
      std::map<std::string, std::vector<int>> max_shape;
      std::map<std::string, std::vector<int>> min_shape;
      std::map<std::string, std::vector<int>> opt_shape;
      GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
      std::map<std::string, std::vector<float>> max_input_data;
      std::map<std::string, std::vector<float>> min_input_data;
      std::map<std::string, std::vector<float>> opt_input_data;
      if (!option.trt_option.min_input_data.empty()) {
        GetInputDataFromOption(option, &max_input_data, &min_input_data,
                               &opt_input_data);
      }
      // Need to run once to get the shape range info file.
      CollectShapeRun(predictor_tmp.get(), max_shape, max_input_data);
      CollectShapeRun(predictor_tmp.get(), min_shape, min_input_data);
      CollectShapeRun(predictor_tmp.get(), opt_shape, opt_input_data);
      CollectShapeRun(predictor_tmp.get(), opt_shape, opt_input_data);
      FDINFO << "Finish generating shape range info file." << std::endl;
    }
    FDINFO << "Start loading shape range info file " << shape_range_info
           << " to set TensorRT dynamic shape." << std::endl;
    config_.EnableTunedTensorRtDynamicShape(shape_range_info, true);
  }
  // Note(zhoushunjie): The pass deletion should be executed just before
  // creating the predictor.
  if (!option.delete_pass_names.empty()) {
    auto pass_builder = config_.pass_builder();
    for (int i = 0; i < option.delete_pass_names.size(); i++) {
      FDINFO << "Delete pass : " << option.delete_pass_names[i] << std::endl;
      pass_builder->DeletePass(option.delete_pass_names[i]);
    }
  }
  if (option.enable_log_info) {
    FDINFO << "Finish paddle inference config with summary as: " << std::endl
           << config_.Summary() << std::endl;
  }
  predictor_ = paddle_infer::CreatePredictor(config_);
  auto input_names = predictor_->GetInputNames();
  auto output_names = predictor_->GetOutputNames();
  auto input_dtypes = predictor_->GetInputTypes();

#ifdef PADDLEINFERENCE_API_COMPAT_2_4_x
  // Note: GetInputTensorShape, GetOutputTensorShape and GetOutputTypes
  // are not supported when the Paddle Inference API version is 2.4.x.
  std::map<std::string, std::vector<int64_t>> input_shapes;
  std::map<std::string, std::vector<int64_t>> output_shapes;
  std::map<std::string, paddle_infer::DataType> output_dtypes;
  // Get all the input shape info.
  for (size_t i = 0; i < input_names.size(); ++i) {
    std::vector<int64_t> shape;
    auto handle = predictor_->GetInputHandle(input_names[i]);
    for (int j = 0; j < handle->shape().size(); ++j) {
      shape.push_back(
          static_cast<int64_t>(handle->shape()[j]));  // int32 -> int64
    }
    input_shapes[input_names[i]] = shape;
  }
  // Get all the output shape and dtype info.
  for (size_t i = 0; i < output_names.size(); ++i) {
    std::vector<int64_t> shape;
    auto handle = predictor_->GetOutputHandle(output_names[i]);
    for (int j = 0; j < handle->shape().size(); ++j) {
      shape.push_back(
          static_cast<int64_t>(handle->shape()[j]));  // int32 -> int64
    }
    output_shapes[output_names[i]] = shape;
    output_dtypes[output_names[i]] = handle->type();
  }
#else
  auto input_shapes = predictor_->GetInputTensorShape();
  auto output_shapes = predictor_->GetOutputTensorShape();
  auto output_dtypes = predictor_->GetOutputTypes();
#endif

  inputs_desc_.resize(input_names.size());
  for (int i = 0; i < input_names.size(); ++i) {
    inputs_desc_[i].name = input_names[i];
    auto iter = input_shapes.find(inputs_desc_[i].name);
    FDASSERT(iter != input_shapes.end(), "Cannot find shape for input %s.",
             inputs_desc_[i].name.c_str());
    inputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
    auto iter1 = input_dtypes.find(inputs_desc_[i].name);
    FDASSERT(iter1 != input_dtypes.end(), "Cannot find data type for input %s.",
             inputs_desc_[i].name.c_str());
    inputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
  }
  outputs_desc_.resize(output_names.size());
  for (int i = 0; i < output_names.size(); ++i) {
    outputs_desc_[i].name = output_names[i];
    auto iter = output_shapes.find(outputs_desc_[i].name);
    FDASSERT(iter != output_shapes.end(), "Cannot find shape for output %s.",
             outputs_desc_[i].name.c_str());
    outputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
    auto iter1 = output_dtypes.find(outputs_desc_[i].name);
    FDASSERT(iter1 != output_dtypes.end(),
             "Cannot find data type for output %s.",
             outputs_desc_[i].name.c_str());
    outputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
  }
  initialized_ = true;
  return true;
}

TensorInfo PaddleBackend::GetInputInfo(int index) {
  FDASSERT(index < NumInputs(),
           "The index: %d should be less than the number of inputs: %d.",
           index, NumInputs());
  return inputs_desc_[index];
}

std::vector<TensorInfo> PaddleBackend::GetInputInfos() { return inputs_desc_; }

TensorInfo PaddleBackend::GetOutputInfo(int index) {
  FDASSERT(index < NumOutputs(),
           "The index: %d should be less than the number of outputs: %d.",
           index, NumOutputs());
  return outputs_desc_[index];
}

std::vector<TensorInfo> PaddleBackend::GetOutputInfos() {
  return outputs_desc_;
}

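// Infer binds the FDTensor inputs to the predictor's input handles, runs the
// predictor, and fills the output FDTensors. When copy_to_fd is false the
// outputs share the backend's memory (only supported on CPU/GPU); on IPU the
// results are always copied back.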
bool PaddleBackend::Infer(std::vector<FDTensor> &inputs,
                          std::vector<FDTensor> *outputs, bool copy_to_fd) {
  if (inputs.size() != inputs_desc_.size()) {
    FDERROR << "[PaddleBackend] Size of inputs (" << inputs.size()
            << ") should match the number of inputs of this model ("
            << inputs_desc_.size() << ")." << std::endl;
    return false;
  }

  // Sharing the backend's output memory is only supported on CPU or GPU.
  if (option_.device == Device::IPU) {
    copy_to_fd = true;
  }

  RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto handle = predictor_->GetInputHandle(inputs[i].name);
    ShareTensorFromFDTensor(handle.get(), inputs[i]);
  }
  // Prebinding outputs is only supported for GPU.
  // if (!copy_to_fd) {
  //   for (size_t i = 0; i < (*outputs).size(); ++i) {
  //     auto output_name = (*outputs)[i].name;
  //     // If an output is not prebinded,
  //     // the name of the output is expected to be empty.
  //     // We skip it here.
  //     if (output_name.empty()) {
  //       continue;
  //     }
  //     // Record the prebinded output_name.
  //     // Those outputs do not need PaddleTensorToFDTensor
  //     // after predictor_.Run()
  //     auto handle = predictor_->GetOutputHandle(output_name);
  //     ShareOutTensorFromFDTensor(handle.get(), (*outputs)[i]);
  //   }
  // }

  RUNTIME_PROFILE_LOOP_BEGIN(1)
  predictor_->Run();
  RUNTIME_PROFILE_LOOP_END

  outputs->resize(outputs_desc_.size());
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
    if (copy_to_fd) {
      (*outputs)[i].is_pinned_memory = option_.enable_pinned_memory;
    }
    PaddleTensorToFDTensor(handle, &((*outputs)[i]), copy_to_fd);
  }
  RUNTIME_PROFILE_LOOP_H2D_D2H_END
  return true;
}

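// Clone creates another PaddleBackend serving the same model. If the target
// device id differs from the current GPU device id, the model is
// re-initialized on that device; otherwise the underlying predictor is
// cloned and the input/output descriptions are shared.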
std::unique_ptr<BaseBackend> PaddleBackend::Clone(RuntimeOption &runtime_option,
                                                  void *stream, int device_id) {
  std::unique_ptr<BaseBackend> new_backend =
      utils::make_unique<PaddleBackend>();
  auto casted_backend = dynamic_cast<PaddleBackend *>(new_backend.get());
  if (device_id > 0 && (option_.device == Device::GPU) &&
      device_id != option_.device_id) {
    auto clone_option = option_;
    clone_option.device_id = device_id;
    clone_option.external_stream_ = stream;
    FDASSERT(casted_backend->InitFromPaddle(
                 runtime_option.model_file, runtime_option.params_file,
                 runtime_option.model_from_memory_, clone_option),
             "Clone model from Paddle failed while initializing PaddleBackend.");
    FDWARNING << "The target device id: " << device_id
              << " is different from the current device id: "
              << option_.device_id
              << ", cannot share memory with the current engine." << std::endl;
    return new_backend;
  }
  casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
  casted_backend->outputs_desc_.assign(outputs_desc_.begin(),
                                       outputs_desc_.end());
  casted_backend->predictor_ = std::move(predictor_->Clone(stream));
  return new_backend;
}

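// SetTRTDynamicShapeToConfig forwards the min/opt/max shapes read from the
// TensorRT option to paddle_infer::Config::SetTRTDynamicShapeInfo, but only
// when at least one dynamic shape has been configured.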
void PaddleBackend::SetTRTDynamicShapeToConfig(
    const PaddleBackendOption &option) {
  std::map<std::string, std::vector<int>> max_shape;
  std::map<std::string, std::vector<int>> min_shape;
  std::map<std::string, std::vector<int>> opt_shape;
  GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
  if (min_shape.size() > 0) {
    FDINFO << "Start setting trt dynamic shape." << std::endl;
    config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
    FDINFO << "Finish setting trt dynamic shape." << std::endl;
  }
}

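// GetDynamicShapeFromOption (and GetInputDataFromOption below) copy the
// per-input min/opt/max shapes and input data from option.trt_option into
// the output maps, asserting that every entry present in the "min" map also
// has a matching entry in the "max" and "opt" maps.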
void PaddleBackend::GetDynamicShapeFromOption(
    const PaddleBackendOption &option,
    std::map<std::string, std::vector<int>> *max_shape,
    std::map<std::string, std::vector<int>> *min_shape,
    std::map<std::string, std::vector<int>> *opt_shape) const {
  auto print_shape = [](const std::vector<int> &shape) -> std::string {
    std::ostringstream oss;
    oss << "[";
    for (int i = 0; i < shape.size(); ++i) {
      oss << shape[i];
      if (i < shape.size() - 1) {
        oss << ", ";
      }
    }
    oss << "]";
    return oss.str();
  };
  for (const auto &item : option.trt_option.min_shape) {
    auto max_iter = option.trt_option.max_shape.find(item.first);
    auto opt_iter = option.trt_option.opt_shape.find(item.first);
    FDASSERT(max_iter != option.trt_option.max_shape.end(),
             "Cannot find %s in TrtBackendOption::max_shape.",
             item.first.c_str());
    FDASSERT(opt_iter != option.trt_option.opt_shape.end(),
             "Cannot find %s in TrtBackendOption::opt_shape.",
             item.first.c_str());
    (*max_shape)[item.first].assign(max_iter->second.begin(),
                                    max_iter->second.end());
    (*opt_shape)[item.first].assign(opt_iter->second.begin(),
                                    opt_iter->second.end());
    (*min_shape)[item.first].assign(item.second.begin(), item.second.end());
    FDINFO << item.first
           << ": the max shape = " << print_shape(max_iter->second)
           << ", the min shape = " << print_shape(item.second)
           << ", the opt shape = " << print_shape(opt_iter->second)
           << std::endl;
  }
}

void PaddleBackend::GetInputDataFromOption(
    const PaddleBackendOption &option,
    std::map<std::string, std::vector<float>> *max_input_data,
    std::map<std::string, std::vector<float>> *min_input_data,
    std::map<std::string, std::vector<float>> *opt_input_data) const {
  for (const auto &item : option.trt_option.min_input_data) {
    auto max_iter = option.trt_option.max_input_data.find(item.first);
    auto opt_iter = option.trt_option.opt_input_data.find(item.first);
    FDASSERT(max_iter != option.trt_option.max_input_data.end(),
             "Cannot find %s in TrtBackendOption::max_input_data.",
             item.first.c_str());
    FDASSERT(opt_iter != option.trt_option.opt_input_data.end(),
             "Cannot find %s in TrtBackendOption::opt_input_data.",
             item.first.c_str());
    (*max_input_data)[item.first].assign(max_iter->second.begin(),
                                         max_iter->second.end());
    (*opt_input_data)[item.first].assign(opt_iter->second.begin(),
                                         opt_iter->second.end());
    (*min_input_data)[item.first].assign(item.second.begin(),
                                         item.second.end());
  }
}

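// CollectShapeRun reshapes every model input to the given shape, fills it
// with the user-provided data (or with dummy values of 1 when no data is
// given), and runs the predictor once so Paddle Inference can record the
// shape range information.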
void PaddleBackend::CollectShapeRun(
    paddle_infer::Predictor *predictor,
    const std::map<std::string, std::vector<int>> &shape,
    const std::map<std::string, std::vector<float>> &data) const {
  auto input_names = predictor->GetInputNames();
  auto input_type = predictor->GetInputTypes();
  for (const auto &name : input_names) {
    FDASSERT(shape.find(name) != shape.end() &&
                 input_type.find(name) != input_type.end(),
             "When collect_trt_shape is true, please define max/opt/min shape "
             "for the model input [\"%s\"] via "
             "(C++) RuntimeOption.trt_option.SetShape / "
             "(Python) RuntimeOption.trt_option.set_shape.",
             name.c_str());
    auto tensor = predictor->GetInputHandle(name);
    auto shape_value = shape.at(name);
    int shape_num = std::accumulate(shape_value.begin(), shape_value.end(), 1,
                                    std::multiplies<int>());
    tensor->Reshape(shape_value);
    if (data.find(name) != data.end()) {
      FDASSERT(data.at(name).size() == shape_num,
               "The number of data elements must equal the product of the "
               "shape for input [\"%s\"] when using "
               "(C++) RuntimeOption.trt_option.SetInputData / "
               "(Python) RuntimeOption.trt_option.set_input_data.",
               name.c_str());
    }
    auto dtype = input_type[name];
    switch (dtype) {
      case paddle_infer::DataType::FLOAT32: {
        if (data.find(name) != data.end()) {
          tensor->CopyFromCpu(data.at(name).data());
        } else {
          std::vector<float> input_data(shape_num, 1.0);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
      case paddle_infer::DataType::INT32: {
        if (data.find(name) != data.end()) {
          std::vector<int> input_data(data.at(name).begin(),
                                      data.at(name).end());
          tensor->CopyFromCpu(input_data.data());
        } else {
          std::vector<int> input_data(shape_num, 1);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
      case paddle_infer::DataType::INT64: {
        if (data.find(name) != data.end()) {
          std::vector<int64_t> input_data(data.at(name).begin(),
                                          data.at(name).end());
          tensor->CopyFromCpu(input_data.data());
        } else {
          std::vector<int64_t> input_data(shape_num, 1);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
      default: {
        FDASSERT(false, "The Paddle backend only supports FP32/INT32/INT64 "
                        "input data currently.");
        break;
      }
    }
  }
  predictor->Run();
}

}  // namespace ultra_infer