paddle_backend.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "ultra_infer/runtime/backends/paddle/paddle_backend.h"

#include <sstream>

#include "ultra_infer/utils/path.h"

namespace ultra_infer {
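
// Translates a PaddleBackendOption into the underlying paddle_infer::Config.
// Handles device selection (GPU / IPU / KUNLUNXIN / CPU), mixed precision,
// Paddle-TensorRT, MKLDNN, and the new IR/executor switches.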
void PaddleBackend::BuildOption(const PaddleBackendOption &option) {
  option_ = option;
  if (option.device == Device::GPU) {
    auto inference_precision = paddle_infer::PrecisionType::kFloat32;
    if (option_.inference_precision == "float32") {
      FDINFO << "Will use inference precision float32" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kFloat32;
    } else if (option_.inference_precision == "float16") {
      FDINFO << "Will use inference precision float16" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kHalf;
    } else if (option_.inference_precision == "bfloat16") {
      FDINFO << "Will use inference precision bfloat16" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kBf16;
    } else if (option_.inference_precision == "int8") {
      FDINFO << "Will use inference precision int8" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kInt8;
    } else {
      FDERROR << "Paddle Inference only supports precision in float32,"
              << " float16, bfloat16 and int8" << std::endl;
    }
    config_.Exp_DisableMixedPrecisionOps({"feed", "fetch"});
    config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id,
                         inference_precision);
    // config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id);
    if (option_.switch_ir_debug) {
      FDINFO << "Will enable ir_debug for Paddle Backend." << std::endl;
      config_.SwitchIrDebug();
    }
    if (option_.enable_inference_cutlass) {
#ifdef PADDLEINFERENCE_API_COMPAT_2_4_x
      FDWARNING
          << "You are using Paddle Inference 2.4.x, cutlass is not supported!"
          << std::endl;
#else
      FDINFO << "Will enable_inference_cutlass" << std::endl;
      config_.Exp_EnableUseCutlass();
#endif
    }
    if (option_.external_stream_) {
      FDINFO << "Will use external stream for Paddle Backend." << std::endl;
      config_.SetExecStream(option_.external_stream_);
    }
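
    // Paddle-TensorRT (paddle2trt): run supported subgraphs with TensorRT
    // inside Paddle Inference. Dynamic shapes are either set explicitly from
    // the option here, or collected at runtime (see the collect_trt_shape
    // handling in InitFromPaddle).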
    if (option.enable_trt) {
      if (!option.trt_option.enable_fp16) {
        FDINFO << "Will try to use tensorrt inference with Paddle Backend."
               << std::endl;
      }
      config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_);
      auto precision = paddle_infer::PrecisionType::kFloat32;
      if (option.trt_option.enable_fp16) {
        FDINFO << "Will try to use tensorrt fp16 inference with Paddle Backend."
               << std::endl;
        precision = paddle_infer::PrecisionType::kHalf;
      }
      bool use_static = false;
      if (option.trt_option.serialize_file != "") {
        FDWARNING
            << "Detected that the tensorrt cache file has been set to "
            << option.trt_option.serialize_file
            << ", but when paddle2trt is enabled, please notice that the "
               "cache file will be saved to the directory where the paddle "
               "model is saved."
            << std::endl;
        use_static = true;
        std::string opt_cache_dir =
            GetDirFromPath(option.trt_option.serialize_file);
        config_.SetOptimCacheDir(opt_cache_dir);
      }
      config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
                                   option.trt_option.max_batch_size,
                                   option.trt_min_subgraph_size, precision,
                                   use_static);
      if (!option.collect_trt_shape) {
        SetTRTDynamicShapeToConfig(option);
      }
      if (option_.enable_fixed_size_opt) {
        paddle_infer::experimental::InternalUtils::SetTransformerMaskid(
            &config_, "opt");
      }
    }
  } else if (option.device == Device::IPU) {
#ifdef WITH_IPU
    config_.EnableIpu(option.ipu_option.ipu_device_num,
                      option.ipu_option.ipu_micro_batch_size,
                      option.ipu_option.ipu_enable_pipelining,
                      option.ipu_option.ipu_batches_per_step);
    config_.SetIpuConfig(option.ipu_option.ipu_enable_fp16,
                         option.ipu_option.ipu_replica_num,
                         option.ipu_option.ipu_available_memory_proportion,
                         option.ipu_option.ipu_enable_half_partial);
#else
    FDWARNING << "UltraInfer is not compiled with the IPU device, so it will "
                 "fall back to CPU with the Paddle Inference Backend."
              << std::endl;
#endif
  } else if (option.device == Device::KUNLUNXIN) {
#ifdef WITH_KUNLUNXIN
    // Note(qiuyanjun): For Paddle XPU L3 Cache, please set
    // export XPU_PADDLE_L3_SIZE=67104768 (XPU R200)
    // export FLAGS_fuse_multi_transformer_quant_type="float"
    config_.EnableXpu(option.xpu_option.kunlunxin_l3_workspace_size,
                      option.xpu_option.kunlunxin_locked,
                      option.xpu_option.kunlunxin_autotune,
                      option.xpu_option.kunlunxin_autotune_file,
                      option.xpu_option.kunlunxin_precision,
                      option.xpu_option.kunlunxin_adaptive_seqlen,
                      option.xpu_option.kunlunxin_enable_multi_stream);
    config_.SetXpuConfig(
        option.xpu_option.kunlunxin_quant_post_dynamic_weight_bits,
        option.xpu_option.kunlunxin_quant_post_dynamic_op_types);
    config_.SetXpuDeviceId(option.xpu_option.kunlunxin_device_id);
#else
    FDWARNING
        << "UltraInfer is not compiled with the KUNLUNXIN device, so it will "
           "fall back to CPU with the Paddle Inference Backend."
        << std::endl;
#endif
  } else {
    config_.DisableGpu();
    if (option.enable_mkldnn) {
      config_.EnableMKLDNN();
      config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size);
    } else {
#if defined(PADDLEINFERENCE_API_COMPAT_2_6_x) || \
    (PADDLEINFERENCE_VERSION_MAJOR != 2)
      config_.DisableMKLDNN();
#endif
    }
  }
  if (!option.enable_log_info) {
    config_.DisableGlogInfo();
  }
  if (option.cpu_thread_num <= 0) {
    config_.SetCpuMathLibraryNumThreads(8);
  } else {
    config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
  }
  // Note: SwitchIrOptim is enabled by default for the Paddle Inference
  // backend, so we don't need to set it manually.
  // config_.SwitchIrOptim(option.switch_ir_optimize);
  if (option.enable_new_ir) {
#if PADDLEINFERENCE_VERSION_MAJOR == 2
    FDWARNING << "UltraInfer was compiled with Paddle Inference 2.x, "
                 "which does not support the new IR."
              << std::endl;
#else
    if (option.device == Device::GPU && option.enable_trt) {
      FDWARNING << "Currently, Paddle-TensorRT does not support the new IR, "
                   "and the old IR will be used."
                << std::endl;
    } else {
      config_.EnableNewIR();
      config_.EnableNewExecutor();
      if (option.device == Device::CPU || option.device == Device::GPU) {
        config_.SetOptimizationLevel(3);
      }
    }
#endif
  }
}
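
// Entry point used by the Runtime layer: checks that the model format and
// device are supported by Backend::PDINFER, copies the relevant fields from
// RuntimeOption into PaddleBackendOption, then delegates to InitFromPaddle().
//
// A minimal usage sketch (illustrative only; the backend is normally driven
// through the surrounding Runtime/RuntimeOption layer, and the paths and the
// remaining RuntimeOption fields such as model_format and device are assumed
// to be configured appropriately):
//
//   ultra_infer::RuntimeOption opt;
//   opt.model_file = "model.pdmodel";    // hypothetical model path
//   opt.params_file = "model.pdiparams";  // hypothetical params path
//   ultra_infer::PaddleBackend backend;
//   if (!backend.Init(opt)) { /* handle initialization failure */ }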
bool PaddleBackend::Init(const RuntimeOption &runtime_option) {
  if (!(Supported(runtime_option.model_format, Backend::PDINFER) &&
        Supported(runtime_option.device, Backend::PDINFER))) {
    return false;
  }

  auto option = runtime_option;
  // Collect the basic Paddle Inference options and the TensorRT option.
  option.paddle_infer_option.model_file = runtime_option.model_file;
  option.paddle_infer_option.params_file = runtime_option.params_file;
  option.paddle_infer_option.model_from_memory_ =
      runtime_option.model_from_memory_;
  option.paddle_infer_option.device = runtime_option.device;
  option.paddle_infer_option.device_id = runtime_option.device_id;
  option.paddle_infer_option.enable_pinned_memory =
      runtime_option.enable_pinned_memory;
  option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
  option.paddle_infer_option.trt_option = runtime_option.trt_option;
  option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
  // Note(qiuyanjun): For the IPU option and XPU option, please check the
  // details of RuntimeOption::UseIpu() and RuntimeOption::UseKunlunXin().
  // Furthermore, please check paddle_infer_option.SetIpuConfig() and
  // paddle_infer_option.SetXpuConfig() for more details of the extra configs.
  return InitFromPaddle(option.model_file, option.params_file,
                        option.model_from_memory_, option.paddle_infer_option);
}
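
// Builds the paddle_infer::Config from the given model/params (from file or
// from memory), optionally collects TensorRT dynamic-shape information, then
// creates the predictor and caches the input/output tensor descriptions.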
bool PaddleBackend::InitFromPaddle(const std::string &model,
                                   const std::string &params,
                                   bool model_from_memory,
                                   const PaddleBackendOption &option) {
  if (initialized_) {
    FDERROR << "PaddleBackend is already initialized, cannot initialize again."
            << std::endl;
    return false;
  }
  if (model_from_memory) {
    config_.SetModelBuffer(model.c_str(), model.size(), params.c_str(),
                           params.size());
  } else {
    config_.SetModel(model, params);
  }
  if (option.enable_memory_optimize) {
    config_.EnableMemoryOptim();
  }
  BuildOption(option);

  // The input/output information obtained from the predictor is not right,
  // use PaddleReader instead for now.
  std::string model_content = model;
  if (!model_from_memory) {
    FDASSERT(ReadBinaryFromFile(model, &model_content),
             "Failed to read file %s.", model.c_str());
  }
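
  // Quantized (int8) models: prefer Paddle-TensorRT int8 on GPU and MKLDNN
  // int8 on CPU; otherwise warn that performance may suffer.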
  if (option.is_quantize_model) {
    if (option.device == Device::GPU) {
      FDWARNING << "The loaded model is a quantized model; when running "
                   "inference on GPU, please use the TensorRT backend to get "
                   "better performance."
                << std::endl;
      if (option.enable_trt) {
        bool use_static = false;
        if (option.trt_option.serialize_file != "") {
          FDWARNING
              << "Detected that the tensorrt cache file has been set to "
              << option.trt_option.serialize_file
              << ", but when paddle2trt is enabled, please notice that the "
                 "cache file will be saved to the directory where the paddle "
                 "model is saved."
              << std::endl;
          use_static = true;
        }
#if PADDLEINFERENCE_VERSION_MAJOR != 2
        config_.EnableTensorRtEngine(
            option.trt_option.max_workspace_size,
            option.trt_option.max_batch_size, option.trt_min_subgraph_size,
            paddle_infer::PrecisionType::kInt8, use_static, false, true);
#else
        config_.EnableTensorRtEngine(
            option.trt_option.max_workspace_size,
            option.trt_option.max_batch_size, option.trt_min_subgraph_size,
            paddle_infer::PrecisionType::kInt8, use_static, false);
#endif
        SetTRTDynamicShapeToConfig(option);
      }
    }
    if (option.enable_mkldnn) {
      config_.EnableMkldnnInt8();
    } else {
      FDWARNING << "The loaded model is a quantized model; when running "
                   "inference on CPU, please enable MKLDNN to get better "
                   "performance."
                << std::endl;
    }
  }
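
  // When collect_trt_shape is enabled, run the model once for each of the
  // user-provided min/opt/max shapes through a temporary predictor so that
  // Paddle Inference can record a shape_range_info.pbtxt file, then feed that
  // file back as the tuned TensorRT dynamic-shape info.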
  if (option.collect_trt_shape) {
    // Set the shape info file.
    std::string curr_model_dir = "./";
    if (!option.model_from_memory_) {
      curr_model_dir = GetDirFromPath(option.model_file);
    }
    std::string shape_range_info =
        PathJoin(curr_model_dir, "shape_range_info.pbtxt");
    if (!CheckFileExists(shape_range_info)) {
      FDINFO << "Start generating shape range info file." << std::endl;
      paddle_infer::Config analysis_config;
      if (model_from_memory) {
        analysis_config.SetModelBuffer(model.c_str(), model.size(),
                                       params.c_str(), params.size());
      } else {
        analysis_config.SetModel(model, params);
      }
      if (option.collect_trt_shape_by_device) {
        if (option.device == Device::GPU) {
          analysis_config.EnableUseGpu(option.gpu_mem_init_size,
                                       option.device_id,
                                       paddle_infer::PrecisionType::kFloat32);
        }
      }
      analysis_config.CollectShapeRangeInfo(shape_range_info);
      auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config);
      std::map<std::string, std::vector<int>> max_shape;
      std::map<std::string, std::vector<int>> min_shape;
      std::map<std::string, std::vector<int>> opt_shape;
      GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
      std::map<std::string, std::vector<float>> max_input_data;
      std::map<std::string, std::vector<float>> min_input_data;
      std::map<std::string, std::vector<float>> opt_input_data;
      if (!option.trt_option.min_input_data.empty()) {
        GetInputDataFromOption(option, &max_input_data, &min_input_data,
                               &opt_input_data);
      }
      // Need to run once to get the shape range info file.
      CollectShapeRun(predictor_tmp.get(), max_shape, max_input_data);
      CollectShapeRun(predictor_tmp.get(), min_shape, min_input_data);
      CollectShapeRun(predictor_tmp.get(), opt_shape, opt_input_data);
      CollectShapeRun(predictor_tmp.get(), opt_shape, opt_input_data);
      FDINFO << "Finish generating shape range info file." << std::endl;
    }
    FDINFO << "Start loading shape range info file " << shape_range_info
           << " to set TensorRT dynamic shape." << std::endl;
    config_.EnableTunedTensorRtDynamicShape(shape_range_info,
                                            option.allow_build_trt_at_runtime);
  }

  // Note(zhoushunjie): The pass deletion should be executed just before
  // creating the predictor.
  if (!option.delete_pass_names.empty()) {
    auto pass_builder = config_.pass_builder();
    for (size_t i = 0; i < option.delete_pass_names.size(); i++) {
      FDINFO << "Delete pass : " << option.delete_pass_names[i] << std::endl;
      pass_builder->DeletePass(option.delete_pass_names[i]);
    }
  }
  if (option.enable_log_info) {
    FDINFO << "Finish paddle inference config with summary as: " << std::endl
           << config_.Summary() << std::endl;
  }
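
  // Create the predictor and cache the input/output names, shapes and dtypes
  // so that later Infer() calls can validate inputs without querying the
  // predictor again.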
  predictor_ = paddle_infer::CreatePredictor(config_);
  auto input_names = predictor_->GetInputNames();
  auto output_names = predictor_->GetOutputNames();
  auto input_dtypes = predictor_->GetInputTypes();
#ifdef PADDLEINFERENCE_API_COMPAT_2_4_x
  // Note: GetInputTensorShape, GetOutputTensorShape and GetOutputTypes
  // are not supported when the Paddle Inference API version is 2.4.x.
  std::map<std::string, std::vector<int64_t>> input_shapes;
  std::map<std::string, std::vector<int64_t>> output_shapes;
  std::map<std::string, paddle_infer::DataType> output_dtypes;
  // Get all the input shape info.
  for (size_t i = 0; i < input_names.size(); ++i) {
    std::vector<int64_t> shape;
    auto handle = predictor_->GetInputHandle(input_names[i]);
    for (size_t j = 0; j < handle->shape().size(); ++j) {
      shape.push_back(
          static_cast<int64_t>(handle->shape()[j]));  // int32 -> int64
    }
    input_shapes[input_names[i]] = shape;
  }
  // Get all the output shape and dtype info.
  for (size_t i = 0; i < output_names.size(); ++i) {
    std::vector<int64_t> shape;
    auto handle = predictor_->GetOutputHandle(output_names[i]);
    for (size_t j = 0; j < handle->shape().size(); ++j) {
      shape.push_back(
          static_cast<int64_t>(handle->shape()[j]));  // int32 -> int64
    }
    output_shapes[output_names[i]] = shape;
    output_dtypes[output_names[i]] = handle->type();
  }
#else
  auto input_shapes = predictor_->GetInputTensorShape();
  auto output_shapes = predictor_->GetOutputTensorShape();
  auto output_dtypes = predictor_->GetOutputTypes();
#endif

  inputs_desc_.resize(input_names.size());
  for (size_t i = 0; i < input_names.size(); ++i) {
    inputs_desc_[i].name = input_names[i];
    auto iter = input_shapes.find(inputs_desc_[i].name);
    FDASSERT(iter != input_shapes.end(), "Cannot find shape for input %s.",
             inputs_desc_[i].name.c_str());
    inputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
    auto iter1 = input_dtypes.find(inputs_desc_[i].name);
    FDASSERT(iter1 != input_dtypes.end(), "Cannot find data type for input %s.",
             inputs_desc_[i].name.c_str());
    inputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
  }
  outputs_desc_.resize(output_names.size());
  for (size_t i = 0; i < output_names.size(); ++i) {
    outputs_desc_[i].name = output_names[i];
    auto iter = output_shapes.find(outputs_desc_[i].name);
    FDASSERT(iter != output_shapes.end(), "Cannot find shape for output %s.",
             outputs_desc_[i].name.c_str());
    outputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
    auto iter1 = output_dtypes.find(outputs_desc_[i].name);
    FDASSERT(iter1 != output_dtypes.end(),
             "Cannot find data type for output %s.",
             outputs_desc_[i].name.c_str());
    outputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
  }
  initialized_ = true;
  return true;
}

TensorInfo PaddleBackend::GetInputInfo(int index) {
  FDASSERT(index < NumInputs(),
           "The index: %d should be less than the number of inputs: %d.",
           index, NumInputs());
  return inputs_desc_[index];
}

std::vector<TensorInfo> PaddleBackend::GetInputInfos() { return inputs_desc_; }

TensorInfo PaddleBackend::GetOutputInfo(int index) {
  FDASSERT(index < NumOutputs(),
           "The index: %d should be less than the number of outputs: %d.",
           index, NumOutputs());
  return outputs_desc_[index];
}

std::vector<TensorInfo> PaddleBackend::GetOutputInfos() {
  return outputs_desc_;
}
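
// Runs one inference: shares (or copies) the FDTensor inputs into the
// predictor's input handles, executes predictor_->Run(), then copies or
// shares the outputs back into FDTensors. When copy_to_fd is false the
// outputs may reference the backend's memory directly (CPU/GPU only).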
bool PaddleBackend::Infer(std::vector<FDTensor> &inputs,
                          std::vector<FDTensor> *outputs, bool copy_to_fd) {
  if (inputs.size() != inputs_desc_.size()) {
    FDERROR << "[PaddleBackend] Size of inputs(" << inputs.size()
            << ") should keep same with the inputs of this model("
            << inputs_desc_.size() << ")." << std::endl;
    return false;
  }
  // Sharing the backend's output memory is only supported on CPU or GPU.
  if (option_.device == Device::IPU) {
    copy_to_fd = true;
  }

  RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto handle = predictor_->GetInputHandle(inputs[i].name);
    ShareTensorFromFDTensor(handle.get(), inputs[i]);
  }
  // Prebinding outputs is only supported for GPU.
  // if (!copy_to_fd) {
  //   for (size_t i = 0; i < (*outputs).size(); ++i) {
  //     auto output_name = (*outputs)[i].name;
  //     // If an output is not prebinded, the name of the output is expected
  //     // to be empty, so we skip it here.
  //     if (output_name.empty()) {
  //       continue;
  //     }
  //     // Record the prebinded output_name. Those outputs do not need
  //     // PaddleTensorToFDTensor after predictor_->Run().
  //     auto handle = predictor_->GetOutputHandle(output_name);
  //     ShareOutTensorFromFDTensor(handle.get(), (*outputs)[i]);
  //   }
  // }

  RUNTIME_PROFILE_LOOP_BEGIN(1)
  predictor_->Run();
  RUNTIME_PROFILE_LOOP_END

  outputs->resize(outputs_desc_.size());
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
    if (copy_to_fd) {
      (*outputs)[i].is_pinned_memory = option_.enable_pinned_memory;
    }
    PaddleTensorToFDTensor(handle, &((*outputs)[i]), copy_to_fd);
  }
  RUNTIME_PROFILE_LOOP_H2D_D2H_END
  return true;
}
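
// Clones the backend for use on another stream/device. If the target device
// id matches the current one, the underlying predictor is cloned on the given
// stream and the cached tensor descriptions are shared; otherwise the clone
// is fully re-initialized from the model on the requested device.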
std::unique_ptr<BaseBackend> PaddleBackend::Clone(RuntimeOption &runtime_option,
                                                  void *stream, int device_id) {
  std::unique_ptr<BaseBackend> new_backend =
      utils::make_unique<PaddleBackend>();
  auto casted_backend = dynamic_cast<PaddleBackend *>(new_backend.get());
  if (device_id > 0 && (option_.device == Device::GPU) &&
      device_id != option_.device_id) {
    auto clone_option = option_;
    clone_option.device_id = device_id;
    clone_option.external_stream_ = stream;
    FDASSERT(casted_backend->InitFromPaddle(
                 runtime_option.model_file, runtime_option.params_file,
                 runtime_option.model_from_memory_, clone_option),
             "Clone model from Paddle failed while initializing PaddleBackend.");
    FDWARNING << "The target device id:" << device_id
              << " is different from the current device id:"
              << option_.device_id
              << ", cannot share memory with the current engine." << std::endl;
    return new_backend;
  }
  casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
  casted_backend->outputs_desc_.assign(outputs_desc_.begin(),
                                       outputs_desc_.end());
  casted_backend->predictor_ = std::move(predictor_->Clone(stream));
  return new_backend;
}
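
// The helpers below translate the TensorRT min/opt/max shape (and optional
// input data) settings from TrtBackendOption into the forms expected by
// paddle_infer::Config and the shape-collection pass.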
void PaddleBackend::SetTRTDynamicShapeToConfig(
    const PaddleBackendOption &option) {
  std::map<std::string, std::vector<int>> max_shape;
  std::map<std::string, std::vector<int>> min_shape;
  std::map<std::string, std::vector<int>> opt_shape;
  GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
  if (min_shape.size() > 0) {
    FDINFO << "Start setting trt dynamic shape." << std::endl;
    config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
    FDINFO << "Finish setting trt dynamic shape." << std::endl;
  }
}

void PaddleBackend::GetDynamicShapeFromOption(
    const PaddleBackendOption &option,
    std::map<std::string, std::vector<int>> *max_shape,
    std::map<std::string, std::vector<int>> *min_shape,
    std::map<std::string, std::vector<int>> *opt_shape) const {
  auto print_shape = [](const std::vector<int> &shape) -> std::string {
    std::ostringstream oss;
    oss << "[";
    for (size_t i = 0; i < shape.size(); ++i) {
      oss << shape[i];
      if (i + 1 < shape.size()) {
        oss << ", ";
      }
    }
    oss << "]";
    return oss.str();
  };
  for (const auto &item : option.trt_option.min_shape) {
    auto max_iter = option.trt_option.max_shape.find(item.first);
    auto opt_iter = option.trt_option.opt_shape.find(item.first);
    FDASSERT(max_iter != option.trt_option.max_shape.end(),
             "Cannot find %s in TrtBackendOption::max_shape.",
             item.first.c_str());
    FDASSERT(opt_iter != option.trt_option.opt_shape.end(),
             "Cannot find %s in TrtBackendOption::opt_shape.",
             item.first.c_str());
    (*max_shape)[item.first].assign(max_iter->second.begin(),
                                    max_iter->second.end());
    (*opt_shape)[item.first].assign(opt_iter->second.begin(),
                                    opt_iter->second.end());
    (*min_shape)[item.first].assign(item.second.begin(), item.second.end());
    FDINFO << item.first
           << ": the max shape = " << print_shape(max_iter->second)
           << ", the min shape = " << print_shape(item.second)
           << ", the opt shape = " << print_shape(opt_iter->second)
           << std::endl;
  }
}

void PaddleBackend::GetInputDataFromOption(
    const PaddleBackendOption &option,
    std::map<std::string, std::vector<float>> *max_input_data,
    std::map<std::string, std::vector<float>> *min_input_data,
    std::map<std::string, std::vector<float>> *opt_input_data) const {
  for (const auto &item : option.trt_option.min_input_data) {
    auto max_iter = option.trt_option.max_input_data.find(item.first);
    auto opt_iter = option.trt_option.opt_input_data.find(item.first);
    FDASSERT(max_iter != option.trt_option.max_input_data.end(),
             "Cannot find %s in TrtBackendOption::max_input_data.",
             item.first.c_str());
    FDASSERT(opt_iter != option.trt_option.opt_input_data.end(),
             "Cannot find %s in TrtBackendOption::opt_input_data.",
             item.first.c_str());
    (*max_input_data)[item.first].assign(max_iter->second.begin(),
                                         max_iter->second.end());
    (*opt_input_data)[item.first].assign(opt_iter->second.begin(),
                                         opt_iter->second.end());
    (*min_input_data)[item.first].assign(item.second.begin(),
                                         item.second.end());
  }
}
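
// Feeds one set of shapes (and optional input data) into the temporary
// predictor and runs it once so that Paddle Inference can record the
// corresponding shape range. Inputs without user-provided data are filled
// with ones.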
void PaddleBackend::CollectShapeRun(
    paddle_infer::Predictor *predictor,
    const std::map<std::string, std::vector<int>> &shape,
    const std::map<std::string, std::vector<float>> &data) const {
  auto input_names = predictor->GetInputNames();
  auto input_type = predictor->GetInputTypes();
  for (const auto &name : input_names) {
    FDASSERT(shape.find(name) != shape.end() &&
                 input_type.find(name) != input_type.end(),
             "When collect_trt_shape is true, please define max/opt/min shape "
             "for the model's input:[\"%s\"] by "
             "(C++) RuntimeOption.trt_option.SetShape / "
             "(Python) RuntimeOption.trt_option.set_shape.",
             name.c_str());
    auto tensor = predictor->GetInputHandle(name);
    auto shape_value = shape.at(name);
    int shape_num = std::accumulate(shape_value.begin(), shape_value.end(), 1,
                                    std::multiplies<int>());
    tensor->Reshape(shape_value);
    if (data.find(name) != data.end()) {
      FDASSERT(static_cast<int>(data.at(name).size()) == shape_num,
               "The data size and the product of the shape must be equal for "
               "input:[\"%s\"] when using "
               "(C++) RuntimeOption.trt_option.SetInputData / "
               "(Python) RuntimeOption.trt_option.set_input_data.",
               name.c_str());
    }
    auto dtype = input_type[name];
    switch (dtype) {
      case paddle_infer::DataType::FLOAT32: {
        if (data.find(name) != data.end()) {
          tensor->CopyFromCpu(data.at(name).data());
        } else {
          std::vector<float> input_data(shape_num, 1.0);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
      case paddle_infer::DataType::INT32: {
        if (data.find(name) != data.end()) {
          std::vector<int> input_data(data.at(name).begin(),
                                      data.at(name).end());
          tensor->CopyFromCpu(input_data.data());
        } else {
          std::vector<int> input_data(shape_num, 1);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
      case paddle_infer::DataType::INT64: {
        if (data.find(name) != data.end()) {
          std::vector<int64_t> input_data(data.at(name).begin(),
                                          data.at(name).end());
          tensor->CopyFromCpu(input_data.data());
        } else {
          std::vector<int64_t> input_data(shape_num, 1);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
      default: {
        FDASSERT(false, "Input data Paddle backend only supports "
                        "FP32/INT32/INT64 currently.");
        break;
      }
    }
  }
  predictor->Run();
}

}  // namespace ultra_infer