paddle_backend.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "ultra_infer/runtime/backends/paddle/paddle_backend.h"

#include <sstream>

#include "ultra_infer/utils/path.h"

namespace ultra_infer {

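// BuildOption translates a PaddleBackendOption into the underlying
// paddle_infer::Config: it selects the device (GPU / IPU / KUNLUNXIN / CPU),
// the mixed-precision mode, optional Paddle-TensorRT settings, MKLDNN, the
// CPU math-library thread count, logging, and the new-IR switches.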
void PaddleBackend::BuildOption(const PaddleBackendOption &option) {
  option_ = option;
  if (option.device == Device::GPU) {
    auto inference_precision = paddle_infer::PrecisionType::kFloat32;
    if (option_.inference_precision == "float32") {
      FDINFO << "Will inference_precision float32" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kFloat32;
    } else if (option_.inference_precision == "float16") {
      FDINFO << "Will inference_precision float16" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kHalf;
    } else if (option_.inference_precision == "bfloat16") {
      FDINFO << "Will inference_precision bfloat16" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kBf16;
    } else if (option_.inference_precision == "int8") {
      FDINFO << "Will inference_precision int8" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kInt8;
    } else {
      FDERROR << "Paddle Inference only supports precision in float32,"
              << " float16, bfloat16 and int8" << std::endl;
    }
    config_.Exp_DisableMixedPrecisionOps({"feed", "fetch"});
    config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id,
                         inference_precision);
    // config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id);
    if (option_.switch_ir_debug) {
      FDINFO << "Will enable ir_debug for Paddle Backend." << std::endl;
      config_.SwitchIrDebug();
    }
    if (option_.enable_inference_cutlass) {
#ifdef PADDLEINFERENCE_API_COMPAT_2_4_x
      FDWARNING
          << "You are using Paddle Inference 2.4.x, cutlass is not supported!"
          << std::endl;
#else
      FDINFO << "Will enable_inference_cutlass" << std::endl;
      config_.Exp_EnableUseCutlass();
#endif
    }
    if (option_.external_stream_) {
      FDINFO << "Will use external stream for Paddle Backend." << std::endl;
      config_.SetExecStream(option_.external_stream_);
    }
    if (option.enable_trt) {
      if (!option.trt_option.enable_fp16) {
        FDINFO << "Will try to use TensorRT inference with Paddle Backend."
               << std::endl;
      }
      config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_);
      auto precision = paddle_infer::PrecisionType::kFloat32;
      if (option.trt_option.enable_fp16) {
        FDINFO << "Will try to use TensorRT fp16 inference with Paddle Backend."
               << std::endl;
        precision = paddle_infer::PrecisionType::kHalf;
      }
      bool use_static = false;
      if (option.trt_option.serialize_file != "") {
        FDWARNING
            << "Detected that the TensorRT cache file has been set to "
            << option.trt_option.serialize_file
            << ", but when Paddle-TRT is enabled the cache file will be "
               "saved to the directory of the Paddle model."
            << std::endl;
        use_static = true;
        std::string opt_cache_dir =
            GetDirFromPath(option.trt_option.serialize_file);
        config_.SetOptimCacheDir(opt_cache_dir);
      }
      config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
                                   option.trt_option.max_batch_size,
                                   option.trt_min_subgraph_size, precision,
                                   use_static);
      SetTRTDynamicShapeToConfig(option);
      if (option_.enable_fixed_size_opt) {
        paddle_infer::experimental::InternalUtils::SetTransformerMaskid(
            &config_, "opt");
      }
    }
  } else if (option.device == Device::IPU) {
#ifdef WITH_IPU
    config_.EnableIpu(option.ipu_option.ipu_device_num,
                      option.ipu_option.ipu_micro_batch_size,
                      option.ipu_option.ipu_enable_pipelining,
                      option.ipu_option.ipu_batches_per_step);
    config_.SetIpuConfig(option.ipu_option.ipu_enable_fp16,
                         option.ipu_option.ipu_replica_num,
                         option.ipu_option.ipu_available_memory_proportion,
                         option.ipu_option.ipu_enable_half_partial);
#else
    FDWARNING << "UltraInfer is not compiled with the IPU device, so it will "
                 "fall back to CPU with the Paddle Inference backend."
              << std::endl;
#endif
  } else if (option.device == Device::KUNLUNXIN) {
#ifdef WITH_KUNLUNXIN
    // Note(qiuyanjun): For Paddle XPU L3 Cache, please set
    // export XPU_PADDLE_L3_SIZE=67104768 (XPU R200)
    // export FLAGS_fuse_multi_transformer_quant_type="float"
    config_.EnableXpu(option.xpu_option.kunlunxin_l3_workspace_size,
                      option.xpu_option.kunlunxin_locked,
                      option.xpu_option.kunlunxin_autotune,
                      option.xpu_option.kunlunxin_autotune_file,
                      option.xpu_option.kunlunxin_precision,
                      option.xpu_option.kunlunxin_adaptive_seqlen,
                      option.xpu_option.kunlunxin_enable_multi_stream);
    config_.SetXpuConfig(
        option.xpu_option.kunlunxin_quant_post_dynamic_weight_bits,
        option.xpu_option.kunlunxin_quant_post_dynamic_op_types);
    config_.SetXpuDeviceId(option.xpu_option.kunlunxin_device_id);
#else
    FDWARNING
        << "UltraInfer is not compiled with the KUNLUNXIN device, so it will "
           "fall back to CPU with the Paddle Inference backend."
        << std::endl;
#endif
  } else {
    config_.DisableGpu();
    if (option.enable_mkldnn) {
      config_.EnableMKLDNN();
      config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size);
    } else {
#if defined(PADDLEINFERENCE_API_COMPAT_2_6_x) || \
    (PADDLEINFERENCE_VERSION_MAJOR != 2)
      config_.DisableMKLDNN();
#endif
    }
  }
  if (!option.enable_log_info) {
    config_.DisableGlogInfo();
  }
  if (option.cpu_thread_num <= 0) {
    config_.SetCpuMathLibraryNumThreads(8);
  } else {
    config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
  }
  // Note: SwitchIrOptim is enabled by default for the Paddle Inference
  // backend, so we don't need to set it manually.
  // config_.SwitchIrOptim(option.switch_ir_optimize);
  if (option.enable_new_ir) {
#if PADDLEINFERENCE_VERSION_MAJOR == 2
    FDWARNING << "UltraInfer was compiled with Paddle Inference 2.x, "
                 "which does not support the new IR."
              << std::endl;
#else
    if (option.device == Device::GPU && option.enable_trt) {
      FDWARNING << "Currently, Paddle-TensorRT does not support the new IR, "
                   "so the old IR will be used."
                << std::endl;
    } else {
      config_.EnableNewIR();
      config_.EnableNewExecutor();
      if (option.device == Device::CPU || option.device == Device::GPU) {
        config_.SetOptimizationLevel(3);
      }
    }
#endif
  }
}

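// Init is the entry point used by the Runtime: it checks that the Paddle
// Inference backend supports the requested model format and device, copies
// the relevant fields from RuntimeOption into PaddleBackendOption, and then
// delegates to InitFromPaddle().
//
// Illustrative usage sketch (the RuntimeOption/Runtime helpers such as
// SetModelPath(), UseGpu() and UsePaddleInferBackend() are assumed from the
// public API and are not defined in this file):
//
//   ultra_infer::RuntimeOption opt;
//   opt.SetModelPath("model.pdmodel", "model.pdiparams");
//   opt.UseGpu(0);
//   opt.UsePaddleInferBackend();
//   ultra_infer::Runtime runtime;
//   runtime.Init(opt);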
bool PaddleBackend::Init(const RuntimeOption &runtime_option) {
  if (!(Supported(runtime_option.model_format, Backend::PDINFER) &&
        Supported(runtime_option.device, Backend::PDINFER))) {
    return false;
  }

  auto option = runtime_option;
  // Collect the basic Paddle Inference options and the TensorRT option.
  option.paddle_infer_option.model_file = runtime_option.model_file;
  option.paddle_infer_option.params_file = runtime_option.params_file;
  option.paddle_infer_option.model_from_memory_ =
      runtime_option.model_from_memory_;
  option.paddle_infer_option.device = runtime_option.device;
  option.paddle_infer_option.device_id = runtime_option.device_id;
  option.paddle_infer_option.enable_pinned_memory =
      runtime_option.enable_pinned_memory;
  option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
  option.paddle_infer_option.trt_option = runtime_option.trt_option;
  option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
  // Note(qiuyanjun): For the IPU option and the XPU option, please check the
  // details of RuntimeOption::UseIpu() and RuntimeOption::UseKunlunXin().
  // Furthermore, please check paddle_infer_option.SetIpuConfig() and
  // paddle_infer_option.SetXpuConfig() for more details of the extra configs.
  return InitFromPaddle(option.model_file, option.params_file,
                        option.model_from_memory_, option.paddle_infer_option);
}

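// InitFromPaddle builds the paddle_infer::Config from the given model (a file
// path or an in-memory buffer), optionally collects TensorRT dynamic-shape
// information by running a temporary predictor, creates the predictor, and
// records the input/output tensor descriptions.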
bool PaddleBackend::InitFromPaddle(const std::string &model,
                                   const std::string &params,
                                   bool model_from_memory,
                                   const PaddleBackendOption &option) {
  if (initialized_) {
    FDERROR << "PaddleBackend is already initialized, cannot initialize again."
            << std::endl;
    return false;
  }

  if (model_from_memory) {
    config_.SetModelBuffer(model.c_str(), model.size(), params.c_str(),
                           params.size());
  } else {
    config_.SetModel(model, params);
  }
  if (option.enable_memory_optimize) {
    config_.EnableMemoryOptim();
  }
  BuildOption(option);

  // The input/output information obtained from the predictor is not reliable;
  // use PaddleReader instead for now.
  std::string model_content = model;
  if (!model_from_memory) {
    FDASSERT(ReadBinaryFromFile(model, &model_content),
             "Failed to read file %s.", model.c_str());
  }
  if (option.is_quantize_model) {
    if (option.device == Device::GPU) {
      FDWARNING << "The loaded model is a quantized model. When running it on "
                   "GPU, please use the TensorRT backend to get better "
                   "performance."
                << std::endl;
      if (option.enable_trt) {
        bool use_static = false;
        if (option.trt_option.serialize_file != "") {
          FDWARNING
              << "Detected that the TensorRT cache file has been set to "
              << option.trt_option.serialize_file
              << ", but when Paddle-TRT is enabled the cache file will be "
                 "saved to the directory of the Paddle model."
              << std::endl;
          use_static = true;
        }
#if PADDLEINFERENCE_VERSION_MAJOR != 2
        config_.EnableTensorRtEngine(
            option.trt_option.max_workspace_size,
            option.trt_option.max_batch_size, option.trt_min_subgraph_size,
            paddle_infer::PrecisionType::kInt8, use_static, false, true);
#else
        config_.EnableTensorRtEngine(
            option.trt_option.max_workspace_size,
            option.trt_option.max_batch_size, option.trt_min_subgraph_size,
            paddle_infer::PrecisionType::kInt8, use_static, false);
#endif
        SetTRTDynamicShapeToConfig(option);
      }
    }
    if (option.enable_mkldnn) {
      config_.EnableMkldnnInt8();
    } else {
      FDWARNING << "The loaded model is a quantized model. When running it on "
                   "CPU, please enable MKLDNN to get better performance."
                << std::endl;
    }
  }
  if (option.collect_trt_shape) {
    // Set the shape info file.
    std::string curr_model_dir = "./";
    if (!option.model_from_memory_) {
      curr_model_dir = GetDirFromPath(option.model_file);
    }
    std::string shape_range_info =
        PathJoin(curr_model_dir, "shape_range_info.pbtxt");
    if (!CheckFileExists(shape_range_info)) {
      FDINFO << "Start generating shape range info file." << std::endl;
      paddle_infer::Config analysis_config;
      if (model_from_memory) {
        analysis_config.SetModelBuffer(model.c_str(), model.size(),
                                       params.c_str(), params.size());
      } else {
        analysis_config.SetModel(model, params);
      }
      if (option.collect_trt_shape_by_device) {
        if (option.device == Device::GPU) {
          analysis_config.EnableUseGpu(option.gpu_mem_init_size,
                                       option.device_id,
                                       paddle_infer::PrecisionType::kFloat32);
        }
      }
      analysis_config.CollectShapeRangeInfo(shape_range_info);
      auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config);
      std::map<std::string, std::vector<int>> max_shape;
      std::map<std::string, std::vector<int>> min_shape;
      std::map<std::string, std::vector<int>> opt_shape;
      GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
      std::map<std::string, std::vector<float>> max_input_data;
      std::map<std::string, std::vector<float>> min_input_data;
      std::map<std::string, std::vector<float>> opt_input_data;
      if (!option.trt_option.min_input_data.empty()) {
        GetInputDataFromOption(option, &max_input_data, &min_input_data,
                               &opt_input_data);
      }
      // Need to run once to get the shape range info file.
      CollectShapeRun(predictor_tmp.get(), max_shape, max_input_data);
      CollectShapeRun(predictor_tmp.get(), min_shape, min_input_data);
      CollectShapeRun(predictor_tmp.get(), opt_shape, opt_input_data);
      CollectShapeRun(predictor_tmp.get(), opt_shape, opt_input_data);
      FDINFO << "Finish generating shape range info file." << std::endl;
    }
    FDINFO << "Start loading shape range info file " << shape_range_info
           << " to set TensorRT dynamic shape." << std::endl;
    config_.EnableTunedTensorRtDynamicShape(shape_range_info, true);
  }
  // Note(zhoushunjie): The pass deletion should be executed just before
  // creating the predictor.
  if (!option.delete_pass_names.empty()) {
    auto pass_builder = config_.pass_builder();
    for (int i = 0; i < option.delete_pass_names.size(); i++) {
      FDINFO << "Delete pass : " << option.delete_pass_names[i] << std::endl;
      pass_builder->DeletePass(option.delete_pass_names[i]);
    }
  }
  if (option.enable_log_info) {
    FDINFO << "Finish paddle inference config with summary as: " << std::endl
           << config_.Summary() << std::endl;
  }
  predictor_ = paddle_infer::CreatePredictor(config_);
  auto input_names = predictor_->GetInputNames();
  auto output_names = predictor_->GetOutputNames();
  auto input_dtypes = predictor_->GetInputTypes();

#ifdef PADDLEINFERENCE_API_COMPAT_2_4_x
  // Note: GetInputTensorShape, GetOutputTensorShape and GetOutputTypes
  // are not supported when the Paddle Inference API version is 2.4.x.
  std::map<std::string, std::vector<int64_t>> input_shapes;
  std::map<std::string, std::vector<int64_t>> output_shapes;
  std::map<std::string, paddle_infer::DataType> output_dtypes;
  // Get all the input shape info.
  for (size_t i = 0; i < input_names.size(); ++i) {
    std::vector<int64_t> shape;
    auto handle = predictor_->GetInputHandle(input_names[i]);
    for (int j = 0; j < handle->shape().size(); ++j) {
      shape.push_back(
          static_cast<int64_t>(handle->shape()[j]));  // int32 -> int64
    }
    input_shapes[input_names[i]] = shape;
  }
  // Get all the output shape and dtype info.
  for (size_t i = 0; i < output_names.size(); ++i) {
    std::vector<int64_t> shape;
    auto handle = predictor_->GetOutputHandle(output_names[i]);
    for (int j = 0; j < handle->shape().size(); ++j) {
      shape.push_back(
          static_cast<int64_t>(handle->shape()[j]));  // int32 -> int64
    }
    output_shapes[output_names[i]] = shape;
    output_dtypes[output_names[i]] = handle->type();
  }
#else
  auto input_shapes = predictor_->GetInputTensorShape();
  auto output_shapes = predictor_->GetOutputTensorShape();
  auto output_dtypes = predictor_->GetOutputTypes();
#endif

  inputs_desc_.resize(input_names.size());
  for (int i = 0; i < input_names.size(); ++i) {
    inputs_desc_[i].name = input_names[i];
    auto iter = input_shapes.find(inputs_desc_[i].name);
    FDASSERT(iter != input_shapes.end(), "Cannot find shape for input %s.",
             inputs_desc_[i].name.c_str());
    inputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
    auto iter1 = input_dtypes.find(inputs_desc_[i].name);
    FDASSERT(iter1 != input_dtypes.end(), "Cannot find data type for input %s.",
             inputs_desc_[i].name.c_str());
    inputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
  }
  outputs_desc_.resize(output_names.size());
  for (int i = 0; i < output_names.size(); ++i) {
    outputs_desc_[i].name = output_names[i];
    auto iter = output_shapes.find(outputs_desc_[i].name);
    FDASSERT(iter != output_shapes.end(), "Cannot find shape for output %s.",
             outputs_desc_[i].name.c_str());
    outputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
    auto iter1 = output_dtypes.find(outputs_desc_[i].name);
    FDASSERT(iter1 != output_dtypes.end(),
             "Cannot find data type for output %s.",
             outputs_desc_[i].name.c_str());
    outputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
  }
  initialized_ = true;
  return true;
}

TensorInfo PaddleBackend::GetInputInfo(int index) {
  FDASSERT(index < NumInputs(),
           "The index: %d should be less than the number of inputs: %d.",
           index, NumInputs());
  return inputs_desc_[index];
}

std::vector<TensorInfo> PaddleBackend::GetInputInfos() { return inputs_desc_; }

TensorInfo PaddleBackend::GetOutputInfo(int index) {
  FDASSERT(index < NumOutputs(),
           "The index: %d should be less than the number of outputs: %d.",
           index, NumOutputs());
  return outputs_desc_[index];
}

std::vector<TensorInfo> PaddleBackend::GetOutputInfos() {
  return outputs_desc_;
}

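// Infer binds the FDTensor inputs to the predictor's input handles, runs the
// predictor, and fills the output FDTensors. When copy_to_fd is false the
// outputs share the backend's memory (only supported on CPU/GPU); on IPU the
// results are always copied back.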
bool PaddleBackend::Infer(std::vector<FDTensor> &inputs,
                          std::vector<FDTensor> *outputs, bool copy_to_fd) {
  if (inputs.size() != inputs_desc_.size()) {
    FDERROR << "[PaddleBackend] Size of inputs (" << inputs.size()
            << ") should match the number of inputs of this model ("
            << inputs_desc_.size() << ")." << std::endl;
    return false;
  }

  // Sharing the backend's output memory is only supported on CPU or GPU.
  if (option_.device == Device::IPU) {
    copy_to_fd = true;
  }

  RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto handle = predictor_->GetInputHandle(inputs[i].name);
    ShareTensorFromFDTensor(handle.get(), inputs[i]);
  }
  // Prebinding outputs is only supported for GPU.
  // if (!copy_to_fd) {
  //   for (size_t i = 0; i < (*outputs).size(); ++i) {
  //     auto output_name = (*outputs)[i].name;
  //     // If an output is not prebinded,
  //     // the name of the output is expected to be empty.
  //     // We skip it here.
  //     if (output_name.empty()) {
  //       continue;
  //     }
  //     // Record the prebinded output_name.
  //     // Those outputs do not need PaddleTensorToFDTensor
  //     // after predictor_.Run()
  //     auto handle = predictor_->GetOutputHandle(output_name);
  //     ShareOutTensorFromFDTensor(handle.get(), (*outputs)[i]);
  //   }
  // }

  RUNTIME_PROFILE_LOOP_BEGIN(1)
  predictor_->Run();
  RUNTIME_PROFILE_LOOP_END

  outputs->resize(outputs_desc_.size());
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
    if (copy_to_fd) {
      (*outputs)[i].is_pinned_memory = option_.enable_pinned_memory;
    }
    PaddleTensorToFDTensor(handle, &((*outputs)[i]), copy_to_fd);
  }
  RUNTIME_PROFILE_LOOP_H2D_D2H_END
  return true;
}

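// Clone creates another PaddleBackend serving the same model. If the target
// device id differs from the current GPU device id, the model is
// re-initialized on that device; otherwise the underlying predictor is
// cloned and the input/output descriptions are shared.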
std::unique_ptr<BaseBackend> PaddleBackend::Clone(RuntimeOption &runtime_option,
                                                  void *stream, int device_id) {
  std::unique_ptr<BaseBackend> new_backend =
      utils::make_unique<PaddleBackend>();
  auto casted_backend = dynamic_cast<PaddleBackend *>(new_backend.get());
  if (device_id > 0 && (option_.device == Device::GPU) &&
      device_id != option_.device_id) {
    auto clone_option = option_;
    clone_option.device_id = device_id;
    clone_option.external_stream_ = stream;
    FDASSERT(casted_backend->InitFromPaddle(
                 runtime_option.model_file, runtime_option.params_file,
                 runtime_option.model_from_memory_, clone_option),
             "Clone model from Paddle failed while initializing PaddleBackend.");
    FDWARNING << "The target device id: " << device_id
              << " is different from the current device id: "
              << option_.device_id
              << ", cannot share memory with the current engine." << std::endl;
    return new_backend;
  }
  casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
  casted_backend->outputs_desc_.assign(outputs_desc_.begin(),
                                       outputs_desc_.end());
  casted_backend->predictor_ = std::move(predictor_->Clone(stream));
  return new_backend;
}

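// SetTRTDynamicShapeToConfig forwards the min/opt/max shapes read from the
// TensorRT option to paddle_infer::Config::SetTRTDynamicShapeInfo, but only
// when at least one dynamic shape has been configured.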
void PaddleBackend::SetTRTDynamicShapeToConfig(
    const PaddleBackendOption &option) {
  std::map<std::string, std::vector<int>> max_shape;
  std::map<std::string, std::vector<int>> min_shape;
  std::map<std::string, std::vector<int>> opt_shape;
  GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
  if (min_shape.size() > 0) {
    FDINFO << "Start setting trt dynamic shape." << std::endl;
    config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
    FDINFO << "Finish setting trt dynamic shape." << std::endl;
  }
}

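// GetDynamicShapeFromOption (and GetInputDataFromOption below) copy the
// per-input min/opt/max shapes and input data from option.trt_option into
// the output maps, asserting that every entry present in the "min" map also
// has a matching entry in the "max" and "opt" maps.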
void PaddleBackend::GetDynamicShapeFromOption(
    const PaddleBackendOption &option,
    std::map<std::string, std::vector<int>> *max_shape,
    std::map<std::string, std::vector<int>> *min_shape,
    std::map<std::string, std::vector<int>> *opt_shape) const {
  auto print_shape = [](const std::vector<int> &shape) -> std::string {
    std::ostringstream oss;
    oss << "[";
    for (int i = 0; i < shape.size(); ++i) {
      oss << shape[i];
      if (i < shape.size() - 1) {
        oss << ", ";
      }
    }
    oss << "]";
    return oss.str();
  };
  for (const auto &item : option.trt_option.min_shape) {
    auto max_iter = option.trt_option.max_shape.find(item.first);
    auto opt_iter = option.trt_option.opt_shape.find(item.first);
    FDASSERT(max_iter != option.trt_option.max_shape.end(),
             "Cannot find %s in TrtBackendOption::max_shape.",
             item.first.c_str());
    FDASSERT(opt_iter != option.trt_option.opt_shape.end(),
             "Cannot find %s in TrtBackendOption::opt_shape.",
             item.first.c_str());
    (*max_shape)[item.first].assign(max_iter->second.begin(),
                                    max_iter->second.end());
    (*opt_shape)[item.first].assign(opt_iter->second.begin(),
                                    opt_iter->second.end());
    (*min_shape)[item.first].assign(item.second.begin(), item.second.end());
    FDINFO << item.first
           << ": the max shape = " << print_shape(max_iter->second)
           << ", the min shape = " << print_shape(item.second)
           << ", the opt shape = " << print_shape(opt_iter->second)
           << std::endl;
  }
}

void PaddleBackend::GetInputDataFromOption(
    const PaddleBackendOption &option,
    std::map<std::string, std::vector<float>> *max_input_data,
    std::map<std::string, std::vector<float>> *min_input_data,
    std::map<std::string, std::vector<float>> *opt_input_data) const {
  for (const auto &item : option.trt_option.min_input_data) {
    auto max_iter = option.trt_option.max_input_data.find(item.first);
    auto opt_iter = option.trt_option.opt_input_data.find(item.first);
    FDASSERT(max_iter != option.trt_option.max_input_data.end(),
             "Cannot find %s in TrtBackendOption::max_input_data.",
             item.first.c_str());
    FDASSERT(opt_iter != option.trt_option.opt_input_data.end(),
             "Cannot find %s in TrtBackendOption::opt_input_data.",
             item.first.c_str());
    (*max_input_data)[item.first].assign(max_iter->second.begin(),
                                         max_iter->second.end());
    (*opt_input_data)[item.first].assign(opt_iter->second.begin(),
                                         opt_iter->second.end());
    (*min_input_data)[item.first].assign(item.second.begin(),
                                         item.second.end());
  }
}

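// CollectShapeRun reshapes every model input to the given shape, fills it
// with the user-provided data (or with dummy values of 1 when no data is
// given), and runs the predictor once so Paddle Inference can record the
// shape range information.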
void PaddleBackend::CollectShapeRun(
    paddle_infer::Predictor *predictor,
    const std::map<std::string, std::vector<int>> &shape,
    const std::map<std::string, std::vector<float>> &data) const {
  auto input_names = predictor->GetInputNames();
  auto input_type = predictor->GetInputTypes();
  for (const auto &name : input_names) {
    FDASSERT(shape.find(name) != shape.end() &&
                 input_type.find(name) != input_type.end(),
             "When collect_trt_shape is true, please define max/opt/min shape "
             "for the model input [\"%s\"] via "
             "(C++) RuntimeOption.trt_option.SetShape / "
             "(Python) RuntimeOption.trt_option.set_shape.",
             name.c_str());
    auto tensor = predictor->GetInputHandle(name);
    auto shape_value = shape.at(name);
    int shape_num = std::accumulate(shape_value.begin(), shape_value.end(), 1,
                                    std::multiplies<int>());
    tensor->Reshape(shape_value);
    if (data.find(name) != data.end()) {
      FDASSERT(data.at(name).size() == shape_num,
               "The number of data elements must equal the product of the "
               "shape for input [\"%s\"] when using "
               "(C++) RuntimeOption.trt_option.SetInputData / "
               "(Python) RuntimeOption.trt_option.set_input_data.",
               name.c_str());
    }
    auto dtype = input_type[name];
    switch (dtype) {
      case paddle_infer::DataType::FLOAT32: {
        if (data.find(name) != data.end()) {
          tensor->CopyFromCpu(data.at(name).data());
        } else {
          std::vector<float> input_data(shape_num, 1.0);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
      case paddle_infer::DataType::INT32: {
        if (data.find(name) != data.end()) {
          std::vector<int> input_data(data.at(name).begin(),
                                      data.at(name).end());
          tensor->CopyFromCpu(input_data.data());
        } else {
          std::vector<int> input_data(shape_num, 1);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
      case paddle_infer::DataType::INT64: {
        if (data.find(name) != data.end()) {
          std::vector<int64_t> input_data(data.at(name).begin(),
                                          data.at(name).end());
          tensor->CopyFromCpu(input_data.data());
        } else {
          std::vector<int64_t> input_data(shape_num, 1);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
      default: {
        FDASSERT(false, "The Paddle backend only supports FP32/INT32/INT64 "
                        "input data currently.");
        break;
      }
    }
  }
  predictor->Run();
}

}  // namespace ultra_infer