@@ -73,10 +73,44 @@ class BasePaddlePredictor(BaseComponent):
         params_file = (self.model_dir / f"{self.model_prefix}.pdiparams").as_posix()
         config = Config(model_file, params_file)
 
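+        # Reuse intermediate tensor memory where lifetimes allow, lowering peak usage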
+        config.enable_memory_optim()
         if self.option.device in ("gpu", "dcu"):
-            config.enable_use_gpu(200, self.option.device_id)
-            if hasattr(config, "enable_new_ir"):
-                config.enable_new_ir(self.option.enable_new_ir)
+            if self.option.device == "gpu":
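+                # Keep the feed and fetch ops in their original precision under mixed precision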
+                config.exp_disable_mixed_precision_ops({"feed", "fetch"})
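+            # Initialize a 100 MB GPU memory pool on the selected device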
+            config.enable_use_gpu(100, self.option.device_id)
+            if self.option.device == "gpu":
+                # NOTE: The pptrt settings are not aligned with those of FD.
+                precision_map = {
+                    "trt_int8": Config.Precision.Int8,
+                    "trt_fp32": Config.Precision.Float32,
+                    "trt_fp16": Config.Precision.Half,
+                }
+                if self.option.run_mode in precision_map.keys():
+                    config.enable_tensorrt_engine(
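+                        # 1 << 25 bytes = 32 MiB of TensorRT workspace per sample in the batch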
+                        workspace_size=(1 << 25) * self.option.batch_size,
+                        max_batch_size=self.option.batch_size,
+                        min_subgraph_size=self.option.min_subgraph_size,
+                        precision_mode=precision_map[self.option.run_mode],
+                        use_static=self.option.trt_use_static,
+                        use_calib_mode=self.option.trt_calib_mode,
+                    )
+
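+                    # First run: record observed tensor shape ranges to the file;
+                    # later runs: reuse the tuned file for dynamic-shape engines.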
+                    if self.option.shape_info_filename is not None:
+                        if not os.path.exists(self.option.shape_info_filename):
+                            config.collect_shape_range_info(
+                                self.option.shape_info_filename
+                            )
+                            logging.info(
+                                f"Dynamic shape info is collected into: {self.option.shape_info_filename}"
+                            )
+                        else:
+                            logging.info(
+                                f"A dynamic shape info file ( {self.option.shape_info_filename} ) already exists. \
+No need to generate again."
+                            )
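+                        # True: allow building engines at runtime for shapes outside the tuned ranges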
+                        config.enable_tuned_tensorrt_dynamic_shape(
+                            self.option.shape_info_filename, True
+                        )
         elif self.option.device == "npu":
             config.enable_custom_device("npu")
         elif self.option.device == "xpu":
@@ -86,53 +120,32 @@ class BasePaddlePredictor(BaseComponent):
         else:
             assert self.option.device == "cpu"
             config.disable_gpu()
-            if hasattr(config, "enable_new_ir"):
-                config.enable_new_ir(self.option.enable_new_ir)
-            if hasattr(config, "enable_new_executor"):
-                config.enable_new_executor(True)
if "mkldnn" in self.option.run_mode:
|
|
if "mkldnn" in self.option.run_mode:
|
|
|
try:
|
|
try:
|
|
|
config.enable_mkldnn()
|
|
config.enable_mkldnn()
|
|
|
- config.set_cpu_math_library_num_threads(self.option.cpu_threads)
|
|
|
|
|
if "bf16" in self.option.run_mode:
|
|
if "bf16" in self.option.run_mode:
|
|
|
config.enable_mkldnn_bfloat16()
|
|
config.enable_mkldnn_bfloat16()
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
logging.warning(
|
|
logging.warning(
|
|
|
"MKL-DNN is not available. We will disable MKL-DNN."
|
|
"MKL-DNN is not available. We will disable MKL-DNN."
|
|
|
)
|
|
)
|
|
|
-
|
|
|
|
|
- precision_map = {
|
|
|
|
|
- "trt_int8": Config.Precision.Int8,
|
|
|
|
|
- "trt_fp32": Config.Precision.Float32,
|
|
|
|
|
- "trt_fp16": Config.Precision.Half,
|
|
|
|
|
- }
|
|
|
|
|
- if self.option.run_mode in precision_map.keys():
|
|
|
|
|
- config.enable_tensorrt_engine(
|
|
|
|
|
- workspace_size=(1 << 25) * self.option.batch_size,
|
|
|
|
|
- max_batch_size=self.option.batch_size,
|
|
|
|
|
- min_subgraph_size=self.option.min_subgraph_size,
|
|
|
|
|
- precision_mode=precision_map[self.option.run_mode],
|
|
|
|
|
- use_static=self.option.trt_use_static,
|
|
|
|
|
- use_calib_mode=self.option.trt_calib_mode,
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if self.option.shape_info_filename is not None:
|
|
|
|
|
- if not os.path.exists(self.option.shape_info_filename):
|
|
|
|
|
- config.collect_shape_range_info(self.option.shape_info_filename)
|
|
|
|
|
- logging.info(
|
|
|
|
|
- f"Dynamic shape info is collected into: {self.option.shape_info_filename}"
|
|
|
|
|
- )
|
|
|
|
|
- else:
|
|
|
|
|
- logging.info(
|
|
|
|
|
- f"A dynamic shape info file ( {self.option.shape_info_filename} ) already exists. \
|
|
|
|
|
-No need to generate again."
|
|
|
|
|
- )
|
|
|
|
|
- config.enable_tuned_tensorrt_dynamic_shape(
|
|
|
|
|
- self.option.shape_info_filename, True
|
|
|
|
|
- )
|
|
|
|
|
|
|
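+                # Adjust the oneDNN primitive cache capacity for varying input shapes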
+                config.set_mkldnn_cache_capacity(-1)
+            else:
+                if hasattr(config, "disable_mkldnn"):
+                    config.disable_mkldnn()
 
         # Disable paddle inference logging
         config.disable_glog_info()
+
+        config.set_cpu_math_library_num_threads(self.option.cpu_threads)
+
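+        # The new IR and executor are only enabled when not using Paddle-TRT on GPU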
+        if not (self.option.device == "gpu" and self.option.run_mode.startswith("trt")):
+            if hasattr(config, "enable_new_ir"):
+                config.enable_new_ir(self.option.enable_new_ir)
+            if hasattr(config, "enable_new_executor"):
+                config.enable_new_executor()
+            config.set_optimization_level(3)
+
         for del_p in self.option.delete_pass:
             config.delete_pass(del_p)
 
@@ -142,11 +155,6 @@ No need to generate again."
             config.delete_pass("conv2d_add_act_fuse_pass")
             config.delete_pass("conv2d_add_fuse_pass")
 
-        # Enable shared memory
-        config.enable_memory_optim()
-        config.switch_ir_optim(True)
-        # Disable feed, fetch OP, needed by zero_copy_run
-        config.switch_use_feed_fetch_ops(False)
         predictor = create_predictor(config)
 
         # Get input and output handlers