support PIR TRT

gaotingquan · 10 months ago
commit e0fcc1ff5f

+ 1 - 2
paddlex/configs/modules/text_recognition/PP-OCRv4_mobile_rec.yaml

@@ -36,5 +36,4 @@ Predict:
   model_dir: "output/best_accuracy/inference"
   input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_001.png"
   kernel_option:
-    run_mode: trt_fp32
-    enable_new_ir: True
+    run_mode: paddle
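
With PIR TRT now gated behind an environment flag (see paddlex/utils/flags.py below), the default kernel option falls back to plain Paddle inference rather than TRT. As a hedged sketch, assuming PaddlePredictorOption exposes run_mode as a settable attribute (static_infer.py below reads self.option.run_mode; the import path follows its relative import), the TRT precision modes remain opt-in:

```python
# A sketch, not the documented API: assumes PaddlePredictorOption() takes no
# required arguments and exposes run_mode as a plain attribute.
from paddlex.inference.utils.pp_option import PaddlePredictorOption

option = PaddlePredictorOption()
option.run_mode = "paddle"      # the new YAML default from this commit
# option.run_mode = "trt_fp32"  # TRT stays opt-in: trt_int8 / trt_fp32 / trt_fp16
```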

+ 49 - 38
paddlex/inference/models/common/static_infer.py

@@ -19,7 +19,7 @@ from pathlib import Path
 import lazy_paddle as paddle
 import numpy as np
 
-from ....utils.flags import DEBUG, FLAGS_json_format_model
+from ....utils.flags import DEBUG, FLAGS_json_format_model, USE_PIR_TRT
 from ....utils import logging
 from ...utils.pp_option import PaddlePredictorOption
 
@@ -47,7 +47,7 @@ def collect_trt_shapes(
         predictor.run()
 
 
-def convert_trt(mode, pp_model_path, trt_dynamic_shapes):
+def convert_trt(mode, pp_model_path, trt_save_path, trt_dynamic_shapes):
     from lazy_paddle.tensorrt.export import (
         Input,
         TensorRTConfig,
@@ -55,8 +55,6 @@ def convert_trt(mode, pp_model_path, trt_dynamic_shapes):
         PrecisionMode,
     )
 
-    trt_save_dir = str(Path(pp_model_path) / "trt" / "inference")
-
     precision_map = {
         "trt_int8": PrecisionMode.INT8,
         "trt_fp32": PrecisionMode.FP32,
@@ -75,10 +73,8 @@ def convert_trt(mode, pp_model_path, trt_dynamic_shapes):
     # Create TensorRTConfig
     trt_config = TensorRTConfig(inputs=trt_inputs)
     trt_config.precision_mode = precision_map[mode]
-    trt_config.save_model_dir = trt_save_dir
-    convert(str(Path(pp_model_path) / "inference"), trt_config)
-    # copy inference.yaml to new model dir
-    shutil.copy(str(Path(pp_model_path) / "inference.yml"), trt_save_dir + ".yml")
+    trt_config.save_model_dir = trt_save_path
+    convert(pp_model_path, trt_config)
 
 
 class Copy2GPU:
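
After this change, convert_trt no longer derives the save directory from pp_model_path or copies inference.yml; the caller supplies trt_save_path explicitly. A minimal sketch of the new call, under two assumptions: that the function is importable from its module, and that trt_dynamic_shapes follows the {input_name: [min, opt, max]} layout used in PaddleX configs (the Input construction is not part of this hunk, so the exact structure is an assumption). The shapes below are illustrative, not taken from this commit:

```python
from pathlib import Path

from paddlex.inference.models.common.static_infer import convert_trt

model_dir = Path("output/best_accuracy/inference")  # example directory
prefix = "inference"

# Assumed layout: {input_name: [min_shape, opt_shape, max_shape]}.
trt_dynamic_shapes = {
    "x": [[1, 3, 48, 320], [1, 3, 48, 320], [8, 3, 48, 320]],
}

convert_trt(
    "trt_fp16",                               # mode: trt_int8 / trt_fp32 / trt_fp16
    (model_dir / prefix).as_posix(),          # pp_model_path, without file suffix
    (model_dir / "trt" / prefix).as_posix(),  # trt_save_path, the new parameter
    trt_dynamic_shapes,
)
```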
@@ -179,34 +175,10 @@ class StaticInfer:
                 ).as_posix()
         params_file = (self.model_dir / f"{self.model_prefix}.pdiparams").as_posix()
 
-        config = Config(model_file, params_file)
-        if self.option.device == "gpu":
-            if self.option.device == "gpu":
-                config.exp_disable_mixed_precision_ops({"feed", "fetch"})
-            config.enable_use_gpu(100, self.option.device_id)
-
-            if hasattr(config, "enable_new_ir"):
-                config.enable_new_ir(self.option.enable_new_ir)
-            if hasattr(config, "enable_new_executor"):
-                config.enable_new_executor()
-            config.set_optimization_level(3)
-
-            # NOTE: The pptrt settings are not aligned with those of FD.
-            precision_map = {
-                "trt_int8": Config.Precision.Int8,
-                "trt_fp32": Config.Precision.Float32,
-                "trt_fp16": Config.Precision.Half,
-            }
-            if self.option.run_mode in precision_map.keys():
-                config.enable_tensorrt_engine(
-                    workspace_size=(1 << 25) * self.option.batch_size,
-                    max_batch_size=self.option.batch_size,
-                    min_subgraph_size=self.option.min_subgraph_size,
-                    precision_mode=precision_map[self.option.run_mode],
-                    use_static=self.option.trt_use_static,
-                    use_calib_mode=self.option.trt_calib_mode,
-                )
-
+        # for TRT
+        if self.option.run_mode.startswith("trt"):
+            assert self.option.device == "gpu"
+            if not USE_PIR_TRT:
                 if not os.path.exists(self.option.shape_info_filename):
                     logging.info(
                         f"Dynamic shape info is collected into: {self.option.shape_info_filename}"
@@ -222,10 +194,49 @@ class StaticInfer:
                     logging.info(
                         f"A dynamic shape info file ( {self.option.shape_info_filename} ) already exists. No need to collect again."
                     )
-                config.enable_tuned_tensorrt_dynamic_shape(
-                    self.option.shape_info_filename, True
+            else:
+                trt_save_path = (
+                    Path(self.model_dir) / "trt" / self.model_prefix
+                ).as_posix()
+                pp_model_path = (Path(self.model_dir) / self.model_prefix).as_posix()
+                convert_trt(
+                    self.option.run_mode,
+                    pp_model_path,
+                    trt_save_path,
+                    self.option.trt_dynamic_shapes,
                 )
+                model_file = trt_save_path + ".json"
+                params_file = trt_save_path + ".pdiparams"
 
+        config = Config(model_file, params_file)
+        if self.option.device == "gpu":
+            config.exp_disable_mixed_precision_ops({"feed", "fetch"})
+            config.enable_use_gpu(100, self.option.device_id)
+            if not self.option.run_mode.startswith("trt"):
+                if hasattr(config, "enable_new_ir"):
+                    config.enable_new_ir(self.option.enable_new_ir)
+                if hasattr(config, "enable_new_executor"):
+                    config.enable_new_executor()
+                config.set_optimization_level(3)
+            # NOTE: The pptrt settings are not aligned with those of FD.
+            else:
+                if not USE_PIR_TRT:
+                    precision_map = {
+                        "trt_int8": Config.Precision.Int8,
+                        "trt_fp32": Config.Precision.Float32,
+                        "trt_fp16": Config.Precision.Half,
+                    }
+                    config.enable_tensorrt_engine(
+                        workspace_size=(1 << 25) * self.option.batch_size,
+                        max_batch_size=self.option.batch_size,
+                        min_subgraph_size=self.option.min_subgraph_size,
+                        precision_mode=precision_map[self.option.run_mode],
+                        use_static=self.option.trt_use_static,
+                        use_calib_mode=self.option.trt_calib_mode,
+                    )
+                    config.enable_tuned_tensorrt_dynamic_shape(
+                        self.option.shape_info_filename, True
+                    )
         elif self.option.device == "npu":
             config.enable_custom_device("npu")
         elif self.option.device == "xpu":

+ 2 - 0
paddlex/utils/flags.py

@@ -26,6 +26,7 @@ __all__ = [
     "INFER_BENCHMARK_OUTPUT",
     "INFER_BENCHMARK_DATA_SIZE",
     "FLAGS_json_format_model",
+    "USE_PIR_TRT",
 ]
 
 
@@ -46,6 +47,7 @@ DRY_RUN = get_flag_from_env_var("PADDLE_PDX_DRY_RUN", False)
 CHECK_OPTS = get_flag_from_env_var("PADDLE_PDX_CHECK_OPTS", False)
 EAGER_INITIALIZATION = get_flag_from_env_var("PADDLE_PDX_EAGER_INIT", True)
 FLAGS_json_format_model = get_flag_from_env_var("FLAGS_json_format_model", None)
+USE_PIR_TRT = get_flag_from_env_var("PADDLE_PDX_USE_PIR_TRT", False)
 
 # Inference Benchmark
 INFER_BENCHMARK = get_flag_from_env_var("PADDLE_PDX_INFER_BENCHMARK", None)
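
USE_PIR_TRT defaults to off, so TRT behavior is unchanged unless the variable is set. A minimal sketch of opting in, assuming the flag is parsed like its PADDLE_PDX_* siblings and, like them, evaluated when paddlex.utils.flags is first imported:

```python
import os

# Set before any paddlex import: flags.py reads the variable at import time.
os.environ["PADDLE_PDX_USE_PIR_TRT"] = "1"  # assumed truthy spelling

import paddlex  # noqa: E402  (USE_PIR_TRT is now in effect for this process)
```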