
update hpi_model_info_collection.json and deprecate INFER_BENCHMARK_USE_NEW_INFER_API (#4032)

* update hpi_model_info_collection.json

* deprecate INFER_BENCHMARK_USE_NEW_INFER_API

* update json
zhang-prog 6 months ago
parent
current commit
6f8f095ed6

+ 0 - 1
docs/module_usage/instructions/benchmark.en.md

@@ -19,7 +19,6 @@ To enable the benchmark feature, you must set the following environment variable
 * `PADDLE_PDX_INFER_BENCHMARK_ITERS`: The number of iterations for testing (default is `0`).
 * `PADDLE_PDX_INFER_BENCHMARK_OUTPUT_DIR`: The directory where the metrics are saved (e.g., `./benchmark`). The default is `None`, meaning the benchmark metrics will not be saved.
 * `PADDLE_PDX_INFER_BENCHMARK_USE_CACHE_FOR_READ`: When set to `True`, the caching mechanism is applied to the operation of reading input data to avoid repetitive I/O overhead, and the time consumed by data read and cache is not recorded in the core time (default is `False`).
-* `PADDLE_PDX_INFER_BENCHMARK_USE_NEW_INFER_API`: When set to `True`, the new inference API is enabled, providing more detailed information for inference operations in benchmarks (default is `False`).
 
 **Note**:
 
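
As a usage sketch (not part of this commit): the surviving switches above are plain environment variables that `paddlex/utils/flags.py` reads at module import time, so they must be set before `paddlex` is imported. The iteration count and output directory below are illustrative values:

```python
import os

# Must be set before importing paddlex: paddlex/utils/flags.py reads
# these environment variables once, at import time.
os.environ["PADDLE_PDX_INFER_BENCHMARK_ITERS"] = "10"                 # illustrative iteration count
os.environ["PADDLE_PDX_INFER_BENCHMARK_OUTPUT_DIR"] = "./benchmark"   # save metrics here
os.environ["PADDLE_PDX_INFER_BENCHMARK_USE_CACHE_FOR_READ"] = "True"  # cache input reads

# PADDLE_PDX_INFER_BENCHMARK_USE_NEW_INFER_API is removed by this commit,
# so setting it no longer has any effect.

import paddlex  # noqa: E402
```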

+ 0 - 1
docs/module_usage/instructions/benchmark.md

@@ -19,7 +19,6 @@ The Benchmark feature collects timing statistics for every operation during the model's end-to-end inference
 * `PADDLE_PDX_INFER_BENCHMARK_ITERS`: the number of test iterations (default `0`);
 * `PADDLE_PDX_INFER_BENCHMARK_OUTPUT_DIR`: the directory where metrics are saved, e.g. `./benchmark`; the default is `None`, meaning benchmark metrics are not saved;
 * `PADDLE_PDX_INFER_BENCHMARK_USE_CACHE_FOR_READ`: when set to `True`, a caching mechanism is applied to the input-data read operation to avoid repeated I/O overhead, and the time spent reading and caching data is not counted in the core time (default `False`);
-* `PADDLE_PDX_INFER_BENCHMARK_USE_NEW_INFER_API`: when set to `True`, the new inference API is used, providing finer-grained per-stage results (default `False`).

 **Note**:
 

+ 2 - 55
paddlex/inference/models/common/static_infer.py

@@ -22,8 +22,7 @@ import numpy as np
 
 from ....utils import logging
 from ....utils.deps import class_requires_deps
-from ....utils.device import constr_device
-from ....utils.flags import DEBUG, INFER_BENCHMARK_USE_NEW_INFER_API, USE_PIR_TRT
+from ....utils.flags import DEBUG, USE_PIR_TRT
 from ...utils.benchmark import benchmark, set_inference_operations
 from ...utils.hpi import (
     HPIConfig,
@@ -40,9 +39,6 @@ from ...utils.trt_config import DISABLE_TRT_HALF_OPS_CONFIG
 CACHE_DIR = ".cache"
 
 INFERENCE_OPERATIONS = [
-    "PaddleCopyToDevice",
-    "PaddleCopyToHost",
-    "PaddleModelInfer",
     "PaddleInferChainLegacy",
     "MultiBackendInfer",
 ]
@@ -233,47 +229,6 @@ def _sort_inputs(inputs, names):
     return inputs
 
 
-def _concatenate(*callables):
-    def _chain(x):
-        for c in callables:
-            x = c(x)
-        return x
-
-    return _chain
-
-
-@benchmark.timeit
-class PaddleCopyToDevice:
-    def __init__(self, device_type, device_id):
-        self.device_type = device_type
-        self.device_id = device_id
-
-    def __call__(self, arrs):
-        import paddle
-
-        device_id = [self.device_id] if self.device_id is not None else self.device_id
-        device = constr_device(self.device_type, device_id)
-        paddle_tensors = [paddle.to_tensor(i, place=device) for i in arrs]
-        return paddle_tensors
-
-
-@benchmark.timeit
-class PaddleCopyToHost:
-    def __call__(self, paddle_tensors):
-        arrs = [i.numpy() for i in paddle_tensors]
-        return arrs
-
-
-@benchmark.timeit
-class PaddleModelInfer:
-    def __init__(self, predictor):
-        super().__init__()
-        self.predictor = predictor
-
-    def __call__(self, x):
-        return self.predictor.run(x)
-
-
 # FIXME: Name might be misleading
 @benchmark.timeit
 class PaddleInferChainLegacy:
@@ -317,15 +272,7 @@ class PaddleInfer(StaticInfer):
         self.model_file_prefix = model_file_prefix
         self._option = option
         self.predictor = self._create()
-        if INFER_BENCHMARK_USE_NEW_INFER_API:
-            device_type = self._option.device_type
-            device_type = "gpu" if device_type == "dcu" else device_type
-            copy_to_device = PaddleCopyToDevice(device_type, self._option.device_id)
-            copy_to_host = PaddleCopyToHost()
-            model_infer = PaddleModelInfer(self.predictor)
-            self.infer = _concatenate(copy_to_device, model_infer, copy_to_host)
-        else:
-            self.infer = PaddleInferChainLegacy(self.predictor)
+        self.infer = PaddleInferChainLegacy(self.predictor)
 
     def __call__(self, x: Sequence[np.ndarray]) -> List[np.ndarray]:
         names = self.predictor.get_input_names()
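
For context on what was removed: `_concatenate` composed the three per-stage operations (`PaddleCopyToDevice`, `PaddleModelInfer`, `PaddleCopyToHost`) into a single callable when the old flag was on. A self-contained sketch of that composition pattern, using toy callables in place of the deleted classes:

```python
from typing import Any, Callable

def concatenate(*callables: Callable[[Any], Any]) -> Callable[[Any], Any]:
    """Compose callables left to right, as the deleted _concatenate helper did:
    concatenate(f, g, h)(x) == h(g(f(x)))."""
    def chain(x):
        for c in callables:
            x = c(x)
        return x
    return chain

# Old flag-enabled path (shape only; the real stages moved tensors to/from device):
#   infer = _concatenate(copy_to_device, model_infer, copy_to_host)
# After this commit, PaddleInferChainLegacy is the single timed inference op.
double_then_inc = concatenate(lambda x: x * 2, lambda x: x + 1)
assert double_then_inc(3) == 7
```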

+ 71 - 0
paddlex/inference/utils/hpi_model_info_collection.json

@@ -1144,6 +1144,38 @@
     ],
     "YOLO-Worldv2-L": [
       "paddle"
+    ],
+    "PP-OCRv5_server_rec": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PP-OCRv5_mobile_rec": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-OCRv5_server_det": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-OCRv5_mobile_det": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-FormulaNet_plus-L": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-FormulaNet_plus-M": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-FormulaNet_plus-S": [
+      "onnxruntime",
+      "paddle"
     ]
   },
   "gpu_cuda118_cudnn89": {
@@ -2255,6 +2287,45 @@
     ],
     "YOLO-Worldv2-L": [
       "paddle"
+    ],
+    "PP-DocBlockLayout": [
+      "tensorrt",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-DocLayout_plus-L": [
+      "tensorrt_fp16",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-OCRv5_server_rec": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-OCRv5_mobile_rec": [
+      "paddle_tensorrt_fp16",
+      "tensorrt",
+      "onnxruntime"
+    ],
+    "PP-OCRv5_server_det": [
+      "tensorrt",
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-OCRv5_mobile_det": [
+      "paddle_tensorrt",
+      "tensorrt",
+      "onnxruntime"
+    ],
+    "PP-FormulaNet_plus-L": [
+      "paddle"
+    ],
+    "PP-FormulaNet_plus-M": [
+      "paddle"
+    ],
+    "PP-FormulaNet_plus-S": [
+      "paddle"
     ]
   }
 }
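
The new entries follow the file's existing shape: under each device profile (e.g. `gpu_cuda118_cudnn89`), a model name maps to an ordered list of preferred inference backends. A minimal sketch of how such a priority list might be consumed; the helper below is hypothetical, not PaddleX's actual HPI selection code:

```python
import json

def pick_backend(collection_path, device_profile, model_name, available):
    """Return the first backend in the model's priority list that is available.

    Hypothetical helper; PaddleX's real selection logic lives in
    paddlex/inference/utils/hpi.py and may differ.
    """
    with open(collection_path, "r", encoding="utf-8") as f:
        collection = json.load(f)
    priorities = collection[device_profile][model_name]
    for backend in priorities:
        if backend in available:
            return backend
    raise RuntimeError(f"No supported backend for {model_name}: tried {priorities}")

# e.g. picks "tensorrt" for PP-OCRv5_server_det on the CUDA 11.8 profile:
# pick_backend("hpi_model_info_collection.json", "gpu_cuda118_cudnn89",
#              "PP-OCRv5_server_det", {"tensorrt", "paddle"})
```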

+ 0 - 4
paddlex/utils/flags.py

@@ -24,7 +24,6 @@ __all__ = [
     "INFER_BENCHMARK_ITERS",
     "INFER_BENCHMARK_WARMUP",
     "INFER_BENCHMARK_OUTPUT_DIR",
-    "INFER_BENCHMARK_USE_NEW_INFER_API",
     "FLAGS_json_format_model",
     "USE_PIR_TRT",
     "DISABLE_DEV_MODEL_WL",
@@ -69,6 +68,3 @@ INFER_BENCHMARK_ITERS = get_flag_from_env_var(
 INFER_BENCHMARK_USE_CACHE_FOR_READ = get_flag_from_env_var(
     "PADDLE_PDX_INFER_BENCHMARK_USE_CACHE_FOR_READ", False
 )
-INFER_BENCHMARK_USE_NEW_INFER_API = get_flag_from_env_var(
-    "PADDLE_PDX_INFER_BENCHMARK_USE_NEW_INFER_API", False
-)
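
The body of `get_flag_from_env_var` is not shown in this diff; as an illustration of the pattern the removed lines used, a hypothetical reader with the same `(name, default)` shape might look like this:

```python
import os

def get_flag_from_env_var(name, default):
    """Sketch of an env-var flag reader in the spirit of paddlex.utils.flags;
    the real implementation is not shown in this diff and may differ."""
    val = os.environ.get(name)
    if val is None:
        return default
    if isinstance(default, bool):
        return val.lower() in ("1", "true", "yes", "on")
    return type(default)(val)

# After this commit, INFER_BENCHMARK_USE_NEW_INFER_API is gone from
# paddlex.utils.flags.__all__, so the old environment variable is ignored.
```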