6 months ago · 6f8f095ed6
--- a/docs/module_usage/instructions/benchmark.en.md
+++ b/docs/module_usage/instructions/benchmark.en.md
@@ -19,7 +19,6 @@ To enable the benchmark feature, you must set the following environment variable
 
				 * `PADDLE_PDX_INFER_BENCHMARK_ITERS`: The number of iterations for testing (default is `0`).
			
 
				 * `PADDLE_PDX_INFER_BENCHMARK_OUTPUT_DIR`: The directory where the metrics are saved (e.g., `./benchmark`). The default is `None`, meaning the benchmark metrics will not be saved.
			
 
				 * `PADDLE_PDX_INFER_BENCHMARK_USE_CACHE_FOR_READ`: When set to `True`, the caching mechanism is applied to the operation of reading input data to avoid repetitive I/O overhead, and the time consumed by data read and cache is not recorded in the core time (default is `False`).
			
 
				-* `PADDLE_PDX_INFER_BENCHMARK_USE_NEW_INFER_API`: When set to `True`,the new inference API is enabled, providing more detailed information for inference operations on benchmarks (default is `False`).
			
 
				 
			
 
				 **Note**:
			
 
				 
			
--- a/docs/module_usage/instructions/benchmark.md
+++ b/docs/module_usage/instructions/benchmark.md
@@ -19,7 +19,6 @@ Benchmark 功能会统计模型在端到端推理过程中，所有操作的每
 
				 * `PADDLE_PDX_INFER_BENCHMARK_ITERS`：测试的循环次数，默认为 `0`；
			
 
				 * `PADDLE_PDX_INFER_BENCHMARK_OUTPUT_DIR`：保存指标的目录，如 `./benchmark`，默认为 `None`，表示不保存 benchmark 指标；
			
 
				 * `PADDLE_PDX_INFER_BENCHMARK_USE_CACHE_FOR_READ`：设置为 `True` 时则对读取输入数据操作应用缓存机制，避免重复I/O开销，并且数据读取及缓存消耗的时间不记录到核心耗时中。默认为 `False`；
			
 
				-* `PADDLE_PDX_INFER_BENCHMARK_USE_NEW_INFER_API`：设置为 `True` 时则使用新的推理API，可以看更细致的分阶段结果。默认为 `False`。
			
 
				 
			
 
				 **注意**：
			
 
				 
			
--- a/paddlex/inference/models/common/static_infer.py
+++ b/paddlex/inference/models/common/static_infer.py
@@ -22,8 +22,7 @@ import numpy as np
 
				 
			
 
				 from ....utils import logging
			
 
				 from ....utils.deps import class_requires_deps
			
 
				-from ....utils.device import constr_device
			
 
				-from ....utils.flags import DEBUG, INFER_BENCHMARK_USE_NEW_INFER_API, USE_PIR_TRT
			
 
				+from ....utils.flags import DEBUG, USE_PIR_TRT
			
 
				 from ...utils.benchmark import benchmark, set_inference_operations
			
 
				 from ...utils.hpi import (
			
 
				     HPIConfig,
			
@@ -40,9 +39,6 @@ from ...utils.trt_config import DISABLE_TRT_HALF_OPS_CONFIG
 
				 CACHE_DIR = ".cache"
			
 
				 
			
 
				 INFERENCE_OPERATIONS = [
			
 
				-    "PaddleCopyToDevice",
			
 
				-    "PaddleCopyToHost",
			
 
				-    "PaddleModelInfer",
			
 
				     "PaddleInferChainLegacy",
			
 
				     "MultiBackendInfer",
			
 
				 ]
			
@@ -233,47 +229,6 @@ def _sort_inputs(inputs, names):
 
				     return inputs
			
 
				 
			
 
				 
			
 
				-def _concatenate(*callables):
			
 
				-    def _chain(x):
			
 
				-        for c in callables:
			
 
				-            x = c(x)
			
 
				-        return x
			
 
				-
			
 
				-    return _chain
			
 
				-
			
 
				-
			
 
				-@benchmark.timeit
			
 
				-class PaddleCopyToDevice:
			
 
				-    def __init__(self, device_type, device_id):
			
 
				-        self.device_type = device_type
			
 
				-        self.device_id = device_id
			
 
				-
			
 
				-    def __call__(self, arrs):
			
 
				-        import paddle
			
 
				-
			
 
				-        device_id = [self.device_id] if self.device_id is not None else self.device_id
			
 
				-        device = constr_device(self.device_type, device_id)
			
 
				-        paddle_tensors = [paddle.to_tensor(i, place=device) for i in arrs]
			
 
				-        return paddle_tensors
			
 
				-
			
 
				-
			
 
				-@benchmark.timeit
			
 
				-class PaddleCopyToHost:
			
 
				-    def __call__(self, paddle_tensors):
			
 
				-        arrs = [i.numpy() for i in paddle_tensors]
			
 
				-        return arrs
			
 
				-
			
 
				-
			
 
				-@benchmark.timeit
			
 
				-class PaddleModelInfer:
			
 
				-    def __init__(self, predictor):
			
 
				-        super().__init__()
			
 
				-        self.predictor = predictor
			
 
				-
			
 
				-    def __call__(self, x):
			
 
				-        return self.predictor.run(x)
			
 
				-
			
 
				-
			
 
				 # FIXME: Name might be misleading
			
 
				 @benchmark.timeit
			
 
				 class PaddleInferChainLegacy:
			
@@ -317,15 +272,7 @@ class PaddleInfer(StaticInfer):
 
				         self.model_file_prefix = model_file_prefix
			
 
				         self._option = option
			
 
				         self.predictor = self._create()
			
 
				-        if INFER_BENCHMARK_USE_NEW_INFER_API:
			
 
				-            device_type = self._option.device_type
			
 
				-            device_type = "gpu" if device_type == "dcu" else device_type
			
 
				-            copy_to_device = PaddleCopyToDevice(device_type, self._option.device_id)
			
 
				-            copy_to_host = PaddleCopyToHost()
			
 
				-            model_infer = PaddleModelInfer(self.predictor)
			
 
				-            self.infer = _concatenate(copy_to_device, model_infer, copy_to_host)
			
 
				-        else:
			
 
				-            self.infer = PaddleInferChainLegacy(self.predictor)
			
 
				+        self.infer = PaddleInferChainLegacy(self.predictor)
			
 
				 
			
 
				     def __call__(self, x: Sequence[np.ndarray]) -> List[np.ndarray]:
			
 
				         names = self.predictor.get_input_names()
			
--- a/paddlex/inference/utils/hpi_model_info_collection.json
+++ b/paddlex/inference/utils/hpi_model_info_collection.json
@@ -1144,6 +1144,38 @@
 
				     ],
			
 
				     "YOLO-Worldv2-L": [
			
 
				       "paddle"
			
 
				+    ],
			
 
				+    "PP-OCRv5_server_rec": [
			
 
				+      "paddle",
			
 
				+      "openvino",
			
 
				+      "onnxruntime"
			
 
				+    ],
			
 
				+    "PP-OCRv5_mobile_rec": [
			
 
				+      "openvino",
			
 
				+      "onnxruntime",
			
 
				+      "paddle"
			
 
				+    ],
			
 
				+    "PP-OCRv5_server_det": [
			
 
				+      "openvino",
			
 
				+      "onnxruntime",
			
 
				+      "paddle"
			
 
				+    ],
			
 
				+    "PP-OCRv5_mobile_det": [
			
 
				+      "openvino",
			
 
				+      "onnxruntime",
			
 
				+      "paddle"
			
 
				+    ],
			
 
				+    "PP-FormulaNet_plus-L": [
			
 
				+      "onnxruntime",
			
 
				+      "paddle"
			
 
				+    ],
			
 
				+    "PP-FormulaNet_plus-M": [
			
 
				+      "onnxruntime",
			
 
				+      "paddle"
			
 
				+    ],
			
 
				+    "PP-FormulaNet_plus-S": [
			
 
				+      "onnxruntime",
			
 
				+      "paddle"
			
 
				     ]
			
 
				   },
			
 
				   "gpu_cuda118_cudnn89": {
			
@@ -2255,6 +2287,45 @@
 
				     ],
			
 
				     "YOLO-Worldv2-L": [
			
 
				       "paddle"
			
 
				+    ],
			
 
				+    "PP-DocBlockLayout": [
			
 
				+      "tensorrt",
			
 
				+      "paddle",
			
 
				+      "onnxruntime"
			
 
				+    ],
			
 
				+    "PP-DocLayout_plus-L": [
			
 
				+      "tensorrt_fp16",
			
 
				+      "paddle",
			
 
				+      "onnxruntime"
			
 
				+    ],
			
 
				+    "PP-OCRv5_server_rec": [
			
 
				+      "paddle_tensorrt_fp16",
			
 
				+      "tensorrt_fp16",
			
 
				+      "onnxruntime"
			
 
				+    ],
			
 
				+    "PP-OCRv5_mobile_rec": [
			
 
				+      "paddle_tensorrt_fp16",
			
 
				+      "tensorrt",
			
 
				+      "onnxruntime"
			
 
				+    ],
			
 
				+    "PP-OCRv5_server_det": [
			
 
				+      "tensorrt",
			
 
				+      "onnxruntime",
			
 
				+      "paddle"
			
 
				+    ],
			
 
				+    "PP-OCRv5_mobile_det": [
			
 
				+      "paddle_tensorrt",
			
 
				+      "tensorrt",
			
 
				+      "onnxruntime"
			
 
				+    ],
			
 
				+    "PP-FormulaNet_plus-L": [
			
 
				+      "paddle"
			
 
				+    ],
			
 
				+    "PP-FormulaNet_plus-M": [
			
 
				+      "paddle"
			
 
				+    ],
			
 
				+    "PP-FormulaNet_plus-S": [
			
 
				+      "paddle"
			
 
				     ]
			
 
				   }
			
 
				 }
			
--- a/paddlex/utils/flags.py
+++ b/paddlex/utils/flags.py
@@ -24,7 +24,6 @@ __all__ = [
 
				     "INFER_BENCHMARK_ITERS",
			
 
				     "INFER_BENCHMARK_WARMUP",
			
 
				     "INFER_BENCHMARK_OUTPUT_DIR",
			
 
				-    "INFER_BENCHMARK_USE_NEW_INFER_API",
			
 
				     "FLAGS_json_format_model",
			
 
				     "USE_PIR_TRT",
			
 
				     "DISABLE_DEV_MODEL_WL",
			
@@ -69,6 +68,3 @@ INFER_BENCHMARK_ITERS = get_flag_from_env_var(
 
				 INFER_BENCHMARK_USE_CACHE_FOR_READ = get_flag_from_env_var(
			
 
				     "PADDLE_PDX_INFER_BENCHMARK_USE_CACHE_FOR_READ", False
			
 
				 )
			
 
				-INFER_BENCHMARK_USE_NEW_INFER_API = get_flag_from_env_var(
			
 
				-    "PADDLE_PDX_INFER_BENCHMARK_USE_NEW_INFER_API", False
			
 
				-)