@@ -12,10 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import abc
+import importlib.util
+import subprocess
 from typing import Sequence, List
 from pathlib import Path
 
-import lazy_paddle
+import lazy_paddle as paddle
 import numpy as np
 
 from ....utils import logging
@@ -26,37 +29,44 @@ from ....utils.flags import (
     INFER_BENCHMARK_USE_NEW_INFER_API,
 )
 from ...utils.benchmark import benchmark, set_inference_operations
-from ...utils.hpi import get_model_paths
+from ...utils.hpi import (
+    HPIConfig,
+    ONNXRuntimeConfig,
+    OpenVINOConfig,
+    TensorRTConfig,
+    OMConfig,
+    get_model_paths,
+    suggest_inference_backend_and_config,
+)
 from ...utils.pp_option import PaddlePredictorOption
 from ...utils.trt_config import DISABLE_TRT_HALF_OPS_CONFIG
 
 
 CACHE_DIR = ".cache"
 
-if INFER_BENCHMARK_USE_NEW_INFER_API:
-    INFERENCE_OPERATIONS = [
-        "PaddleCopyToDevice",
-        "PaddleCopyToHost",
-        "PaddleModelInfer",
-    ]
-else:
-    INFERENCE_OPERATIONS = ["PaddleInferChainLegacy"]
+INFERENCE_OPERATIONS = [
+    "PaddleCopyToDevice",
+    "PaddleCopyToHost",
+    "PaddleModelInfer",
+    "PaddleInferChainLegacy",
+    "MultiBackendInfer",
+]
 set_inference_operations(INFERENCE_OPERATIONS)
 
 
 # XXX: Better use Paddle Inference API to do this
 def _pd_dtype_to_np_dtype(pd_dtype):
-    if pd_dtype == lazy_paddle.inference.DataType.FLOAT64:
+    if pd_dtype == paddle.inference.DataType.FLOAT64:
         return np.float64
-    elif pd_dtype == lazy_paddle.inference.DataType.FLOAT32:
+    elif pd_dtype == paddle.inference.DataType.FLOAT32:
         return np.float32
-    elif pd_dtype == lazy_paddle.inference.DataType.INT64:
+    elif pd_dtype == paddle.inference.DataType.INT64:
         return np.int64
-    elif pd_dtype == lazy_paddle.inference.DataType.INT32:
+    elif pd_dtype == paddle.inference.DataType.INT32:
         return np.int32
-    elif pd_dtype == lazy_paddle.inference.DataType.UINT8:
+    elif pd_dtype == paddle.inference.DataType.UINT8:
         return np.uint8
-    elif pd_dtype == lazy_paddle.inference.DataType.INT8:
+    elif pd_dtype == paddle.inference.DataType.INT8:
         return np.int8
     else:
         raise TypeError(f"Unsupported data type: {pd_dtype}")
@@ -74,12 +84,12 @@ def _collect_trt_shape_range_info(
 
     dynamic_shape_input_data = dynamic_shape_input_data or {}
 
-    config = lazy_paddle.inference.Config(model_file, model_params)
+    config = paddle.inference.Config(model_file, model_params)
    config.enable_use_gpu(100, gpu_id)
    config.collect_shape_range_info(shape_range_info_path)
    # TODO: Add other needed options
    config.disable_glog_info()
-    predictor = lazy_paddle.inference.create_predictor(config)
+    predictor = paddle.inference.create_predictor(config)
 
     input_names = predictor.get_input_names()
     for name in dynamic_shapes:
@@ -147,7 +157,7 @@ def _convert_trt(
     dynamic_shapes,
     dynamic_shape_input_data,
 ):
-    from lazy_paddle.tensorrt.export import (
+    from paddle.tensorrt.export import (
         Input,
         TensorRTConfig,
         convert,
@@ -162,12 +172,12 @@ def _convert_trt(
 
     def _get_predictor(model_file, params_file):
         # HACK
-        config = lazy_paddle.inference.Config(str(model_file), str(params_file))
+        config = paddle.inference.Config(str(model_file), str(params_file))
         config.enable_use_gpu(100, device_id)
         # NOTE: Disable oneDNN to circumvent a bug in Paddle Inference
         config.disable_mkldnn()
         config.disable_glog_info()
-        return lazy_paddle.inference.create_predictor(config)
+        return paddle.inference.create_predictor(config)
 
     dynamic_shape_input_data = dynamic_shape_input_data or {}
 
@@ -246,7 +256,7 @@ class PaddleCopyToDevice:
     def __call__(self, arrs):
         device_id = [self.device_id] if self.device_id is not None else self.device_id
         device = constr_device(self.device_type, device_id)
-        paddle_tensors = [lazy_paddle.to_tensor(i, place=device) for i in arrs]
+        paddle_tensors = [paddle.to_tensor(i, place=device) for i in arrs]
         return paddle_tensors
 
 
@@ -292,19 +302,25 @@ class PaddleInferChainLegacy:
         return outputs
 
 
-class StaticInfer(object):
+class StaticInfer(metaclass=abc.ABCMeta):
+    @abc.abstractmethod
+    def __call__(self, x: Sequence[np.ndarray]) -> List[np.ndarray]:
+        raise NotImplementedError
+
+
+class PaddleInfer(StaticInfer):
     def __init__(
         self,
         model_dir: str,
-        model_prefix: str,
+        model_file_prefix: str,
         option: PaddlePredictorOption,
     ) -> None:
         super().__init__()
         self.model_dir = model_dir
-        self.model_file_prefix = model_prefix
+        self.model_file_prefix = model_file_prefix
         self._option = option
         self.predictor = self._create()
-        if self._use_new_inference_api:
+        if INFER_BENCHMARK_USE_NEW_INFER_API:
             device_type = self._option.device_type
             device_type = "gpu" if device_type == "dcu" else device_type
             copy_to_device = PaddleCopyToDevice(device_type, self._option.device_id)
@@ -314,13 +330,6 @@ class StaticInfer(object):
         else:
             self.infer = PaddleInferChainLegacy(self.predictor)
 
-    @property
-    def _use_new_inference_api(self):
-        # HACK: Temp fallback to legacy API via env var
-        return INFER_BENCHMARK_USE_NEW_INFER_API
-
-        # return self._option.device_type in ("cpu", "gpu", "dcu")
-
     def __call__(self, x: Sequence[np.ndarray]) -> List[np.ndarray]:
         names = self.predictor.get_input_names()
         if len(names) != len(x):
@@ -340,7 +349,7 @@ class StaticInfer(object):
         """_create"""
         model_paths = get_model_paths(self.model_dir, self.model_file_prefix)
         if "paddle" not in model_paths:
-            raise RuntimeError("No valid Paddle model found")
+            raise RuntimeError("No valid PaddlePaddle model found")
         model_file, params_file = model_paths["paddle"]
 
         if (
@@ -383,10 +392,10 @@ class StaticInfer(object):
             config.enable_use_gpu(100, self._option.device_id)
         # for Native Paddle and MKLDNN
         else:
-            config = lazy_paddle.inference.Config(str(model_file), str(params_file))
+            config = paddle.inference.Config(str(model_file), str(params_file))
             if self._option.device_type == "gpu":
                 config.exp_disable_mixed_precision_ops({"feed", "fetch"})
-                from lazy_paddle.inference import PrecisionType
+                from paddle.inference import PrecisionType
 
                 precision = (
                     PrecisionType.Half
@@ -427,7 +436,7 @@ class StaticInfer(object):
         if hasattr(config, "enable_new_executor"):
             config.enable_new_executor()
         # XXX: is_compiled_with_rocm() must be True on dcu platform ?
-        if lazy_paddle.is_compiled_with_rocm():
+        if paddle.is_compiled_with_rocm():
             # Delete unsupported passes in dcu
             config.delete_pass("conv2d_add_act_fuse_pass")
             config.delete_pass("conv2d_add_fuse_pass")
@@ -463,7 +472,7 @@ class StaticInfer(object):
         if not DEBUG:
             config.disable_glog_info()
 
-        predictor = lazy_paddle.inference.create_predictor(config)
+        predictor = paddle.inference.create_predictor(config)
 
         return predictor
 
@@ -482,9 +491,9 @@ class StaticInfer(object):
             )
             model_file = trt_save_path.with_suffix(".json")
             params_file = trt_save_path.with_suffix(".pdiparams")
-            config = lazy_paddle.inference.Config(str(model_file), str(params_file))
+            config = paddle.inference.Config(str(model_file), str(params_file))
         else:
-            config = lazy_paddle.inference.Config(str(model_file), str(params_file))
+            config = paddle.inference.Config(str(model_file), str(params_file))
             config.set_optim_cache_dir(str(cache_dir / "optim_cache"))
             # call enable_use_gpu() first to use TensorRT engine
             config.enable_use_gpu(100, self._option.device_id)
@@ -534,8 +543,11 @@ class StaticInfer(object):
                     self._option.trt_dynamic_shapes,
                     self._option.trt_dynamic_shape_input_data,
                 )
-            if self._option.model_name in DISABLE_TRT_HALF_OPS_CONFIG and self._option.run_mode == "trt_fp16":
-                lazy_paddle.inference.InternalUtils.disable_tensorrt_half_ops(
+            if (
+                self._option.model_name in DISABLE_TRT_HALF_OPS_CONFIG
+                and self._option.run_mode == "trt_fp16"
+            ):
+                paddle.inference.InternalUtils.disable_tensorrt_half_ops(
                    config, DISABLE_TRT_HALF_OPS_CONFIG[self._option.model_name]
                )
             config.enable_tuned_tensorrt_dynamic_shape(
@@ -559,3 +571,288 @@ class StaticInfer(object):
                 raise RuntimeError("No dynamic shape information provided")
 
         return config
+
+
+# FIXME: Name might be misleading
+@benchmark.timeit
+class MultiBackendInfer(object):
+    def __init__(self, ui_runtime):
+        super().__init__()
+        self.ui_runtime = ui_runtime
+
+    # The time consumed by the wrapper code will also be taken into account.
+    def __call__(self, x):
+        outputs = self.ui_runtime.infer(x)
+        return outputs
+
+
+# TODO: It would be better to refactor the code to make `HPInfer` a higher-level
+# class that uses `PaddleInfer`.
+class HPInfer(StaticInfer):
+    def __init__(
+        self,
+        model_dir: str,
+        model_file_prefix: str,
+        config: HPIConfig,
+    ) -> None:
+        super().__init__()
+        self._model_dir = model_dir
+        self._model_file_prefix = model_file_prefix
+        self._config = config
+        backend, backend_config = self._determine_backend_and_config()
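+        # Paddle Inference is handled by `PaddleInfer`; all other backends go
+        # through the `ultra_infer` runtime via `MultiBackendInfer`.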
+        if backend == "paddle":
+            self._use_paddle = True
+            self._paddle_infer = self._build_paddle_infer(backend_config)
+        else:
+            self._use_paddle = False
+            ui_runtime = self._build_ui_runtime(backend, backend_config)
+            self._multi_backend_infer = MultiBackendInfer(ui_runtime)
+            num_inputs = ui_runtime.num_inputs()
+            self._input_names = [
+                ui_runtime.get_input_info(i).name for i in range(num_inputs)
+            ]
+
+    @property
+    def model_dir(self) -> str:
+        return self._model_dir
+
+    @property
+    def model_file_prefix(self) -> str:
+        return self._model_file_prefix
+
+    @property
+    def config(self) -> HPIConfig:
+        return self._config
+
+    def __call__(self, x: Sequence[np.ndarray]) -> List[np.ndarray]:
+        if self._use_paddle:
+            return self._call_paddle_infer(x)
+        else:
+            return self._call_multi_backend_infer(x)
+
+    def _call_paddle_infer(self, x):
+        return self._paddle_infer(x)
+
+    def _call_multi_backend_infer(self, x):
+        num_inputs = len(self._input_names)
+        if len(x) != num_inputs:
+            raise ValueError(f"Expected {num_inputs} inputs but got {len(x)} instead")
+        x = _sort_inputs(x, self._input_names)
+        inputs = {}
+        for name, input_ in zip(self._input_names, x):
+            inputs[name] = np.ascontiguousarray(input_)
+        return self._multi_backend_infer(inputs)
+
+    def _determine_backend_and_config(self):
+        from ultra_infer import (
+            is_built_with_om,
+            is_built_with_openvino,
+            is_built_with_ort,
+            is_built_with_trt,
+        )
+
+        model_paths = get_model_paths(self._model_dir, self._model_file_prefix)
+        is_onnx_model_available = "onnx" in model_paths
+        # TODO: Give a warning if Paddle2ONNX is not available but can be used
+        # to select a better backend.
+        if self._config.auto_paddle2onnx:
+            if self._check_paddle2onnx():
+                is_onnx_model_available = (
+                    is_onnx_model_available or "paddle" in model_paths
+                )
+            else:
+                logging.debug(
+                    "Paddle2ONNX is not available. Automatic model conversion will not be performed."
+                )
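+        # Enumerate the backends that are usable given the installed build and
+        # the model formats available on disk.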
+        available_backends = []
+        if "paddle" in model_paths:
+            available_backends.append("paddle")
+        if is_built_with_openvino() and is_onnx_model_available:
+            available_backends.append("openvino")
+        if is_built_with_ort() and is_onnx_model_available:
+            available_backends.append("onnxruntime")
+        if is_built_with_trt() and is_onnx_model_available:
+            available_backends.append("tensorrt")
+        if is_built_with_om() and "om" in model_paths:
+            available_backends.append("om")
+
+        if not available_backends:
+            raise RuntimeError("No inference backend is available")
+
+        if (
+            self._config.backend is not None
+            and self._config.backend not in available_backends
+        ):
+            raise RuntimeError(
+                f"Inference backend {repr(self._config.backend)} is unavailable"
+            )
+
+        if self._config.auto_config:
+            # Should we use the strategy pattern here to allow extensible
+            # strategies?
+            ret = suggest_inference_backend_and_config(
+                self._config, available_backends=available_backends
+            )
+            if ret[0] is None:
+                # Should I use a custom exception?
+                raise RuntimeError(
+                    f"No inference backend and configuration could be suggested. Reason: {ret[1]}"
+                )
+            backend, backend_config = ret
+        else:
+            backend = self._config.backend
+            if backend is None:
+                raise RuntimeError(
+                    "When automatic configuration is not used, the inference backend must be specified manually."
+                )
+            backend_config = self._config.backend_config or {}
+
+        if backend == "paddle" and not backend_config:
+            logging.warning(
+                "The Paddle Inference backend is selected with the default configuration. This may not provide optimal performance."
+            )
+
+        return backend, backend_config
+
+    def _build_paddle_infer(self, backend_config):
+        kwargs = {
+            "device_type": self._config.device_type,
+            "device_id": self._config.device_id,
+            **backend_config,
+        }
+        # TODO: This is probably redundant. Can we reuse the code in the
+        # predictor class?
+        paddle_info = self._config.hpi_info.backend_configs.paddle_infer
+        if paddle_info is not None:
+            if (
+                kwargs.get("trt_dynamic_shapes") is None
+                and paddle_info.trt_dynamic_shapes is not None
+            ):
+                trt_dynamic_shapes = paddle_info.trt_dynamic_shapes
+                logging.debug("TensorRT dynamic shapes set to %s", trt_dynamic_shapes)
+                kwargs["trt_dynamic_shapes"] = trt_dynamic_shapes
+            if (
+                kwargs.get("trt_dynamic_shape_input_data") is None
+                and paddle_info.trt_dynamic_shape_input_data is not None
+            ):
+                trt_dynamic_shape_input_data = paddle_info.trt_dynamic_shape_input_data
+                logging.debug(
+                    "TensorRT dynamic shape input data set to %s",
+                    trt_dynamic_shape_input_data,
+                )
+                kwargs["trt_dynamic_shape_input_data"] = trt_dynamic_shape_input_data
+        pp_option = PaddlePredictorOption(self._config.pdx_model_name, **kwargs)
+        logging.info("Using Paddle Inference backend")
+        logging.info("Paddle predictor option: %s", pp_option)
+        return PaddleInfer(self._model_dir, self._model_file_prefix, option=pp_option)
+
+    def _build_ui_runtime(self, backend, backend_config, ui_option=None):
+        from ultra_infer import ModelFormat, Runtime, RuntimeOption
+
+        if ui_option is None:
+            ui_option = RuntimeOption()
+
+        if self._config.device_type == "cpu":
+            pass
+        elif self._config.device_type == "gpu":
+            ui_option.use_gpu(self._config.device_id or 0)
+        elif self._config.device_type == "npu":
+            ui_option.use_ascend()
+        else:
+            raise RuntimeError(
+                f"Unsupported device type {repr(self._config.device_type)}"
+            )
+
+        model_paths = get_model_paths(self.model_dir, self.model_file_prefix)
+        if backend in ("openvino", "onnxruntime", "tensorrt"):
+            # XXX: This introduces side effects.
+            if "onnx" not in model_paths:
+                if self._config.auto_paddle2onnx:
+                    if "paddle" not in model_paths:
+                        raise RuntimeError("PaddlePaddle model required")
+                    # The CLI is used here since there is currently no API.
+                    logging.info(
+                        "Automatically converting PaddlePaddle model to ONNX format"
+                    )
+                    subprocess.check_call(
+                        [
+                            "paddlex",
+                            "--paddle2onnx",
+                            "--paddle_model_dir",
+                            self._model_dir,
+                            "--onnx_model_dir",
+                            self._model_dir,
+                        ]
+                    )
+                    model_paths = get_model_paths(
+                        self.model_dir, self.model_file_prefix
+                    )
+                    assert "onnx" in model_paths
+                else:
+                    raise RuntimeError("ONNX model required")
+            ui_option.set_model_path(str(model_paths["onnx"]), "", ModelFormat.ONNX)
+        elif backend == "om":
+            if "om" not in model_paths:
+                raise RuntimeError("OM model required")
+            ui_option.set_model_path(str(model_paths["om"]), "", ModelFormat.OM)
+        else:
+            raise ValueError(f"Unsupported inference backend {repr(backend)}")
+
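+        # Attach and configure the selected backend on the runtime option.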
+        if backend == "openvino":
+            backend_config = OpenVINOConfig.model_validate(backend_config)
+            ui_option.use_openvino_backend()
+            ui_option.set_cpu_thread_num(backend_config.cpu_num_threads)
+        elif backend == "onnxruntime":
+            backend_config = ONNXRuntimeConfig.model_validate(backend_config)
+            ui_option.use_ort_backend()
+            ui_option.set_cpu_thread_num(backend_config.cpu_num_threads)
+        elif backend == "tensorrt":
+            if (
+                backend_config.get("use_dynamic_shapes", True)
+                and backend_config.get("dynamic_shapes") is None
+            ):
+                trt_info = self._config.hpi_info.backend_configs.tensorrt
+                if trt_info is not None and trt_info.dynamic_shapes is not None:
+                    trt_dynamic_shapes = trt_info.dynamic_shapes
+                    logging.debug(
+                        "TensorRT dynamic shapes set to %s", trt_dynamic_shapes
+                    )
+                    backend_config = {
+                        **backend_config,
+                        "dynamic_shapes": trt_dynamic_shapes,
+                    }
+            backend_config = TensorRTConfig.model_validate(backend_config)
+            ui_option.use_trt_backend()
+            cache_dir = self.model_dir / CACHE_DIR / "tensorrt"
+            cache_dir.mkdir(parents=True, exist_ok=True)
+            ui_option.trt_option.serialize_file = str(cache_dir / "trt_serialized.trt")
+            if backend_config.precision == "FP16":
+                ui_option.trt_option.enable_fp16 = True
+            if not backend_config.use_dynamic_shapes:
+                raise RuntimeError(
+                    "TensorRT static shape inference is currently not supported"
+                )
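+            # Register dynamic shapes only when no serialized engine file exists
+            # yet; an existing engine file is reused as is (see the warning below).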
+            if backend_config.dynamic_shapes is not None:
+                if not Path(ui_option.trt_option.serialize_file).exists():
+                    for name, shapes in backend_config.dynamic_shapes.items():
+                        ui_option.trt_option.set_shape(name, *shapes)
+                else:
+                    logging.warning(
+                        "TensorRT dynamic shapes will be loaded from the file."
+                    )
+        elif backend == "om":
+            backend_config = OMConfig.model_validate(backend_config)
+            ui_option.use_om_backend()
+        else:
+            raise ValueError(f"Unsupported inference backend {repr(backend)}")
+
+        logging.info("Inference backend: %s", backend)
+        logging.info("Inference backend config: %s", backend_config)
+
+        ui_runtime = Runtime(ui_option)
+
+        return ui_runtime
+
+    def _check_paddle2onnx(self):
+        # HACK
+        return importlib.util.find_spec("paddle2onnx") is not None