[Feat] Support multi-backend inference (#2860)

* Enhance paddle inference and HPI info parsing

* Fix bugs and backport

* Backward compatibility

* Add comments

* Add TODO

* Refine pptrt

* Support multibackend inference

* Update requirements

* Polish

* Update

* Update

* Add full support

* Fix bugs

* Upper case to lower case

* Support CLI

* Support auto-config

* Check if ultra_infer is built with backend

* Add note

* Add NPU OM backend

* Rename omruntime to om

* Fix style

* Replace lazy_paddle with paddle

* MBI -> HPI

* Fix bugs

* Update pir-trt cache dir

* Check trt dynamic shapes and input names matched

* Add refactor todo

* Fix bugs

* Check paddle2onnx

* Add missing is_built_with_om

* Update HPI ref

* Update comment

* NPU OM backend supports static input shape

* Fix bugs

* Fix bug

* Fix bugs

* Remove paddlex-hpi

* Revert "Remove paddlex-hpi"

This reverts commit a847242d555644744a691723807e1fb159fbe2f1.

* Merge develop

* Support benchmark

* Fix

* Polish

* Add comment

* Update issue template

* Update CLI

* Fix

* Load dynamic shapes from cache when using tensorrt

* Revert "Replace lazy_paddle with paddle"

This reverts commit b1271b7cb463c35a612ce6c84a62d8ced7f03e05.

* Polish

* Raise AttributeError in AttrDict

* Allow setting use_hpip and hpi_config at any level

* Log model name and model dir when creating model in pipeline

* Fix import paddle

* Change debug message

* Update links

* Update NPU links

* Change use_hpip semantics

* Allow setting device

* Optimize

* Fix bug

* Use benchmark results again

* Update benchmark results

* Support OM pipeline

* Fix bugs

---------

Co-authored-by: a31413510 <31413510@qq.com>
Co-authored-by: root <root@gzxj-inf-sci-k8s-910b-h12si7-0073.gzxj.baidu.com>
Lin Manhui · 7 months ago · commit a65d269b21

99 changed files with 4056 additions and 625 deletions
  1. +3 -3      .github/ISSUE_TEMPLATE/3_deploy.md
  2. +1 -1      docs/module_usage/instructions/benchmark.en.md
  3. +1 -1      docs/module_usage/instructions/benchmark.md
  4. +10 -1     libs/ultra-infer/CMakeLists.txt
  5. +27 -0     libs/ultra-infer/UltraInfer.cmake.in
  6. +29 -0     libs/ultra-infer/cmake/om.cmake
  7. +1 -0      libs/ultra-infer/python/setup.py
  8. +2 -0      libs/ultra-infer/python/ultra_infer/__init__.py
  9. +4 -0      libs/ultra-infer/python/ultra_infer/c_lib_wrap.py.in
  10. +4 -0     libs/ultra-infer/python/ultra_infer/runtime.py
  11. +4 -0     libs/ultra-infer/ultra_infer/core/config.h.in
  12. +4 -2     libs/ultra-infer/ultra_infer/pybind/runtime.cc
  13. +578 -0   libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.cc
  14. +82 -0    libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.h
  15. +7 -0     libs/ultra-infer/ultra_infer/runtime/enum_variables.cc
  16. +6 -2     libs/ultra-infer/ultra_infer/runtime/enum_variables.h
  17. +1 -0     libs/ultra-infer/ultra_infer/runtime/option_pybind.cc
  18. +18 -0    libs/ultra-infer/ultra_infer/runtime/runtime.cc
  19. +1 -0     libs/ultra-infer/ultra_infer/runtime/runtime.h
  20. +8 -0     libs/ultra-infer/ultra_infer/runtime/runtime_option.cc
  21. +1 -0     libs/ultra-infer/ultra_infer/runtime/runtime_option.h
  22. +18 -0    paddlex/constants.py
  23. +15 -11   paddlex/hpip_links.html
  24. +1 -0     paddlex/inference/__init__.py
  25. +4 -8     paddlex/inference/models/3d_bev_detection/predictor.py
  26. +14 -48   paddlex/inference/models/__init__.py
  27. +4 -8     paddlex/inference/models/anomaly_detection/predictor.py
  28. +1 -1     paddlex/inference/models/base/__init__.py
  29. +0 -1     paddlex/inference/models/base/predictor/__init__.py
  30. +267 -36  paddlex/inference/models/base/predictor/base_predictor.py
  31. +0 -159   paddlex/inference/models/base/predictor/basic_predictor.py
  32. +1 -1     paddlex/inference/models/common/__init__.py
  33. +339 -42  paddlex/inference/models/common/static_infer.py
  34. +4 -11    paddlex/inference/models/formula_recognition/predictor.py
  35. +4 -9     paddlex/inference/models/image_classification/predictor.py
  36. +4 -9     paddlex/inference/models/image_feature/predictor.py
  37. +1 -1     paddlex/inference/models/image_multilabel_classification/predictor.py
  38. +4 -9     paddlex/inference/models/image_unwarping/predictor.py
  39. +1 -6     paddlex/inference/models/instance_segmentation/predictor.py
  40. +2 -2     paddlex/inference/models/multilingual_speech_recognition/predictor.py
  41. +3 -8     paddlex/inference/models/object_detection/predictor.py
  42. +3 -7     paddlex/inference/models/open_vocabulary_detection/predictor.py
  43. +1 -1     paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py
  44. +3 -7     paddlex/inference/models/open_vocabulary_segmentation/predictor.py
  45. +2 -1     paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py
  46. +4 -9     paddlex/inference/models/semantic_segmentation/predictor.py
  47. +3 -8     paddlex/inference/models/table_structure_recognition/predictor.py
  48. +9 -9     paddlex/inference/models/text_detection/predictor.py
  49. +16 -2    paddlex/inference/models/text_detection/processors.py
  50. +8 -10    paddlex/inference/models/text_recognition/predictor.py
  51. +14 -2    paddlex/inference/models/text_recognition/processors.py
  52. +4 -9     paddlex/inference/models/ts_anomaly_detection/predictor.py
  53. +4 -9     paddlex/inference/models/ts_classification/predictor.py
  54. +4 -9     paddlex/inference/models/ts_forecasting/predictor.py
  55. +3 -10    paddlex/inference/models/video_classification/predictor.py
  56. +3 -7     paddlex/inference/models/video_detection/predictor.py
  57. +10 -2    paddlex/inference/pipelines/3d_bev_detection/pipeline.py
  58. +17 -4    paddlex/inference/pipelines/__init__.py
  59. +10 -2    paddlex/inference/pipelines/anomaly_detection/pipeline.py
  60. +5 -1     paddlex/inference/pipelines/attribute_recognition/pipeline.py
  61. +28 -9    paddlex/inference/pipelines/base.py
  62. +10 -2    paddlex/inference/pipelines/doc_preprocessor/pipeline.py
  63. +10 -2    paddlex/inference/pipelines/formula_recognition/pipeline.py
  64. +10 -2    paddlex/inference/pipelines/image_classification/pipeline.py
  65. +10 -2    paddlex/inference/pipelines/image_multilabel_classification/pipeline.py
  66. +10 -2    paddlex/inference/pipelines/instance_segmentation/pipeline.py
  67. +10 -2    paddlex/inference/pipelines/keypoint_detection/pipeline.py
  68. +11 -3    paddlex/inference/pipelines/layout_parsing/pipeline.py
  69. +9 -2     paddlex/inference/pipelines/layout_parsing/pipeline_v2.py
  70. +10 -2    paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py
  71. +10 -2    paddlex/inference/pipelines/object_detection/pipeline.py
  72. +17 -3    paddlex/inference/pipelines/ocr/pipeline.py
  73. +10 -2    paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py
  74. +10 -2    paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py
  75. +11 -3    paddlex/inference/pipelines/pp_chatocr/pipeline_base.py
  76. +10 -3    paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py
  77. +10 -3    paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py
  78. +6 -2     paddlex/inference/pipelines/pp_shitu_v2/pipeline.py
  79. +10 -2    paddlex/inference/pipelines/rotated_object_detection/pipeline.py
  80. +10 -2    paddlex/inference/pipelines/seal_recognition/pipeline.py
  81. +10 -2    paddlex/inference/pipelines/semantic_segmentation/pipeline.py
  82. +10 -2    paddlex/inference/pipelines/small_object_detection/pipeline.py
  83. +25 -10   paddlex/inference/pipelines/table_recognition/pipeline.py
  84. +40 -18   paddlex/inference/pipelines/table_recognition/pipeline_v2.py
  85. +10 -2    paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py
  86. +10 -2    paddlex/inference/pipelines/ts_classification/pipeline.py
  87. +10 -2    paddlex/inference/pipelines/ts_forecasting/pipeline.py
  88. +10 -2    paddlex/inference/pipelines/video_classification/pipeline.py
  89. +13 -3    paddlex/inference/pipelines/video_detection/pipeline.py
  90. +11 -13   paddlex/inference/utils/benchmark.py
  91. +155 -3   paddlex/inference/utils/hpi.py
  92. +1874 -0  paddlex/inference/utils/hpi_model_info_collection.json
  93. +8 -4     paddlex/inference/utils/pp_option.py
  94. +19 -4    paddlex/model.py
  95. +23 -13   paddlex/paddlex_cli.py
  96. +0 -2     paddlex/serving_requirements.txt
  97. +1 -2     paddlex/utils/config.py
  98. +2 -0     requirements.txt
  99. +1 -0     setup.py

+ 3 - 3
.github/ISSUE_TEMPLATE/3_deploy.md

@@ -22,13 +22,13 @@ assignees: ''
 
     * 您是否完全按照[高性能推理文档教程](https://paddlepaddle.github.io/PaddleX/main/pipeline_deploy/high_performance_inference.html)跑通了流程?
 
-    * 您使用的是离线激活方式还是在线激活方式?
-
 2. 服务化部署
 
     * 您是否完全按照[服务化部署文档教程](https://paddlepaddle.github.io/PaddleX/main/pipeline_deploy/serving.html)跑通了流程?
 
-    * 您在服务化部署中是否有使用高性能推理插件,如果是,您使用的是离线激活方式还是在线激活方式?
+    * 您在服务化部署中是否有使用高性能推理插件?
+
+    * 您使用了哪一种服务化部署方案?
 
     * 如果是多语言调用的问题,请给出调用示例子。
 

+ 1 - 1
docs/module_usage/instructions/benchmark.en.md

@@ -133,7 +133,7 @@ After enabling the benchmark feature, the benchmark results will be automaticall
 Below is an example of the benchmark results obtained by running the example program in Section 2:
 
 ```
-                                               WarmUp Data
+                                               Warmup Data
 +-------+------------+-----------+----------------+------------------------+----------------------------+
 | Iters | Batch Size | Instances |      Type      | Avg Time Per Iter (ms) | Avg Time Per Instance (ms) |
 +-------+------------+-----------+----------------+------------------------+----------------------------+

+ 1 - 1
docs/module_usage/instructions/benchmark.md

@@ -133,7 +133,7 @@ python test_infer.py
 运行第2节的示例程序所得到的 benchmark 结果如下:
 
 ```
-                                               WarmUp Data
+                                               Warmup Data
 +-------+------------+-----------+----------------+------------------------+----------------------------+
 | Iters | Batch Size | Instances |      Type      | Avg Time Per Iter (ms) | Avg Time Per Instance (ms) |
 +-------+------------+-----------+----------------+------------------------+----------------------------+

+ 10 - 1
libs/ultra-infer/CMakeLists.txt

@@ -70,6 +70,7 @@ option(ENABLE_SOPHGO_BACKEND "Whether to enable SOPHON backend." OFF)
 option(ENABLE_TVM_BACKEND "Whether to enable TVM backend." OFF)
 option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
 option(ENABLE_HORIZON_BACKEND "Whether to enable HORIZON backend." OFF)
+option(ENABLE_OM_BACKEND "Whether to enable OM backend." OFF)
 option(ENABLE_VISION "Whether to enable vision models usage." OFF)
 option(ENABLE_TEXT "Whether to enable text models usage." OFF)
 option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF)
@@ -172,6 +173,7 @@ file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ult
 file(GLOB_RECURSE DEPLOY_TVM_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/runtime/backends/tvm/*.cc)
 file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/runtime/backends/lite/*.cc)
 file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/pipeline/*.cc)
+file(GLOB_RECURSE DEPLOY_OM_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/runtime/backends/om/*.cc)
 file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/vision/*.cc)
 file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/text/*.cc)
 file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/*_pybind.cc)
@@ -194,7 +196,7 @@ list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS}
                                  ${DEPLOY_PIPELINE_SRCS} ${DEPLOY_RKNPU2_SRCS}
                                  ${DEPLOY_SOPHGO_SRCS}
                                  ${DEPLOY_HORIZON_SRCS} ${DEPLOY_TVM_SRCS}
-                                 ${DEPLOY_PADDLE_CUSTOM_OP_SRCS})
+                                 ${DEPLOY_PADDLE_CUSTOM_OP_SRCS} ${DEPLOY_OM_SRCS})
 
 
 set(DEPEND_LIBS "")
@@ -328,6 +330,13 @@ if(ENABLE_POROS_BACKEND)
   execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib)
 endif()
 
+if(ENABLE_OM_BACKEND)
+  add_definitions(-DENABLE_OM_BACKEND)
+  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_OM_SRCS})
+  include(${PROJECT_SOURCE_DIR}/cmake/om.cmake)
+  list(APPEND DEPEND_LIBS ${NPU_libs})
+endif()
+
 if(WITH_GPU)
   add_definitions(-DWITH_GPU)
   include_directories(${CUDA_DIRECTORY}/include)

+ 27 - 0
libs/ultra-infer/UltraInfer.cmake.in

@@ -32,6 +32,7 @@ set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
 set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
 set(ENABLE_POROS_BACKEND @ENABLE_POROS_BACKEND@)
 set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
+set(ENABLE_OM_BACKEND @ENABLE_OM_BACKEND@)
 set(ENABLE_PADDLE2ONNX @ENABLE_PADDLE2ONNX@)
 set(BUILD_PADDLE2ONNX @BUILD_PADDLE2ONNX@)
 
@@ -179,6 +180,32 @@ if(ENABLE_POROS_BACKEND)
   list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/include ${TORCH_INCLUDE})
 endif()
 
+if(ENABLE_OM_BACKEND)
+  set(LIB_PATH $ENV{NPU_HOST_LIB})
+  if (NOT DEFINED ENV{NPU_HOST_LIB})
+    set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/lib64")
+    message(STATUS "set default LIB_PATH: ${LIB_PATH}")
+  else()
+    message(STATUS "set LIB_PATH: ${LIB_PATH}")
+  endif ()
+
+  set(INC_PATH $ENV{DDK_PATH})
+  if (NOT DEFINED ENV{DDK_PATH})
+    set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest")
+    message(STATUS "set default INC_PATH: ${INC_PATH}")
+  else()
+    message(STATUS "set INC_PATH: ${INC_PATH}")
+  endif ()
+
+  set(NPU_libs ascendcl stdc++)
+
+  link_directories(${LIB_PATH})
+
+  list(APPEND ULTRAINFER_LIBS ${NPU_libs})
+
+  list(APPEND ULTRAINFER_INCS ${INC_PATH}/runtime/include/)
+endif()
+
 if(WITH_GPU)
   if(NOT CUDA_DIRECTORY)
     set(CUDA_DIRECTORY "/usr/local/cuda")

+ 29 - 0
libs/ultra-infer/cmake/om.cmake

@@ -0,0 +1,29 @@
+set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall")
+set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall")
+
+set(INC_PATH $ENV{DDK_PATH})
+if (NOT DEFINED ENV{DDK_PATH})
+    set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest")
+    message(STATUS "set default INC_PATH: ${INC_PATH}")
+else()
+    message(STATUS "set INC_PATH: ${INC_PATH}")
+endif ()
+
+set(LIB_PATH $ENV{NPU_HOST_LIB})
+if (NOT DEFINED ENV{NPU_HOST_LIB})
+    set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/lib64")
+    message(STATUS "set default LIB_PATH: ${LIB_PATH}")
+else()
+    message(STATUS "set LIB_PATH: ${LIB_PATH}")
+endif ()
+
+
+set(NPU_libs ascendcl stdc++)
+
+include_directories(
+   ${INC_PATH}/runtime/include/
+)
+
+link_directories(
+    ${LIB_PATH}
+)

+ 1 - 0
libs/ultra-infer/python/setup.py

@@ -69,6 +69,7 @@ setup_configs["ENABLE_PADDLE_BACKEND"] = os.getenv("ENABLE_PADDLE_BACKEND", "OFF
 setup_configs["ENABLE_POROS_BACKEND"] = os.getenv("ENABLE_POROS_BACKEND", "OFF")
 setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF")
 setup_configs["ENABLE_LITE_BACKEND"] = os.getenv("ENABLE_LITE_BACKEND", "OFF")
+setup_configs["ENABLE_OM_BACKEND"] = os.getenv("ENABLE_OM_BACKEND", "OFF")
 setup_configs["ENABLE_PADDLE2ONNX"] = os.getenv("ENABLE_PADDLE2ONNX", "OFF")
 setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "OFF")
 setup_configs["ENABLE_FLYCV"] = os.getenv("ENABLE_FLYCV", "OFF")

+ 2 - 0
libs/ultra-infer/python/ultra_infer/__init__.py

@@ -152,6 +152,8 @@ from .c_lib_wrap import (
     is_built_with_paddle,
     is_built_with_trt,
     get_default_cuda_directory,
+    is_built_with_openvino,
+    is_built_with_om,
 )
 
 

+ 4 - 0
libs/ultra-infer/python/ultra_infer/c_lib_wrap.py.in

@@ -43,6 +43,10 @@ def is_built_with_openvino() ->bool:
     return True if "@ENABLE_OPENVINO_BACKEND@" == "ON" else False
 
 
+def is_built_with_om() ->bool:
+    return True if "@ENABLE_OM_BACKEND@" == "ON" else False
+
+
 def get_default_cuda_directory() -> str:
     if not is_built_with_gpu():
        return ""

+ 4 - 0
libs/ultra-infer/python/ultra_infer/runtime.py

@@ -321,6 +321,10 @@ class RuntimeOption:
         """Wrapper function of use_lite_backend(), use Paddle Lite backend, support inference Paddle model on ARM CPU."""
         return self.use_lite_backend()
 
+    def use_om_backend(self):
+        """Use Om backend, support inference Om model on NPU"""
+        return self._option.use_om_backend()
+
     def set_lite_context_properties(self, context_properties):
         """Set nnadapter context properties for Paddle Lite backend."""
         logging.warning(
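
Combined with the `is_built_with_om()` helper exported above, callers can probe the installed wheel for OM support before selecting the backend. A short capability-check sketch; the fallback branch is illustrative and assumes the wheel was built with ONNX Runtime enabled:

```python
import ultra_infer as ui

option = ui.RuntimeOption()
if ui.is_built_with_om():
    # Wheel was compiled with ENABLE_OM_BACKEND=ON: OM models can run on Ascend NPU.
    option.use_om_backend()
else:
    # Fallback for wheels without OM support (assumes ORT was enabled at build time).
    option.use_ort_backend()
```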

+ 4 - 0
libs/ultra-infer/ultra_infer/core/config.h.in

@@ -84,3 +84,7 @@
 #ifndef ENABLE_TVM_BACKEND
 #cmakedefine ENABLE_TVM_BACKEND
 #endif
+
+#ifndef ENABLE_OM_BACKEND
+#cmakedefine ENABLE_OM_BACKEND
+#endif

+ 4 - 2
libs/ultra-infer/ultra_infer/pybind/runtime.cc

@@ -137,7 +137,8 @@ void BindRuntime(pybind11::module &m) {
       .value("RKNPU2", Backend::RKNPU2)
       .value("SOPHGOTPU", Backend::SOPHGOTPU)
       .value("TVM", Backend::TVM)
-      .value("LITE", Backend::LITE);
+      .value("LITE", Backend::LITE)
+      .value("OMONNPU", Backend::OMONNPU);
   pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(),
                                "ModelFormat for inference.")
       .value("PADDLE", ModelFormat::PADDLE)
@@ -145,7 +146,8 @@ void BindRuntime(pybind11::module &m) {
       .value("RKNN", ModelFormat::RKNN)
       .value("SOPHGO", ModelFormat::SOPHGO)
       .value("ONNX", ModelFormat::ONNX)
-      .value("TVMFormat", ModelFormat::TVMFormat);
+      .value("TVMFormat", ModelFormat::TVMFormat)
+      .value("OM", ModelFormat::OM);
   pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
                           "Device for inference.")
       .value("CPU", Device::CPU)

+ 578 - 0
libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.cc

@@ -0,0 +1,578 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultra_infer/runtime/backends/om/om_backend.h"
+
+#include "acl/acl.h"
+#include <chrono>
+#include <sys/stat.h>
+
+namespace ultra_infer {
+
+bool OmBackend::aclInitFlag = false;
+
+OmBackend::~OmBackend() {
+  FreeInputBuffer();
+  FreeOutputBuffer();
+  DestroyInput();
+  DestroyOutput();
+  DestroyResource();
+}
+
+TensorInfo OmBackend::GetInputInfo(int index) {
+  FDASSERT(index < NumInputs(),
+           "The index: %d should less than the number of inputs: %d.", index,
+           NumInputs());
+  return inputs_desc_[index];
+}
+
+std::vector<TensorInfo> OmBackend::GetInputInfos() { return inputs_desc_; }
+
+TensorInfo OmBackend::GetOutputInfo(int index) {
+  FDASSERT(index < NumOutputs(),
+           "The index: %d should less than the number of outputs %d.", index,
+           NumOutputs());
+
+  return outputs_desc_[index];
+}
+
+std::vector<TensorInfo> OmBackend::GetOutputInfos() { return outputs_desc_; }
+
+bool OmBackend::Init(const RuntimeOption &runtime_option) {
+  // ACL init
+  aclError ret = InitResource();
+  if (ret != true) {
+    FDERROR << "execute InitResource failed, errorCode = "
+            << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  // model init;
+  const char *omModelPath = (char *)runtime_option.model_file.data();
+  FDINFO << "omModelPath = " << omModelPath;
+  ret = LoadModel(omModelPath);
+  if (ret != true) {
+    FDERROR << "execute LoadModel failed";
+    return false;
+  }
+
+  // build input/output info
+  ret = CreateModelDesc();
+  if (ret != true) {
+    FDERROR << "execute CreateModelDesc failed";
+    return false;
+  }
+  ret = CreateInput();
+  if (ret != true) {
+    FDERROR << "execute CreateInput failed";
+    FreeInputBuffer();
+    return false;
+  }
+  ret = CreateOutput();
+  if (ret != true) {
+    FDERROR << "execute CreateOutput failed";
+    FreeInputBuffer();
+    return false;
+  }
+
+  return true;
+}
+
+bool OmBackend::Infer(std::vector<FDTensor> &inputs,
+                      std::vector<FDTensor> *outputs, bool copy_to_fd) {
+  // set context
+  aclError aclRet = aclrtSetCurrentContext(context_);
+  if (aclRet != ACL_SUCCESS) {
+    FDERROR << "aclrtSetCurrentContext failed"
+            << ", errorCode is " << static_cast<int32_t>(aclRet);
+    return false;
+  }
+
+  // Judge whether the input and output size are the same
+  if (inputs.size() != inputs_desc_.size()) {
+    FDERROR << "[OmBackend] Size of the inputs(" << inputs.size()
+            << ") should keep same with the inputs of this model("
+            << inputs_desc_.size() << ")." << std::endl;
+    FreeInputBuffer();
+    return false;
+  }
+
+  // cp input tensor to inputBuffer
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    if (inputs[i].Data() == nullptr) {
+      FDERROR << "inputs[i].Data is NULL." << std::endl;
+      return false;
+    }
+    size_t modelInputSize = aclmdlGetInputSizeByIndex(modelDesc_, i);
+    aclRet = aclrtMemcpy(inputBuffer[i], modelInputSize, inputs[i].Data(),
+                         inputs[i].Nbytes(), ACL_MEMCPY_DEVICE_TO_DEVICE);
+    if (aclRet != ACL_SUCCESS) {
+      FDERROR << "memcpy d2d failed. buffer size is " << modelInputSize
+              << ", inputs[i].Nbytes() is " << inputs[i].Nbytes()
+              << ", errorCode is " << static_cast<int32_t>(aclRet);
+      return false;
+    }
+  }
+
+  bool ret = Execute();
+  if (ret != true) {
+    FDERROR << "execute inference failed";
+    FreeInputBuffer();
+    DestroyInput();
+    DestroyOutput();
+    return false;
+  }
+
+  // cp outputbuffer to outputs
+  outputs->resize(outputs_desc_.size());
+  std::vector<int64_t> temp_shape(4);
+  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
+    temp_shape.resize(outputs_desc_[i].shape.size());
+    for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) {
+      temp_shape[j] = outputs_desc_[i].shape[j];
+    }
+    (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
+                         outputs_desc_[i].name);
+    size_t modelOutputSize = aclmdlGetOutputSizeByIndex(modelDesc_, i);
+    if (modelOutputSize != (*outputs)[i].Nbytes()) {
+      FDERROR << "output size is not match, index: " << i
+              << ", modelOutputSize:" << modelOutputSize
+              << ", (*outputs)[i].Nbytes():" << (*outputs)[i].Nbytes();
+      return false;
+    }
+    aclError aclRet = aclrtMemcpy(
+        (*outputs)[i].MutableData(), (*outputs)[i].Nbytes(), outputBuffer[i],
+        (*outputs)[i].Nbytes(), ACL_MEMCPY_DEVICE_TO_HOST);
+    if (aclRet != ACL_SUCCESS) {
+      FDERROR << "memcpy h2d failed. buffer size is " << (*outputs)[i].Nbytes()
+              << ", errorCode is " << static_cast<int32_t>(aclRet);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OmBackend::InitResource() {
+  // ACL init
+  aclError ret;
+  if (aclInitFlag == false) {
+    ret = aclInit(NULL);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "acl init failed, errorCode = " << static_cast<int32_t>(ret);
+      return false;
+    }
+    aclInitFlag = true;
+  }
+  // set device
+  ret = aclrtSetDevice(deviceId_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "acl set device" << deviceId_
+            << " failed, errorCode = " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  // create context (set current)
+  ret = aclrtCreateContext(&context_, deviceId_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "acl create context failed, deviceId" << deviceId_
+            << ", errorCode = " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  // create stream
+  ret = aclrtCreateStream(&stream_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "acl create stream failed, deviceId" << deviceId_
+            << ", errorCode = " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  // get run mode
+  // runMode is ACL_HOST which represents app is running in host
+  // runMode is ACL_DEVICE which represents app is running in device
+  aclrtRunMode runMode;
+  ret = aclrtGetRunMode(&runMode);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "acl get run mode failed, errorCode = "
+            << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  return true;
+}
+
+bool OmBackend::LoadModel(const char *modelPath) {
+  if (loadFlag_) {
+    FDERROR << "model has already been loaded";
+    return false;
+  }
+  aclError ret = aclmdlQuerySize(modelPath, &modelWorkSize_, &modelWeightSize_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "query model false, model file is" << modelPath
+            << ", errorCode is " << static_cast<int32_t>(ret);
+    return false;
+  }
+  // using ACL_MEM_MALLOC_HUGE_FIRST to malloc memory, huge memory is preferred
+  // to use and huge memory can improve performance.
+  ret = aclrtMalloc(&modelWorkPtr_, modelWorkSize_, ACL_MEM_MALLOC_HUGE_FIRST);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "malloc buffer for work failed, require size is "
+            << modelWorkSize_ << ", errorCode is " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  // using ACL_MEM_MALLOC_HUGE_FIRST to malloc memory, huge memory is preferred
+  // to use and huge memory can improve performance.
+  ret = aclrtMalloc(&modelWeightPtr_, modelWeightSize_,
+                    ACL_MEM_MALLOC_HUGE_FIRST);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "malloc buffer for weight failed, require size is "
+            << modelWeightSize_ << ", errorCode is "
+            << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  ret = aclmdlLoadFromFileWithMem(modelPath, &modelId_, modelWorkPtr_,
+                                  modelWorkSize_, modelWeightPtr_,
+                                  modelWeightSize_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "load model from file failed, model file is " << modelPath
+            << ", errorCode is " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  loadFlag_ = true;
+  FDINFO << "load model " << modelPath << " success";
+  return true;
+}
+
+bool OmBackend::Execute() {
+  aclError ret = aclmdlExecute(modelId_, input_, output_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "execute model failed, modelId is " << modelId_
+            << ", errorCode is " << static_cast<int32_t>(ret);
+    return false;
+  }
+  return true;
+}
+
+bool OmBackend::CreateModelDesc() {
+  modelDesc_ = aclmdlCreateDesc();
+  if (modelDesc_ == nullptr) {
+    FDERROR << "create model description failed";
+    return false;
+  }
+
+  aclError ret = aclmdlGetDesc(modelDesc_, modelId_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "get model description failed, modelId is " << modelId_
+            << ", errorCode is " << static_cast<int32_t>(ret);
+    return false;
+  }
+  return true;
+}
+
+bool OmBackend::CreateInput() {
+  // om used in this sample has only one input
+  if (modelDesc_ == nullptr) {
+    FDERROR << "no model description, create input failed";
+    return false;
+  }
+
+  // input:aclmdlDataset
+  input_ = aclmdlCreateDataset();
+  if (input_ == nullptr) {
+    FDERROR << "can't create dataset, create input failed";
+    return false;
+  }
+
+  // get input nums
+  size_t inputNum = aclmdlGetNumInputs(modelDesc_);
+  inputs_desc_.resize(inputNum);
+  inputBuffer.resize(inputNum, nullptr);
+  // inputBuffer = {nullptr};
+  for (size_t i = 0; i < inputNum; ++i) {
+    // get input size
+    size_t modelInputSize = aclmdlGetInputSizeByIndex(modelDesc_, i);
+    aclError ret =
+        aclrtMalloc(&inputBuffer[i], modelInputSize, ACL_MEM_MALLOC_HUGE_FIRST);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "can't malloc buffer, size is " << modelInputSize
+              << ", errorCode is " << static_cast<int32_t>(ret);
+      return false;
+    }
+    // inputData:aclDataBuffer
+    aclDataBuffer *inputData =
+        aclCreateDataBuffer(inputBuffer[i], modelInputSize);
+    if (inputData == nullptr) {
+      FDERROR << "can't create data buffer, create input failed";
+      return false;
+    }
+
+    // add aclDataBuffer to input
+    ret = aclmdlAddDatasetBuffer(input_, inputData);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "add input dataset buffer failed, errorCode is "
+              << static_cast<int32_t>(ret);
+      (void)aclDestroyDataBuffer(inputData);
+      inputData = nullptr;
+      return false;
+    }
+
+    // get name/shape/dtype of input to build inputs_desc_
+    const char *name;
+    name = aclmdlGetInputNameByIndex(modelDesc_, i);
+    std::string temp_name = name;
+
+    std::vector<int> temp_shape{};
+    aclmdlIODims dims;
+    ret = aclmdlGetInputDims(modelDesc_, i, &dims);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "get input tensor dims fail! ret=" << ret << std::endl;
+      return false;
+    }
+    int n_dims = (int)dims.dimCount;
+    temp_shape.resize(n_dims);
+    for (int j = 0; j < n_dims; j++) {
+      temp_shape[j] = (int)dims.dims[j];
+    }
+
+    aclDataType dtype = aclmdlGetInputDataType(modelDesc_, i);
+    FDDataType temp_dtype;
+    switch (dtype) {
+    case ACL_BOOL:
+      temp_dtype = FDDataType::BOOL;
+      break;
+    case ACL_UINT8:
+      temp_dtype = FDDataType::UINT8;
+      break;
+    case ACL_INT8:
+      temp_dtype = FDDataType::INT8;
+      break;
+    case ACL_INT16:
+      temp_dtype = FDDataType::INT16;
+      break;
+    case ACL_INT32:
+      temp_dtype = FDDataType::INT32;
+      break;
+    case ACL_INT64:
+      temp_dtype = FDDataType::INT64;
+      break;
+    case ACL_FLOAT16:
+      temp_dtype = FDDataType::FP16;
+      break;
+    case ACL_FLOAT:
+      temp_dtype = FDDataType::FP32;
+      break;
+    case ACL_DOUBLE:
+      temp_dtype = FDDataType::FP64;
+      break;
+    default:
+      FDERROR << "unsupported input tensor dtype: " << (int)dtype;
+      return false;
+    }
+    TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
+    inputs_desc_[i] = temp_input_info;
+  }
+  return true;
+}
+
+bool OmBackend::CreateOutput() {
+  if (modelDesc_ == nullptr) {
+    FDERROR << "no model description, create ouput failed";
+    return false;
+  }
+
+  output_ = aclmdlCreateDataset();
+  if (output_ == nullptr) {
+    FDERROR << "can't create dataset, create output failed";
+    return false;
+  }
+
+  size_t outputSize = aclmdlGetNumOutputs(modelDesc_);
+  outputs_desc_.resize(outputSize);
+  outputBuffer.resize(outputSize, nullptr);
+  for (size_t i = 0; i < outputSize; ++i) {
+    size_t modelOutputSize = aclmdlGetOutputSizeByIndex(modelDesc_, i);
+    aclError ret = aclrtMalloc(&outputBuffer[i], modelOutputSize,
+                               ACL_MEM_MALLOC_HUGE_FIRST);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "can't malloc buffer, size is " << modelOutputSize
+              << ", errorCode is " << static_cast<int32_t>(ret);
+      return false;
+    }
+
+    aclDataBuffer *outputData =
+        aclCreateDataBuffer(outputBuffer[i], modelOutputSize);
+    if (outputData == nullptr) {
+      FDERROR << "can't create data buffer, create output failed";
+      return false;
+    }
+
+    ret = aclmdlAddDatasetBuffer(output_, outputData);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "add output dataset buffer failed, errorCode is "
+              << static_cast<int32_t>(ret);
+      (void)aclDestroyDataBuffer(outputData);
+      return false;
+    }
+
+    const char *name;
+    name = aclmdlGetOutputNameByIndex(modelDesc_, i);
+    std::string temp_name = name;
+
+    std::vector<int> temp_shape{};
+    aclmdlIODims dims;
+    ret = aclmdlGetOutputDims(modelDesc_, i, &dims);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "get output tensor dims fail! ret=" << ret << std::endl;
+      return false;
+    }
+    int n_dims = (int)dims.dimCount;
+    temp_shape.resize(n_dims);
+    for (int j = 0; j < n_dims; j++) {
+      temp_shape[j] = (int)dims.dims[j];
+    }
+
+    aclDataType dtype = aclmdlGetOutputDataType(modelDesc_, i);
+    FDDataType temp_dtype;
+    switch (dtype) {
+    case ACL_BOOL:
+      temp_dtype = FDDataType::BOOL;
+      break;
+    case ACL_UINT8:
+      temp_dtype = FDDataType::UINT8;
+      break;
+    case ACL_INT8:
+      temp_dtype = FDDataType::INT8;
+      break;
+    case ACL_INT16:
+      temp_dtype = FDDataType::INT16;
+      break;
+    case ACL_INT32:
+      temp_dtype = FDDataType::INT32;
+      break;
+    case ACL_INT64:
+      temp_dtype = FDDataType::INT64;
+      break;
+    case ACL_FLOAT16:
+      temp_dtype = FDDataType::FP16;
+      break;
+    case ACL_FLOAT:
+      temp_dtype = FDDataType::FP32;
+      break;
+    case ACL_DOUBLE:
+      temp_dtype = FDDataType::FP64;
+      break;
+    default:
+      FDERROR << "unsupported output tensor dtype: " << (int)dtype;
+      return false;
+    }
+    TensorInfo temp_output_info = {temp_name, temp_shape, temp_dtype};
+    outputs_desc_[i] = temp_output_info;
+  }
+  return true;
+}
+
+void OmBackend::FreeInputBuffer() {
+  for (int i = 0; i < (int)inputs_desc_.size(); ++i) {
+    if (inputBuffer[i] != nullptr) {
+      (void)aclrtFree(inputBuffer[i]);
+      inputBuffer[i] = nullptr;
+    }
+  }
+}
+
+void OmBackend::FreeOutputBuffer() {
+  for (int i = 0; i < (int)outputs_desc_.size(); ++i) {
+    if (outputBuffer[i] != nullptr) {
+      (void)aclrtFree(outputBuffer[i]);
+      outputBuffer[i] = nullptr;
+    }
+  }
+}
+
+void OmBackend::DestroyInput() {
+  if (input_ == nullptr) {
+    return;
+  }
+
+  for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(input_); ++i) {
+    aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(input_, i);
+    (void)aclDestroyDataBuffer(dataBuffer);
+  }
+  (void)aclmdlDestroyDataset(input_);
+  input_ = nullptr;
+}
+
+void OmBackend::DestroyOutput() {
+  if (output_ == nullptr) {
+    return;
+  }
+
+  for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(output_); ++i) {
+    aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(output_, i);
+    void *data = aclGetDataBufferAddr(dataBuffer);
+    (void)aclrtFree(data);
+    (void)aclDestroyDataBuffer(dataBuffer);
+  }
+
+  (void)aclmdlDestroyDataset(output_);
+  output_ = nullptr;
+}
+
+void OmBackend::DestroyResource() {
+  // set context
+  aclError ret = aclrtSetCurrentContext(context_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "aclrtSetCurrentContext failed"
+            << ", errorCode is " << static_cast<int32_t>(ret);
+    return;
+  }
+  if (stream_ != nullptr) {
+    ret = aclrtDestroyStream(stream_);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "destroy stream failed, errorCode = "
+              << static_cast<int32_t>(ret);
+    }
+    stream_ = nullptr;
+  }
+
+  if (context_ != nullptr) {
+    ret = aclrtDestroyContext(context_);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "destroy context failed, errorCode = "
+              << static_cast<int32_t>(ret);
+    }
+    context_ = nullptr;
+  }
+
+  ret = aclrtResetDevice(deviceId_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "reset device " << deviceId_
+            << " failed, errorCode = " << static_cast<int32_t>(ret);
+  }
+
+  if (aclInitFlag == true) {
+    ret = aclFinalize();
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "finalize acl failed, errorCode = "
+              << static_cast<int32_t>(ret);
+    }
+    aclInitFlag = false;
+  }
+}
+
+} // namespace ultra_infer
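
A practical consequence of this backend, also noted in the commit log above: input and output buffers are allocated once in `CreateInput()`/`CreateOutput()` from the model description, and `Infer()` copies caller tensors into those fixed device buffers, so inputs must match the compiled static shapes exactly. A hedged caller-side check (whether `get_input_info` is exposed on the Python `Runtime` wrapper is an assumption):

```python
# Assumes `runtime` was created as in the earlier sketch and `x` is a NumPy input array.
info = runtime.get_input_info(0)
if list(x.shape) != list(info.shape):
    raise ValueError(f"OM model expects input shape {info.shape}, got {list(x.shape)}")
```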

+ 82 - 0
libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.h

@@ -0,0 +1,82 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "acl/acl.h"
+#include "ultra_infer/core/fd_tensor.h"
+#include "ultra_infer/runtime/backends/backend.h"
+
+namespace ultra_infer {
+class OmBackend : public BaseBackend {
+public:
+  OmBackend() = default;
+  virtual ~OmBackend();
+
+  // OM Backend implementation.
+  bool Init(const RuntimeOption &runtime_option) override;
+
+  int NumInputs() const override {
+    return static_cast<int>(inputs_desc_.size());
+  }
+  int NumOutputs() const override {
+    return static_cast<int>(outputs_desc_.size());
+  }
+
+  TensorInfo GetInputInfo(int index) override;
+  TensorInfo GetOutputInfo(int index) override;
+  std::vector<TensorInfo> GetInputInfos() override;
+  std::vector<TensorInfo> GetOutputInfos() override;
+
+  bool Infer(std::vector<FDTensor> &inputs, std::vector<FDTensor> *outputs,
+             bool copy_to_fd = true) override;
+  static bool aclInitFlag;
+
+private:
+  std::vector<TensorInfo> inputs_desc_;
+  std::vector<TensorInfo> outputs_desc_;
+  std::vector<void *> inputBuffer;
+  std::vector<void *> outputBuffer;
+  bool loadFlag_ = false; // model load flag
+  int32_t deviceId_;
+  uint32_t modelId_;
+  size_t modelWorkSize_;   // model work memory buffer size
+  size_t modelWeightSize_; // model weight memory buffer size
+  void *modelWorkPtr_;     // model work memory buffer
+  void *modelWeightPtr_;   // model weight memory buffer
+  aclmdlDesc *modelDesc_;
+  aclmdlDataset *input_;
+  aclmdlDataset *output_;
+  aclrtContext context_;
+  aclrtStream stream_;
+
+  bool LoadModel(const char *modelPath);
+  bool Execute();
+  bool CreateInput();
+  void DestroyInput();
+  bool CreateOutput();
+  void DestroyOutput();
+  void DestroyResource();
+  bool CreateModelDesc();
+  void FreeInputBuffer();
+  void FreeOutputBuffer();
+  bool InitResource();
+};
+} // namespace ultra_infer

+ 7 - 0
libs/ultra-infer/ultra_infer/runtime/enum_variables.cc

@@ -36,6 +36,8 @@ std::ostream &operator<<(std::ostream &out, const Backend &backend) {
     out << "Backend::HORIZONNPU";
   } else if (backend == Backend::TVM) {
     out << "Backend::TVM";
+  } else if (backend == Backend::OMONNPU) {
+    out << "Backend::OMONNPU";
   } else {
     out << "UNKNOWN-Backend";
   }
@@ -92,6 +94,8 @@ std::ostream &operator<<(std::ostream &out, const ModelFormat &format) {
     out << "ModelFormat::HORIZON";
   } else if (format == ModelFormat::TVMFormat) {
     out << "ModelFormat::TVMFormat";
+  } else if (format == ModelFormat::OM) {
+    out << "ModelFormat::OM";
   } else {
     out << "UNKNOWN-ModelFormat";
   }
@@ -130,6 +134,9 @@ std::vector<Backend> GetAvailableBackends() {
 #ifdef ENABLE_TVM_BACKEND
   backends.push_back(Backend::TVM);
 #endif
+#ifdef ENABLE_OM_BACKEND
+  backends.push_back(Backend::OMONNPU);
+#endif
   return backends;
 }
 

+ 6 - 2
libs/ultra-infer/ultra_infer/runtime/enum_variables.h

@@ -39,6 +39,7 @@ enum Backend {
   SOPHGOTPU,  ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only
   HORIZONNPU, ///< HORIZONNPU, support Horizon format model, Horizon NPU
   TVM,        ///< TVMBackend, support TVM format model, CPU / Nvidia GPU
+  OMONNPU,    ///< OMONNPU, support OM format model, OM NPU
 };
 
 /**
@@ -74,6 +75,7 @@ enum ModelFormat {
   SOPHGO,      ///< Model with SOPHGO format
   HORIZON,     ///< Model with HORIZON format
   TVMFormat,   ///< Model with TVM format
+  OM,          ///< Model with OM format
 };
 
 /// Describle all the supported backends for specified model format
@@ -87,7 +89,8 @@ static std::map<ModelFormat, std::vector<Backend>>
         {ModelFormat::HORIZON, {Backend::HORIZONNPU}},
         {ModelFormat::TORCHSCRIPT, {Backend::POROS}},
         {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}},
-        {ModelFormat::TVMFormat, {Backend::TVM}}};
+        {ModelFormat::TVMFormat, {Backend::TVM}},
+        {ModelFormat::OM, {Backend::OMONNPU}}};
 
 /// Describle all the supported backends for specified device
 static std::map<Device, std::vector<Backend>> s_default_backends_by_device = {
@@ -104,7 +107,8 @@ static std::map<Device, std::vector<Backend>> s_default_backends_by_device = {
     {Device::KUNLUNXIN, {Backend::LITE, Backend::PDINFER}},
     {Device::ASCEND, {Backend::LITE}},
     {Device::SOPHGOTPUD, {Backend::SOPHGOTPU}},
-    {Device::DIRECTML, {Backend::ORT}}};
+    {Device::DIRECTML, {Backend::ORT}},
+    {Device::ASCEND, {Backend::OMONNPU}}};
 
 inline bool Supported(ModelFormat format, Backend backend) {
   auto iter = s_default_backends_by_format.find(format);

+ 1 - 0
libs/ultra-infer/ultra_infer/runtime/option_pybind.cc

@@ -65,6 +65,7 @@ void BindOption(pybind11::module &m) {
       .def("use_trt_backend", &RuntimeOption::UseTrtBackend)
       .def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend)
       .def("use_lite_backend", &RuntimeOption::UseLiteBackend)
+      .def("use_om_backend", &RuntimeOption::UseOMBackend)
       .def("enable_pinned_memory", &RuntimeOption::EnablePinnedMemory)
       .def("disable_pinned_memory", &RuntimeOption::DisablePinnedMemory)
       .def("use_ipu", &RuntimeOption::UseIpu)

+ 18 - 0
libs/ultra-infer/ultra_infer/runtime/runtime.cc

@@ -63,6 +63,10 @@
 #include "ultra_infer/runtime/backends/tvm/tvm_backend.h"
 #endif
 
+#ifdef ENABLE_OM_BACKEND
+#include "ultra_infer/runtime/backends/om/om_backend.h"
+#endif
+
 namespace ultra_infer {
 
 bool AutoSelectBackend(RuntimeOption &option) {
@@ -147,6 +151,8 @@ bool Runtime::Init(const RuntimeOption &_option) {
     CreateHorizonBackend();
   } else if (option.backend == Backend::TVM) {
     CreateTVMBackend();
+  } else if (option.backend == Backend::OMONNPU) {
+    CreateOMBackend();
   } else {
     std::string msg = Str(GetAvailableBackends());
     FDERROR << "The compiled UltraInfer only supports " << msg << ", "
@@ -397,6 +403,18 @@ void Runtime::CreatePorosBackend() {
          << "." << std::endl;
 }
 
+void Runtime::CreateOMBackend() {
+#ifdef ENABLE_OM_BACKEND
+  backend_ = utils::make_unique<OmBackend>();
+  FDASSERT(backend_->Init(option), "Failed to initialize om backend.");
+#else
+  FDASSERT(false, "OMBackend is not available, please compiled with ",
+           " ENABLE_OM_BACKEND=ON.");
+#endif
+  FDINFO << "Runtime initialized with Backend::OMONNPU in " << option.device
+         << "." << std::endl;
+}
+
 // only for poros backend
 bool Runtime::Compile(std::vector<std::vector<FDTensor>> &prewarm_tensors) {
 #ifdef ENABLE_POROS_BACKEND

+ 1 - 0
libs/ultra-infer/ultra_infer/runtime/runtime.h

@@ -119,6 +119,7 @@ private:
   void CreateSophgoNPUBackend();
   void CreatePorosBackend();
   void CreateTVMBackend();
+  void CreateOMBackend();
   std::unique_ptr<BaseBackend> backend_;
   std::vector<FDTensor> input_tensors_;
   std::vector<FDTensor> output_tensors_;

+ 8 - 0
libs/ultra-infer/ultra_infer/runtime/runtime_option.cc

@@ -234,6 +234,14 @@ void RuntimeOption::UseHorizonNPUBackend() {
 #endif
 }
 
+void RuntimeOption::UseOMBackend() {
+#ifdef ENABLE_OM_BACKEND
+  backend = Backend::OMONNPU;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with npu om");
+#endif
+}
+
 void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) {
   FDWARNING << "`RuntimeOption::SetPaddleMKLDNN` will be removed in v1.2.0, "
                "please modify its member variable directly, e.g "

+ 1 - 0
libs/ultra-infer/ultra_infer/runtime/runtime_option.h

@@ -277,6 +277,7 @@ struct ULTRAINFER_DECL RuntimeOption {
   void UseLiteBackend();
   void UseHorizonNPUBackend();
   void UseTVMBackend();
+  void UseOMBackend();
 };
 
 } // namespace ultra_infer

+ 18 - 0
paddlex/constants.py

@@ -0,0 +1,18 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final
+
+
+MODEL_FILE_PREFIX: Final[str] = "inference"
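
The new `paddlex.constants` module centralizes the "inference" file-name prefix that predictors previously carried as a class attribute. A small sketch of how the prefix composes into concrete file names; the directory name and suffixes are illustrative, chosen to match common Paddle export layouts:

```python
from pathlib import Path

from paddlex.constants import MODEL_FILE_PREFIX

model_dir = Path("PP-LCNet_x1_0_infer")  # hypothetical exported-model directory
paddle_model = model_dir / f"{MODEL_FILE_PREFIX}.pdmodel"
paddle_params = model_dir / f"{MODEL_FILE_PREFIX}.pdiparams"
model_config = model_dir / f"{MODEL_FILE_PREFIX}.yml"  # per-model config read by the predictor
```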

+ 15 - 11
paddlex/hpip_links.html

@@ -4,16 +4,20 @@
         <title>PaddleX HPIP Links</title>
     </head>
     <body>
-        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/3.0.0rc0/ultra_infer_python-1.0.0.3.0.0rc0-cp38-cp38-linux_x86_64.whl"></a>
-        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/3.0.0rc0/ultra_infer_python-1.0.0.3.0.0rc0-cp39-cp39-linux_x86_64.whl"></a>
-        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/3.0.0rc0/ultra_infer_python-1.0.0.3.0.0rc0-cp310-cp310-linux_x86_64.whl"></a>
-        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/3.0.0rc0/ultra_infer_python-1.0.0.3.0.0rc0-cp311-cp311-linux_x86_64.whl"></a>
-        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/3.0.0rc0/ultra_infer_python-1.0.0.3.0.0rc0-cp312-cp312-linux_x86_64.whl"></a>
-        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/3.0.0rc0/ultra_infer_gpu_python-1.0.0.3.0.0rc0-cp38-cp38-linux_x86_64.whl"></a>
-        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/3.0.0rc0/ultra_infer_gpu_python-1.0.0.3.0.0rc0-cp39-cp39-linux_x86_64.whl"></a>
-        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/3.0.0rc0/ultra_infer_gpu_python-1.0.0.3.0.0rc0-cp310-cp310-linux_x86_64.whl"></a>
-        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/3.0.0rc0/ultra_infer_gpu_python-1.0.0.3.0.0rc0-cp311-cp311-linux_x86_64.whl"></a>
-        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/3.0.0rc0/ultra_infer_gpu_python-1.0.0.3.0.0rc0-cp312-cp312-linux_x86_64.whl"></a>
-        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/3.0.0rc0/paddlex_hpi-3.0.0rc0-py3-none-any.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_python-1.0.0-cp38-cp38-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_python-1.0.0-cp39-cp39-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_python-1.0.0-cp310-cp310-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_python-1.0.0-cp311-cp311-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_python-1.0.0-cp312-cp312-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_gpu_python-1.0.0-cp38-cp38-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_gpu_python-1.0.0-cp39-cp39-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_gpu_python-1.0.0-cp310-cp310-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_gpu_python-1.0.0-cp311-cp311-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_gpu_python-1.0.0-cp312-cp312-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_npu_python-1.0.0-cp38-cp38-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_npu_python-1.0.0-cp39-cp39-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_npu_python-1.0.0-cp310-cp310-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_npu_python-1.0.0-cp311-cp311-linux_x86_64.whl"></a>
+        <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/hpi/ultra_infer/releases/new_hpi/v1.0.0/ultra_infer_npu_python-1.0.0-cp312-cp312-linux_x86_64.whl"></a>
     </body>
 </html>
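
This page is a bare index of wheel links, the format that pip's `--find-links` option consumes; the entries now point at the `new_hpi` release path and add NPU builds alongside the CPU and GPU ones. A hedged installation sketch; the page's published URL is not given here, so a placeholder is used:

```python
import subprocess
import sys

# Placeholder: substitute the URL where hpip_links.html is actually hosted.
LINKS_PAGE = "https://example.com/hpip_links.html"

subprocess.check_call([
    sys.executable, "-m", "pip", "install",
    "ultra-infer-gpu-python",  # or ultra-infer-python / ultra-infer-npu-python
    "--find-links", LINKS_PAGE,
])
```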

+ 1 - 0
paddlex/inference/__init__.py

@@ -16,4 +16,5 @@ from ..utils import logging
 
 from .pipelines import create_pipeline, load_pipeline_config
 from .models import create_predictor
+from .utils.hpi import HPIConfig
 from .utils.pp_option import PaddlePredictorOption

+ 4 - 8
paddlex/inference/models/3d_bev_detection/predictor.py

@@ -27,7 +27,7 @@ MODELS = getattr(module_3d_model_list, "MODELS")
 from ...common.batch_sampler import Det3DBatchSampler
 from ...common.reader import ReadNuscenesData
 from ..common import StaticInfer
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from ..base.predictor.base_predictor import PredictionWrap
 from .processors import (
     LoadPointsFromFile,
@@ -42,8 +42,8 @@ from .processors import (
 from .result import BEV3DDetResult
 
 
-class BEVDet3DPredictor(BasicPredictor):
-    """BEVDet3DPredictor that inherits from BasicPredictor."""
+class BEVDet3DPredictor(BasePredictor):
+    """BEVDet3DPredictor that inherits from BasePredictor."""
 
     entities = MODELS
 
@@ -105,11 +105,7 @@ class BEVDet3DPredictor(BasicPredictor):
                 pre_tfs[name] = op
         pre_tfs["GetInferInput"] = GetInferInput()
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         return pre_tfs, infer
 

+ 14 - 48
paddlex/inference/models/__init__.py

@@ -14,11 +14,12 @@
 
 
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 from importlib import import_module
 from ...utils import errors
+from ..utils.hpi import HPIConfig
 from ..utils.official_models import official_models
-from .base import BasePredictor, BasicPredictor
+from .base import BasePredictor
 
 from .image_classification import ClasPredictor
 from .object_detection import DetPredictor
@@ -53,38 +54,13 @@ module_3d_bev_detection = import_module(".3d_bev_detection", "paddlex.inference.
 BEVDet3DPredictor = getattr(module_3d_bev_detection, "BEVDet3DPredictor")
 
 
-def _create_hp_predictor(
-    model_name, model_dir, device, config, hpi_params, *args, **kwargs
-):
-    try:
-        from paddlex_hpi.models import HPPredictor
-    except ModuleNotFoundError:
-        raise RuntimeError(
-            "The PaddleX HPI plugin is not properly installed, and the high-performance model inference features are not available."
-        ) from None
-    try:
-        predictor = HPPredictor.get(model_name)(
-            model_dir=model_dir,
-            config=config,
-            device=device,
-            *args,
-            hpi_params=hpi_params,
-            **kwargs,
-        )
-    except errors.others.ClassNotFoundException:
-        raise ValueError(
-            f"{model_name} is not supported by the PaddleX HPI plugin."
-        ) from None
-    return predictor
-
-
 def create_predictor(
     model_name: str,
     model_dir: Optional[str] = None,
     device=None,
     pp_option=None,
     use_hpip: bool = False,
-    hpi_params: Optional[Dict[str, Any]] = None,
+    hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     *args,
     **kwargs,
 ) -> BasePredictor:
@@ -97,26 +73,16 @@ def create_predictor(
     assert (
         model_name == config["Global"]["model_name"]
     ), f"Model name mismatch,please input the correct model dir."
-
-    if use_hpip:
-        return _create_hp_predictor(
-            model_name=model_name,
-            model_dir=model_dir,
-            config=config,
-            hpi_params=hpi_params,
-            device=device,
-            *args,
-            **kwargs,
-        )
-    else:
-        return BasicPredictor.get(model_name)(
-            model_dir=model_dir,
-            config=config,
-            device=device,
-            pp_option=pp_option,
-            *args,
-            **kwargs,
-        )
+    return BasePredictor.get(model_name)(
+        model_dir=model_dir,
+        config=config,
+        device=device,
+        pp_option=pp_option,
+        use_hpip=use_hpip,
+        hpi_config=hpi_config,
+        *args,
+        **kwargs,
+    )
 
 
 def check_model(model):
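
`create_predictor` no longer routes HPI requests to the separate `paddlex_hpi` package; `use_hpip` and `hpi_config` are simply forwarded to the registry-backed `BasePredictor`. A hedged usage sketch (the model name, directory, and `hpi_config` keys are illustrative; the actual `HPIConfig` schema lives in `paddlex/inference/utils/hpi.py`, which is not shown here):

```python
from paddlex.inference import create_predictor

predictor = create_predictor(
    model_name="PP-LCNet_x1_0",             # hypothetical model choice
    model_dir="./PP-LCNet_x1_0_infer",      # hypothetical local model directory
    device="gpu:0",
    use_hpip=True,
    hpi_config={"backend": "onnxruntime"},  # keys are assumptions; see HPIConfig
)
for result in predictor.predict("demo.jpg"):
    print(result)
```

Because `hpi_config` also accepts a plain dict, callers do not need to construct an `HPIConfig` themselves, which matches the `Union[Dict[str, Any], HPIConfig]` annotation above.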

+ 4 - 8
paddlex/inference/models/anomaly_detection/predictor.py

@@ -28,12 +28,12 @@ from ..common import (
     StaticInfer,
 )
 from .processors import MapToMask
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .result import UadResult
 
 
-class UadPredictor(BasicPredictor):
-    """UadPredictor that inherits from BasicPredictor."""
+class UadPredictor(BasePredictor):
+    """UadPredictor that inherits from BasePredictor."""
 
     entities = MODELS
 
@@ -82,11 +82,7 @@ class UadPredictor(BasicPredictor):
             preprocessors[name] = op
         preprocessors["ToBatch"] = ToBatch()
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
         postprocessors = {"Map_to_mask": MapToMask()}
         return preprocessors, infer, postprocessors
 

+ 1 - 1
paddlex/inference/models/base/__init__.py

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .predictor import BasePredictor, BasicPredictor
+from .predictor import BasePredictor

+ 0 - 1
paddlex/inference/models/base/predictor/__init__.py

@@ -13,4 +13,3 @@
 # limitations under the License.
 
 from .base_predictor import BasePredictor
-from .basic_predictor import BasicPredictor

+ 267 - 36
paddlex/inference/models/base/predictor/base_predictor.py

@@ -12,13 +12,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Dict, Any, Iterator
+from abc import ABC, abstractmethod
+from copy import deepcopy
 from pathlib import Path
-from abc import abstractmethod, ABC
-
-from .....utils.flags import INFER_BENCHMARK
-from ....utils.io import YAMLReader
+from typing import Any, List, Dict, Iterator, Optional, Union
+
+from pydantic import ValidationError
+
+from ..... import constants
+from .....utils import logging
+from .....utils.device import get_default_device, parse_device
+from .....utils.flags import (
+    INFER_BENCHMARK,
+    INFER_BENCHMARK_WARMUP,
+    INFER_BENCHMARK_ITERS,
+)
+from .....utils.subclass_register import AutoRegisterABCMetaClass
 from ....common.batch_sampler import BaseBatchSampler
+from ....utils.benchmark import benchmark, ENTRY_POINT_NAME
+from ....utils.hpi import HPIInfo, HPIConfig
+from ....utils.io import YAMLReader
+from ....utils.pp_option import PaddlePredictorOption
+from ...common import HPInfer, PaddleInfer
 
 
 class PredictionWrap:
@@ -52,19 +67,45 @@ class PredictionWrap:
         return {key: self._data[key][idx] for key in self._keys}
 
 
-class BasePredictor(ABC):
-    """BasePredictor."""
-
-    MODEL_FILE_PREFIX = "inference"
-
-    def __init__(self, model_dir: str, config: Dict = None) -> None:
+class BasePredictor(
+    ABC,
+    metaclass=AutoRegisterABCMetaClass,
+):
+    MODEL_FILE_PREFIX = constants.MODEL_FILE_PREFIX
+
+    __is_base = True
+
+    def __init__(
+        self,
+        model_dir: str,
+        config: Optional[Dict[str, Any]] = None,
+        *,
+        device: Optional[str] = None,
+        batch_size: int = 1,
+        pp_option: Optional[PaddlePredictorOption] = None,
+        use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
+    ) -> None:
         """Initializes the BasePredictor.
 
         Args:
-            model_dir (str): The directory where the static model files is stored.
-            config (dict, optional): The configuration of model to infer. Defaults to None.
+            model_dir (str): The directory where the model files are stored.
+            config (Optional[Dict[str, Any]], optional): The model configuration
+                dictionary. Defaults to None.
+            device (Optional[str], optional): The device to run the inference
+                engine on. Defaults to None.
+            batch_size (int, optional): The batch size to predict.
+                Defaults to 1.
+            pp_option (Optional[PaddlePredictorOption], optional): The inference
+                engine options. Defaults to None.
+            use_hpip (bool, optional): Whether to use high-performance inference
+                plugin. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The high-performance inference configuration dictionary.
+                Defaults to None.
         """
         super().__init__()
+
         self.model_dir = Path(model_dir)
         self.config = config if config else self.load_config(self.model_dir)
         self.batch_sampler = self._build_batch_sampler()
@@ -73,6 +114,19 @@ class BasePredictor(ABC):
         # alias predict() to the __call__()
         self.predict = self.__call__
 
+        self.batch_sampler.batch_size = batch_size
+        self._use_hpip = use_hpip
+        if not use_hpip:
+            if hpi_config is not None:
+                logging.warning("`hpi_config` will be ignored when not using HPIP.")
+            self._pp_option = self._prepare_pp_option(pp_option, device)
+        else:
+            if pp_option is not None:
+                logging.warning("`pp_option` will be ignored when using HPIP.")
+            self._hpi_config = self._prepare_hpi_config(hpi_config, device)
+
+        logging.debug(f"{self.__class__.__name__}: {self.model_dir}")
+
     @property
     def config_path(self) -> str:
         """
@@ -93,40 +147,105 @@ class BasePredictor(ABC):
         """
         return self.config["Global"]["model_name"]
 
-    @classmethod
-    def get_config_path(cls, model_dir) -> str:
-        """Get the path to the configuration file for the given model directory.
+    @property
+    def pp_option(self) -> PaddlePredictorOption:
+        if not hasattr(self, "_pp_option"):
+            raise AttributeError(f"{repr(self)} has no attribute 'pp_option'.")
+        return self._pp_option
+
+    @property
+    def hpi_config(self) -> HPIConfig:
+        if not hasattr(self, "_hpi_config"):
+            raise AttributeError(f"{repr(self)} has no attribute 'hpi_config'.")
+        return self._hpi_config
+
+    @property
+    def use_hpip(self) -> bool:
+        return self._use_hpip
+
+    def __call__(
+        self,
+        input: Any,
+        batch_size: Optional[int] = None,
+        **kwargs: Any,
+    ) -> Iterator[Any]:
+        """
+        Predict with the input data.
 
         Args:
-            model_dir (Path): The directory where the static model files is stored.
+            input (Any): The input data to be predicted.
+            batch_size (int, optional): The batch size to use. Defaults to None.
+            **kwargs (Dict[str, Any]): Additional keyword arguments to set up the predictor.
 
         Returns:
-            Path: The path to the configuration file.
+            Iterator[Any]: An iterator yielding the prediction output.
         """
-        return model_dir / f"{cls.MODEL_FILE_PREFIX}.yml"
+        self.set_predictor(batch_size)
+        if INFER_BENCHMARK:
+            # TODO(zhang-prog): Get metadata of input data
+            @benchmark.timeit_with_options(name=ENTRY_POINT_NAME)
+            def _apply(input, **kwargs):
+                return list(self.apply(input, **kwargs))
+
+            if isinstance(input, list):
+                raise TypeError("`input` cannot be a list in benchmark mode")
+            input = [input] * batch_size
+
+            if not (INFER_BENCHMARK_WARMUP > 0 or INFER_BENCHMARK_ITERS > 0):
+                raise RuntimeError(
+                    "At least one of `INFER_BENCHMARK_WARMUP` and `INFER_BENCHMARK_ITERS` must be greater than zero"
+                )
+
+            if INFER_BENCHMARK_WARMUP > 0:
+                benchmark.start_warmup()
+                for _ in range(INFER_BENCHMARK_WARMUP):
+                    output = _apply(input, **kwargs)
+                benchmark.collect(batch_size)
+                benchmark.stop_warmup()
+
+            if INFER_BENCHMARK_ITERS > 0:
+                for _ in range(INFER_BENCHMARK_ITERS):
+                    output = _apply(input, **kwargs)
+                benchmark.collect(batch_size)
+
+            yield output[0]
+        else:
+            yield from self.apply(input, **kwargs)
 
-    @classmethod
-    def load_config(cls, model_dir) -> Dict:
-        """Load the configuration from the specified model directory.
+    def set_predictor(
+        self,
+        batch_size: Optional[int] = None,
+    ) -> None:
+        """
+        Sets the predictor configuration.
 
         Args:
-            model_dir (Path): The where the static model files is stored.
+            batch_size (Optional[int], optional): The batch size to use. Defaults to None.
 
         Returns:
-            dict: The loaded configuration dictionary.
+            None
         """
-        yaml_reader = YAMLReader()
-        return yaml_reader.read(cls.get_config_path(model_dir))
-
-    @abstractmethod
-    def __call__(self, input: Any, **kwargs: Dict[str, Any]) -> Iterator[Any]:
-        """Predict with the given input and additional keyword arguments."""
-        raise NotImplementedError
-
-    @abstractmethod
-    def set_predictor(self, batch_size: int = None, device: str = None, *args) -> None:
-        """Sets up the predictor."""
-        raise NotImplementedError
+        if batch_size:
+            self.batch_sampler.batch_size = batch_size
+
+    def get_hpi_info(self):
+        if "Hpi" not in self.config:
+            return None
+        try:
+            return HPIInfo.model_validate(self.config["Hpi"])
+        except ValidationError as e:
+            logging.exception("The HPI info in the model config file is invalid.")
+            raise RuntimeError(f"Invalid HPI info: {str(e)}") from e
+
+    def create_static_infer(self):
+        if not self._use_hpip:
+            return PaddleInfer(self.model_dir, self.MODEL_FILE_PREFIX, self._pp_option)
+        else:
+            return HPInfer(
+                self.model_dir,
+                self.MODEL_FILE_PREFIX,
+                self._hpi_config,
+            )
 
     def apply(self, input: Any, **kwargs) -> Iterator[Any]:
         """
@@ -164,6 +283,31 @@ class BasePredictor(ABC):
         """
         raise NotImplementedError
 
+    @classmethod
+    def get_config_path(cls, model_dir) -> str:
+        """Get the path to the configuration file for the given model directory.
+
+        Args:
+            model_dir (Path): The directory where the static model files are stored.
+
+        Returns:
+            Path: The path to the configuration file.
+        """
+        return model_dir / f"{cls.MODEL_FILE_PREFIX}.yml"
+
+    @classmethod
+    def load_config(cls, model_dir) -> Dict:
+        """Load the configuration from the specified model directory.
+
+        Args:
+            model_dir (Path): The directory where the static model files are stored.
+
+        Returns:
+            dict: The loaded configuration dictionary.
+        """
+        yaml_reader = YAMLReader()
+        return yaml_reader.read(cls.get_config_path(model_dir))
+
     @abstractmethod
     def _build_batch_sampler(self) -> BaseBatchSampler:
         """Build batch sampler.
@@ -181,3 +325,90 @@ class BasePredictor(ABC):
             type: The result class.
         """
         raise NotImplementedError
+
+    def _prepare_pp_option(
+        self,
+        pp_option: Optional[PaddlePredictorOption],
+        device: Optional[str],
+    ) -> PaddlePredictorOption:
+        if pp_option is None or device is not None:
+            device_info = self._get_device_info(device)
+        else:
+            device_info = None
+        if pp_option is None:
+            pp_option = PaddlePredictorOption(model_name=self.model_name)
+        if device_info:
+            pp_option.device_type = device_info[0]
+            pp_option.device_id = device_info[1]
+        hpi_info = self.get_hpi_info()
+        if hpi_info is not None:
+            hpi_info = hpi_info.model_dump(exclude_unset=True)
+            if pp_option.trt_dynamic_shapes is None:
+                trt_dynamic_shapes = (
+                    hpi_info.get("backend_configs", {})
+                    .get("paddle_infer", {})
+                    .get("trt_dynamic_shapes", None)
+                )
+                if trt_dynamic_shapes is not None:
+                    logging.debug(
+                        "TensorRT dynamic shapes set to %s", trt_dynamic_shapes
+                    )
+                    pp_option.trt_dynamic_shapes = trt_dynamic_shapes
+            if pp_option.trt_dynamic_shape_input_data is None:
+                trt_dynamic_shape_input_data = (
+                    hpi_info.get("backend_configs", {})
+                    .get("paddle_infer", {})
+                    .get("trt_dynamic_shape_input_data", None)
+                )
+                if trt_dynamic_shape_input_data is not None:
+                    logging.debug(
+                        "TensorRT dynamic shape input data set to %s",
+                        trt_dynamic_shape_input_data,
+                    )
+                    pp_option.trt_dynamic_shape_input_data = (
+                        trt_dynamic_shape_input_data
+                    )
+        return pp_option
+
+    def _prepare_hpi_config(
+        self,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]],
+        device: Optional[str],
+    ) -> HPIConfig:
+        if hpi_config is None:
+            hpi_config = {}
+        elif isinstance(hpi_config, HPIConfig):
+            hpi_config = hpi_config.model_dump(exclude_unset=True)
+        else:
+            hpi_config = deepcopy(hpi_config)
+
+        if "model_name" not in hpi_config:
+            hpi_config["model_name"] = self.model_name
+
+        if device is not None or "device_type" not in hpi_config:
+            device_type, device_id = self._get_device_info(device)
+            hpi_config["device_type"] = device_type
+            if device is not None or "device_id" not in hpi_config:
+                hpi_config["device_id"] = device_id
+
+        if "hpi_info" not in hpi_config:
+            hpi_info = self.get_hpi_info()
+            if hpi_info is not None:
+                hpi_config["hpi_info"] = hpi_info
+
+        hpi_config = HPIConfig.model_validate(hpi_config)
+
+        return hpi_config
+
+    # Should this be static?
+    def _get_device_info(self, device):
+        if device is None:
+            device = get_default_device()
+        device_type, device_ids = parse_device(device)
+        if device_ids is not None:
+            device_id = device_ids[0]
+        else:
+            device_id = None
+        if device_ids and len(device_ids) > 1:
+            logging.debug("Got multiple device IDs. Using the first one: %d", device_id)
+        return device_type, device_id

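With `BasicPredictor` folded into `BasePredictor`, device, batch size, and engine selection all happen in the base constructor. A sketch of the resulting contract, assuming `ClasPredictor` is importable from the path shown in this commit, the directory name is a placeholder containing an exported inference model, and (for the HPIP branch) the `ultra_infer`-based plugin is installed:

```python
from paddlex.inference.models.image_classification.predictor import ClasPredictor

p = ClasPredictor(model_dir="PP-LCNet_x1_0_infer", device="cpu", batch_size=4)
print(p.use_hpip)    # False
print(p.pp_option)   # PaddlePredictorOption derived from `device` and the model's HPI info
# p.hpi_config       # raises AttributeError: not prepared when use_hpip=False

hp = ClasPredictor(
    model_dir="PP-LCNet_x1_0_infer",
    device="gpu:0",              # "gpu:0,1" also parses; only the first ID is used
    use_hpip=True,
    hpi_config={"backend": "onnxruntime"},
)
print(hp.hpi_config)  # validated HPIConfig; pp_option is not set in this mode
```

`create_static_infer()` then returns a `PaddleInfer` or an `HPInfer` accordingly, which is why the per-model predictors in this commit drop their hand-built `StaticInfer(...)` calls.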
+ 0 - 159
paddlex/inference/models/base/predictor/basic_predictor.py

@@ -1,159 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Dict, Any, Iterator
-from abc import abstractmethod
-
-from .....utils.subclass_register import AutoRegisterABCMetaClass
-from .....utils.flags import (
-    INFER_BENCHMARK,
-    INFER_BENCHMARK_WARMUP,
-    INFER_BENCHMARK_ITERS,
-)
-from .....utils import logging
-from ....utils.pp_option import PaddlePredictorOption
-from ....utils.benchmark import benchmark, ENTRY_POINT_NAME
-from .base_predictor import BasePredictor
-
-
-class BasicPredictor(
-    BasePredictor,
-    metaclass=AutoRegisterABCMetaClass,
-):
-    """BasicPredictor."""
-
-    __is_base = True
-
-    def __init__(
-        self,
-        model_dir: str,
-        config: Dict[str, Any] = None,
-        device: str = None,
-        batch_size: int = 1,
-        pp_option: PaddlePredictorOption = None,
-    ) -> None:
-        """Initializes the BasicPredictor.
-
-        Args:
-            model_dir (str): The directory where the model files are stored.
-            config (Dict[str, Any], optional): The configuration dictionary. Defaults to None.
-            device (str, optional): The device to run the inference engine on. Defaults to None.
-            batch_size (int, optional): The batch size to predict. Defaults to 1.
-            pp_option (PaddlePredictorOption, optional): The inference engine options. Defaults to None.
-        """
-        super().__init__(model_dir=model_dir, config=config)
-        if not pp_option:
-            pp_option = PaddlePredictorOption(model_name=self.model_name)
-        if device:
-            pp_option.set_device(device)
-        trt_dynamic_shapes = (
-            self.config.get("Hpi", {})
-            .get("backend_configs", {})
-            .get("paddle_infer", {})
-            .get("trt_dynamic_shapes", None)
-        )
-        if trt_dynamic_shapes:
-            pp_option.trt_dynamic_shapes = trt_dynamic_shapes
-        trt_dynamic_shape_input_data = (
-            self.config.get("Hpi", {})
-            .get("backend_configs", {})
-            .get("paddle_infer", {})
-            .get("trt_dynamic_shape_input_data", None)
-        )
-        if trt_dynamic_shape_input_data:
-            pp_option.trt_dynamic_shape_input_data = trt_dynamic_shape_input_data
-
-        self.pp_option = pp_option
-        self.pp_option.batch_size = batch_size
-        self.batch_sampler.batch_size = batch_size
-
-        logging.debug(f"{self.__class__.__name__}: {self.model_dir}")
-
-    def __call__(
-        self,
-        input: Any,
-        batch_size: int = None,
-        device: str = None,
-        pp_option: PaddlePredictorOption = None,
-        **kwargs: Dict[str, Any],
-    ) -> Iterator[Any]:
-        """
-        Predict with the input data.
-
-        Args:
-            input (Any): The input data to be predicted.
-            batch_size (int, optional): The batch size to use. Defaults to None.
-            device (str, optional): The device to run the predictor on. Defaults to None.
-            pp_option (PaddlePredictorOption, optional): The predictor options to set. Defaults to None.
-            **kwargs (Dict[str, Any]): Additional keyword arguments to set up predictor.
-
-        Returns:
-            Iterator[Any]: An iterator yielding the prediction output.
-        """
-        self.set_predictor(batch_size, device, pp_option)
-        if INFER_BENCHMARK:
-            # TODO(zhang-prog): Get metadata of input data
-            @benchmark.timeit_with_options(name=ENTRY_POINT_NAME)
-            def _apply(input, **kwargs):
-                return list(self.apply(input, **kwargs))
-
-            if isinstance(input, list):
-                raise TypeError("`input` cannot be a list in benchmark mode")
-            input = [input] * batch_size
-
-            if not (INFER_BENCHMARK_WARMUP > 0 or INFER_BENCHMARK_ITERS > 0):
-                raise RuntimeError(
-                    "At least one of `INFER_BENCHMARK_WARMUP` and `INFER_BENCHMARK_ITERS` must be greater than zero"
-                )
-
-            if INFER_BENCHMARK_WARMUP > 0:
-                benchmark.start_warmup()
-                for _ in range(INFER_BENCHMARK_WARMUP):
-                    output = _apply(input, **kwargs)
-                benchmark.collect(batch_size)
-                benchmark.stop_warmup()
-
-            if INFER_BENCHMARK_ITERS > 0:
-                for _ in range(INFER_BENCHMARK_ITERS):
-                    output = _apply(input, **kwargs)
-                benchmark.collect(batch_size)
-
-            yield output[0]
-        else:
-            yield from self.apply(input, **kwargs)
-
-    def set_predictor(
-        self,
-        batch_size: int = None,
-        device: str = None,
-        pp_option: PaddlePredictorOption = None,
-    ) -> None:
-        """
-        Sets the predictor configuration.
-
-        Args:
-            batch_size (int, optional): The batch size to use. Defaults to None.
-            device (str, optional): The device to run the predictor on. Defaults to None.
-            pp_option (PaddlePredictorOption, optional): The predictor options to set. Defaults to None.
-
-        Returns:
-            None
-        """
-        if batch_size:
-            self.batch_sampler.batch_size = batch_size
-            self.pp_option.batch_size = batch_size
-        if device and device != self.pp_option.device:
-            self.pp_option.set_device(device)
-        if pp_option and pp_option != self.pp_option:
-            self.pp_option = pp_option

+ 1 - 1
paddlex/inference/models/common/__init__.py

@@ -30,6 +30,6 @@ from .ts import (
     TStoArray,
     TStoBatch,
 )
-from .static_infer import StaticInfer
+from .static_infer import StaticInfer, PaddleInfer, HPInfer
 
 from .tokenizer import PretrainedTokenizer, PretrainedTokenizer

+ 339 - 42
paddlex/inference/models/common/static_infer.py

@@ -12,10 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import abc
+import importlib.util
+import subprocess
 from typing import Sequence, List
 from pathlib import Path
 
-import lazy_paddle
+import lazy_paddle as paddle
 import numpy as np
 
 from ....utils import logging
@@ -26,37 +29,44 @@ from ....utils.flags import (
     INFER_BENCHMARK_USE_NEW_INFER_API,
 )
 from ...utils.benchmark import benchmark, set_inference_operations
-from ...utils.hpi import get_model_paths
+from ...utils.hpi import (
+    HPIConfig,
+    ONNXRuntimeConfig,
+    OpenVINOConfig,
+    TensorRTConfig,
+    OMConfig,
+    get_model_paths,
+    suggest_inference_backend_and_config,
+)
 from ...utils.pp_option import PaddlePredictorOption
 from ...utils.trt_config import DISABLE_TRT_HALF_OPS_CONFIG
 
 
 CACHE_DIR = ".cache"
 
-if INFER_BENCHMARK_USE_NEW_INFER_API:
-    INFERENCE_OPERATIONS = [
-        "PaddleCopyToDevice",
-        "PaddleCopyToHost",
-        "PaddleModelInfer",
-    ]
-else:
-    INFERENCE_OPERATIONS = ["PaddleInferChainLegacy"]
+INFERENCE_OPERATIONS = [
+    "PaddleCopyToDevice",
+    "PaddleCopyToHost",
+    "PaddleModelInfer",
+    "PaddleInferChainLegacy",
+    "MultiBackendInfer",
+]
 set_inference_operations(INFERENCE_OPERATIONS)
 
 
 # XXX: Better use Paddle Inference API to do this
 def _pd_dtype_to_np_dtype(pd_dtype):
-    if pd_dtype == lazy_paddle.inference.DataType.FLOAT64:
+    if pd_dtype == paddle.inference.DataType.FLOAT64:
         return np.float64
-    elif pd_dtype == lazy_paddle.inference.DataType.FLOAT32:
+    elif pd_dtype == paddle.inference.DataType.FLOAT32:
         return np.float32
-    elif pd_dtype == lazy_paddle.inference.DataType.INT64:
+    elif pd_dtype == paddle.inference.DataType.INT64:
         return np.int64
-    elif pd_dtype == lazy_paddle.inference.DataType.INT32:
+    elif pd_dtype == paddle.inference.DataType.INT32:
         return np.int32
-    elif pd_dtype == lazy_paddle.inference.DataType.UINT8:
+    elif pd_dtype == paddle.inference.DataType.UINT8:
         return np.uint8
-    elif pd_dtype == lazy_paddle.inference.DataType.INT8:
+    elif pd_dtype == paddle.inference.DataType.INT8:
         return np.int8
     else:
         raise TypeError(f"Unsupported data type: {pd_dtype}")
@@ -74,12 +84,12 @@ def _collect_trt_shape_range_info(
 
     dynamic_shape_input_data = dynamic_shape_input_data or {}
 
-    config = lazy_paddle.inference.Config(model_file, model_params)
+    config = paddle.inference.Config(model_file, model_params)
     config.enable_use_gpu(100, gpu_id)
     config.collect_shape_range_info(shape_range_info_path)
     # TODO: Add other needed options
     config.disable_glog_info()
-    predictor = lazy_paddle.inference.create_predictor(config)
+    predictor = paddle.inference.create_predictor(config)
 
     input_names = predictor.get_input_names()
     for name in dynamic_shapes:
@@ -147,7 +157,7 @@ def _convert_trt(
     dynamic_shapes,
     dynamic_shape_input_data,
 ):
-    from lazy_paddle.tensorrt.export import (
+    from paddle.tensorrt.export import (
         Input,
         TensorRTConfig,
         convert,
@@ -162,12 +172,12 @@ def _convert_trt(
 
     def _get_predictor(model_file, params_file):
         # HACK
-        config = lazy_paddle.inference.Config(str(model_file), str(params_file))
+        config = paddle.inference.Config(str(model_file), str(params_file))
         config.enable_use_gpu(100, device_id)
         # NOTE: Disable oneDNN to circumvent a bug in Paddle Inference
         config.disable_mkldnn()
         config.disable_glog_info()
-        return lazy_paddle.inference.create_predictor(config)
+        return paddle.inference.create_predictor(config)
 
     dynamic_shape_input_data = dynamic_shape_input_data or {}
 
@@ -246,7 +256,7 @@ class PaddleCopyToDevice:
     def __call__(self, arrs):
         device_id = [self.device_id] if self.device_id is not None else self.device_id
         device = constr_device(self.device_type, device_id)
-        paddle_tensors = [lazy_paddle.to_tensor(i, place=device) for i in arrs]
+        paddle_tensors = [paddle.to_tensor(i, place=device) for i in arrs]
         return paddle_tensors
 
 
@@ -292,19 +302,25 @@ class PaddleInferChainLegacy:
         return outputs
 
 
-class StaticInfer(object):
+class StaticInfer(metaclass=abc.ABCMeta):
+    @abc.abstractmethod
+    def __call__(self, x: Sequence[np.ndarray]) -> List[np.ndarray]:
+        raise NotImplementedError
+
+
+class PaddleInfer(StaticInfer):
     def __init__(
         self,
         model_dir: str,
-        model_prefix: str,
+        model_file_prefix: str,
         option: PaddlePredictorOption,
     ) -> None:
         super().__init__()
         self.model_dir = model_dir
-        self.model_file_prefix = model_prefix
+        self.model_file_prefix = model_file_prefix
         self._option = option
         self.predictor = self._create()
-        if self._use_new_inference_api:
+        if INFER_BENCHMARK_USE_NEW_INFER_API:
             device_type = self._option.device_type
             device_type = "gpu" if device_type == "dcu" else device_type
             copy_to_device = PaddleCopyToDevice(device_type, self._option.device_id)
@@ -314,13 +330,6 @@ class StaticInfer(object):
         else:
             self.infer = PaddleInferChainLegacy(self.predictor)
 
-    @property
-    def _use_new_inference_api(self):
-        # HACK: Temp fallback to legacy API via env var
-        return INFER_BENCHMARK_USE_NEW_INFER_API
-
-        # return self._option.device_type in ("cpu", "gpu", "dcu")
-
     def __call__(self, x: Sequence[np.ndarray]) -> List[np.ndarray]:
         names = self.predictor.get_input_names()
         if len(names) != len(x):
@@ -340,7 +349,7 @@ class StaticInfer(object):
         """_create"""
         model_paths = get_model_paths(self.model_dir, self.model_file_prefix)
         if "paddle" not in model_paths:
-            raise RuntimeError("No valid Paddle model found")
+            raise RuntimeError("No valid PaddlePaddle model found")
         model_file, params_file = model_paths["paddle"]
 
         if (
@@ -383,10 +392,10 @@ class StaticInfer(object):
             config.enable_use_gpu(100, self._option.device_id)
         # for Native Paddle and MKLDNN
         else:
-            config = lazy_paddle.inference.Config(str(model_file), str(params_file))
+            config = paddle.inference.Config(str(model_file), str(params_file))
             if self._option.device_type == "gpu":
                 config.exp_disable_mixed_precision_ops({"feed", "fetch"})
-                from lazy_paddle.inference import PrecisionType
+                from paddle.inference import PrecisionType
 
                 precision = (
                     PrecisionType.Half
@@ -427,7 +436,7 @@ class StaticInfer(object):
                 if hasattr(config, "enable_new_executor"):
                     config.enable_new_executor()
                 # XXX: is_compiled_with_rocm() must be True on dcu platform ?
-                if lazy_paddle.is_compiled_with_rocm():
+                if paddle.is_compiled_with_rocm():
                     # Delete unsupported passes in dcu
                     config.delete_pass("conv2d_add_act_fuse_pass")
                     config.delete_pass("conv2d_add_fuse_pass")
@@ -463,7 +472,7 @@ class StaticInfer(object):
         if not DEBUG:
             config.disable_glog_info()
 
-        predictor = lazy_paddle.inference.create_predictor(config)
+        predictor = paddle.inference.create_predictor(config)
 
         return predictor
 
@@ -482,9 +491,9 @@ class StaticInfer(object):
             )
             model_file = trt_save_path.with_suffix(".json")
             params_file = trt_save_path.with_suffix(".pdiparams")
-            config = lazy_paddle.inference.Config(str(model_file), str(params_file))
+            config = paddle.inference.Config(str(model_file), str(params_file))
         else:
-            config = lazy_paddle.inference.Config(str(model_file), str(params_file))
+            config = paddle.inference.Config(str(model_file), str(params_file))
             config.set_optim_cache_dir(str(cache_dir / "optim_cache"))
             # call enable_use_gpu() first to use TensorRT engine
             config.enable_use_gpu(100, self._option.device_id)
@@ -534,8 +543,11 @@ class StaticInfer(object):
                             self._option.trt_dynamic_shapes,
                             self._option.trt_dynamic_shape_input_data,
                         )
-                    if self._option.model_name in DISABLE_TRT_HALF_OPS_CONFIG and self._option.run_mode == "trt_fp16":
-                        lazy_paddle.inference.InternalUtils.disable_tensorrt_half_ops(
+                    if (
+                        self._option.model_name in DISABLE_TRT_HALF_OPS_CONFIG
+                        and self._option.run_mode == "trt_fp16"
+                    ):
+                        paddle.inference.InternalUtils.disable_tensorrt_half_ops(
                             config, DISABLE_TRT_HALF_OPS_CONFIG[self._option.model_name]
                         )
                     config.enable_tuned_tensorrt_dynamic_shape(
@@ -559,3 +571,288 @@ class StaticInfer(object):
                         raise RuntimeError("No dynamic shape information provided")
 
         return config
+
+
+# FIXME: Name might be misleading
+@benchmark.timeit
+class MultiBackendInfer(object):
+    def __init__(self, ui_runtime):
+        super().__init__()
+        self.ui_runtime = ui_runtime
+
+    # The time consumed by the wrapper code will also be taken into account.
+    def __call__(self, x):
+        outputs = self.ui_runtime.infer(x)
+        return outputs
+
+
+# TODO: It would be better to refactor the code to make `HPInfer` a higher-level
+# class that uses `PaddleInfer`.
+class HPInfer(StaticInfer):
+    def __init__(
+        self,
+        model_dir: str,
+        model_file_prefix: str,
+        config: HPIConfig,
+    ) -> None:
+        super().__init__()
+        self._model_dir = model_dir
+        self._model_file_prefix = model_file_prefix
+        self._config = config
+        backend, backend_config = self._determine_backend_and_config()
+        if backend == "paddle":
+            self._use_paddle = True
+            self._paddle_infer = self._build_paddle_infer(backend_config)
+        else:
+            self._use_paddle = False
+            ui_runtime = self._build_ui_runtime(backend, backend_config)
+            self._multi_backend_infer = MultiBackendInfer(ui_runtime)
+            num_inputs = ui_runtime.num_inputs()
+            self._input_names = [
+                ui_runtime.get_input_info(i).name for i in range(num_inputs)
+            ]
+
+    @property
+    def model_dir(self) -> str:
+        return self._model_dir
+
+    @property
+    def model_file_prefix(self) -> str:
+        return self._model_file_prefix
+
+    @property
+    def config(self) -> HPIConfig:
+        return self._config
+
+    def __call__(self, x: Sequence[np.ndarray]) -> List[np.ndarray]:
+        if self._use_paddle:
+            return self._call_paddle_infer(x)
+        else:
+            return self._call_multi_backend_infer(x)
+
+    def _call_paddle_infer(self, x):
+        return self._paddle_infer(x)
+
+    def _call_multi_backend_infer(self, x):
+        num_inputs = len(self._input_names)
+        if len(x) != num_inputs:
+            raise ValueError(f"Expected {num_inputs} inputs but got {len(x)} instead")
+        x = _sort_inputs(x, self._input_names)
+        inputs = {}
+        for name, input_ in zip(self._input_names, x):
+            inputs[name] = np.ascontiguousarray(input_)
+        return self._multi_backend_infer(inputs)
+
+    def _determine_backend_and_config(self):
+        from ultra_infer import (
+            is_built_with_om,
+            is_built_with_openvino,
+            is_built_with_ort,
+            is_built_with_trt,
+        )
+
+        model_paths = get_model_paths(self._model_dir, self._model_file_prefix)
+        is_onnx_model_available = "onnx" in model_paths
+        # TODO: Give a warning if Paddle2ONNX is not available but can be used
+        # to select a better backend.
+        if self._config.auto_paddle2onnx:
+            if self._check_paddle2onnx():
+                is_onnx_model_available = (
+                    is_onnx_model_available or "paddle" in model_paths
+                )
+            else:
+                logging.debug(
+                    "Paddle2ONNX is not available. Automatic model conversion will not be performed."
+                )
+        available_backends = []
+        if "paddle" in model_paths:
+            available_backends.append("paddle")
+        if is_built_with_openvino() and is_onnx_model_available:
+            available_backends.append("openvino")
+        if is_built_with_ort() and is_onnx_model_available:
+            available_backends.append("onnxruntime")
+        if is_built_with_trt() and is_onnx_model_available:
+            available_backends.append("tensorrt")
+        if is_built_with_om() and "om" in model_paths:
+            available_backends.append("om")
+
+        if not available_backends:
+            raise RuntimeError("No inference backend is available")
+
+        if (
+            self._config.backend is not None
+            and self._config.backend not in available_backends
+        ):
+            raise RuntimeError(
+                f"Inference backend {repr(self._config.backend)} is unavailable"
+            )
+
+        if self._config.auto_config:
+            # Should we use the strategy pattern here to allow extensible
+            # strategies?
+            ret = suggest_inference_backend_and_config(
+                self._config, available_backends=available_backends
+            )
+            if ret[0] is None:
+                # Should I use a custom exception?
+                raise RuntimeError(
+                    f"No inference backend and configuration could be suggested. Reason: {ret[1]}"
+                )
+            backend, backend_config = ret
+        else:
+            backend = self._config.backend
+            if backend is None:
+                raise RuntimeError(
+                    "When automatic configuration is not used, the inference backend must be specified manually."
+                )
+            backend_config = self._config.backend_config or {}
+
+        if backend == "paddle" and not backend_config:
+            logging.warning(
+                "The Paddle Inference backend is selected with the default configuration. This may not provide optimal performance."
+            )
+
+        return backend, backend_config
+
+    def _build_paddle_infer(self, backend_config):
+        kwargs = {
+            "device_type": self._config.device_type,
+            "device_id": self._config.device_id,
+            **backend_config,
+        }
+        # TODO: This is probably redundant. Can we reuse the code in the
+        # predictor class?
+        paddle_info = self._config.hpi_info.backend_configs.paddle_infer
+        if paddle_info is not None:
+            if (
+                kwargs.get("trt_dynamic_shapes") is None
+                and paddle_info.trt_dynamic_shapes is not None
+            ):
+                trt_dynamic_shapes = paddle_info.trt_dynamic_shapes
+                logging.debug("TensorRT dynamic shapes set to %s", trt_dynamic_shapes)
+                kwargs["trt_dynamic_shapes"] = trt_dynamic_shapes
+            if (
+                kwargs.get("trt_dynamic_shape_input_data") is None
+                and paddle_info.trt_dynamic_shape_input_data is not None
+            ):
+                trt_dynamic_shape_input_data = paddle_info.trt_dynamic_shape_input_data
+                logging.debug(
+                    "TensorRT dynamic shape input data set to %s",
+                    trt_dynamic_shape_input_data,
+                )
+                kwargs["trt_dynamic_shape_input_data"] = trt_dynamic_shape_input_data
+        pp_option = PaddlePredictorOption(self._config.pdx_model_name, **kwargs)
+        logging.info("Using Paddle Inference backend")
+        logging.info("Paddle predictor option: %s", pp_option)
+        return PaddleInfer(self._model_dir, self._model_file_prefix, option=pp_option)
+
+    def _build_ui_runtime(self, backend, backend_config, ui_option=None):
+        from ultra_infer import ModelFormat, Runtime, RuntimeOption
+
+        if ui_option is None:
+            ui_option = RuntimeOption()
+
+        if self._config.device_type == "cpu":
+            pass
+        elif self._config.device_type == "gpu":
+            ui_option.use_gpu(self._config.device_id or 0)
+        elif self._config.device_type == "npu":
+            ui_option.use_ascend()
+        else:
+            raise RuntimeError(
+                f"Unsupported device type {repr(self._config.device_type)}"
+            )
+
+        model_paths = get_model_paths(self.model_dir, self.model_file_prefix)
+        if backend in ("openvino", "onnxruntime", "tensorrt"):
+            # XXX: This introduces side effects.
+            if "onnx" not in model_paths:
+                if self._config.auto_paddle2onnx:
+                    if "paddle" not in model_paths:
+                        raise RuntimeError("PaddlePaddle model required")
+                    # The CLI is used here since there is currently no API.
+                    logging.info(
+                        "Automatically converting PaddlePaddle model to ONNX format"
+                    )
+                    subprocess.check_call(
+                        [
+                            "paddlex",
+                            "--paddle2onnx",
+                            "--paddle_model_dir",
+                            self._model_dir,
+                            "--onnx_model_dir",
+                            self._model_dir,
+                        ]
+                    )
+                    model_paths = get_model_paths(
+                        self.model_dir, self.model_file_prefix
+                    )
+                    assert "onnx" in model_paths
+                else:
+                    raise RuntimeError("ONNX model required")
+            ui_option.set_model_path(str(model_paths["onnx"]), "", ModelFormat.ONNX)
+        elif backend == "om":
+            if "om" not in model_paths:
+                raise RuntimeError("OM model required")
+            ui_option.set_model_path(str(model_paths["om"]), "", ModelFormat.OM)
+        else:
+            raise ValueError(f"Unsupported inference backend {repr(backend)}")
+
+        if backend == "openvino":
+            backend_config = OpenVINOConfig.model_validate(backend_config)
+            ui_option.use_openvino_backend()
+            ui_option.set_cpu_thread_num(backend_config.cpu_num_threads)
+        elif backend == "onnxruntime":
+            backend_config = ONNXRuntimeConfig.model_validate(backend_config)
+            ui_option.use_ort_backend()
+            ui_option.set_cpu_thread_num(backend_config.cpu_num_threads)
+        elif backend == "tensorrt":
+            if (
+                backend_config.get("use_dynamic_shapes", True)
+                and backend_config.get("dynamic_shapes") is None
+            ):
+                trt_info = self._config.hpi_info.backend_configs.tensorrt
+                if trt_info is not None and trt_info.dynamic_shapes is not None:
+                    trt_dynamic_shapes = trt_info.dynamic_shapes
+                    logging.debug(
+                        "TensorRT dynamic shapes set to %s", trt_dynamic_shapes
+                    )
+                    backend_config = {
+                        **backend_config,
+                        "dynamic_shapes": trt_dynamic_shapes,
+                    }
+            backend_config = TensorRTConfig.model_validate(backend_config)
+            ui_option.use_trt_backend()
+            cache_dir = self.model_dir / CACHE_DIR / "tensorrt"
+            cache_dir.mkdir(parents=True, exist_ok=True)
+            ui_option.trt_option.serialize_file = str(cache_dir / "trt_serialized.trt")
+            if backend_config.precision == "FP16":
+                ui_option.trt_option.enable_fp16 = True
+            if not backend_config.use_dynamic_shapes:
+                raise RuntimeError(
+                    "TensorRT static shape inference is currently not supported"
+                )
+            if backend_config.dynamic_shapes is not None:
+                if not Path(ui_option.trt_option.serialize_file).exists():
+                    for name, shapes in backend_config.dynamic_shapes.items():
+                        ui_option.trt_option.set_shape(name, *shapes)
+                else:
+                    logging.warning(
+                        "TensorRT dynamic shapes will be loaded from the file."
+                    )
+        elif backend == "om":
+            backend_config = OMConfig.model_validate(backend_config)
+            ui_option.use_om_backend()
+        else:
+            raise ValueError(f"Unsupported inference backend {repr(backend)}")
+
+        logging.info("Inference backend: %s", backend)
+        logging.info("Inference backend config: %s", backend_config)
+
+        ui_runtime = Runtime(ui_option)
+
+        return ui_runtime
+
+    def _check_paddle2onnx(self):
+        # HACK
+        return importlib.util.find_spec("paddle2onnx") is not None

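`HPInfer` either delegates to `PaddleInfer` or wraps an `ultra_infer` `Runtime` behind `MultiBackendInfer`. Below is a sketch of driving it directly with a manually chosen backend. The import paths follow this commit; the model directory is a placeholder, and it is assumed to already contain an `inference.onnx` file (or that Paddle2ONNX is installed for automatic conversion) and that `ultra_infer` was built with ONNX Runtime:

```python
from pathlib import Path

import numpy as np

from paddlex.inference.models.common.static_infer import HPInfer
from paddlex.inference.utils.hpi import HPIConfig

config = HPIConfig.model_validate(
    {
        "model_name": "PP-LCNet_x1_0",        # placeholder
        "device_type": "cpu",
        "auto_config": False,                  # skip automatic backend selection
        "backend": "onnxruntime",              # must be built and applicable
        "backend_config": {"cpu_num_threads": 8},
    }
)
infer = HPInfer(Path("PP-LCNet_x1_0_infer"), "inference", config)

# Inputs are matched to the runtime's input names positionally and made
# contiguous before being handed to the ultra_infer Runtime.
outputs = infer([np.zeros((1, 3, 224, 224), dtype=np.float32)])
```

With `auto_config` left enabled, `suggest_inference_backend_and_config` instead picks a backend from whatever `ultra_infer` was built with and which model formats are present; `om` additionally requires an OM model file in the directory.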
+ 4 - 11
paddlex/inference/models/formula_recognition/predictor.py

@@ -18,10 +18,7 @@ from ....utils.func_register import FuncRegister
 from ....modules.formula_recognition.model_list import MODELS
 from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
-from ..common import (
-    StaticInfer,
-)
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .processors import (
     MinMaxResize,
     LatexTestTransform,
@@ -38,8 +35,8 @@ from .processors import (
 from .result import FormulaRecResult
 
 
-class FormulaRecPredictor(BasicPredictor):
-    """FormulaRecPredictor that inherits from BasicPredictor."""
+class FormulaRecPredictor(BasePredictor):
+    """FormulaRecPredictor that inherits from BasePredictor."""
 
     entities = MODELS
 
@@ -84,11 +81,7 @@ class FormulaRecPredictor(BasicPredictor):
                 pre_tfs[name] = op
         pre_tfs["ToBatch"] = ToBatch()
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         post_op = self.build_postprocess(**self.config["PostProcess"])
         return pre_tfs, infer, post_op

+ 4 - 9
paddlex/inference/models/image_classification/predictor.py

@@ -25,15 +25,14 @@ from ..common import (
     Normalize,
     ToCHWImage,
     ToBatch,
-    StaticInfer,
 )
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .processors import Crop, Topk
 from .result import TopkResult
 
 
-class ClasPredictor(BasicPredictor):
-    """ClasPredictor that inherits from BasicPredictor."""
+class ClasPredictor(BasePredictor):
+    """ClasPredictor that inherits from BasePredictor."""
 
     entities = MODELS
 
@@ -85,11 +84,7 @@ class ClasPredictor(BasicPredictor):
             preprocessors[name] = op
         preprocessors["ToBatch"] = ToBatch()
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         postprocessors = {}
         for key in self.config["PostProcess"]:

+ 4 - 9
paddlex/inference/models/image_feature/predictor.py

@@ -25,15 +25,14 @@ from ..common import (
     Normalize,
     ToCHWImage,
     ToBatch,
-    StaticInfer,
 )
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .processors import NormalizeFeatures
 from .result import IdentityResult
 
 
-class ImageFeaturePredictor(BasicPredictor):
-    """ImageFeaturePredictor that inherits from BasicPredictor."""
+class ImageFeaturePredictor(BasePredictor):
+    """ImageFeaturePredictor that inherits from BasePredictor."""
 
     entities = MODELS
 
@@ -83,11 +82,7 @@ class ImageFeaturePredictor(BasicPredictor):
             preprocessors[name] = op
         preprocessors["ToBatch"] = ToBatch()
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         postprocessors = {}
         for key in self.config["PostProcess"]:

+ 1 - 1
paddlex/inference/models/image_multilabel_classification/predictor.py

@@ -22,7 +22,7 @@ from ....modules.multilabel_classification.model_list import MODELS
 
 
 class MLClasPredictor(ClasPredictor):
-    """MLClasPredictor that inherits from BasicPredictor."""
+    """MLClasPredictor that inherits from BasePredictor."""
 
     entities = MODELS
 

+ 4 - 9
paddlex/inference/models/image_unwarping/predictor.py

@@ -22,15 +22,14 @@ from ..common import (
     Normalize,
     ToCHWImage,
     ToBatch,
-    StaticInfer,
 )
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .processors import DocTrPostProcess
 from .result import DocTrResult
 
 
-class WarpPredictor(BasicPredictor):
-    """WarpPredictor that inherits from BasicPredictor."""
+class WarpPredictor(BasePredictor):
+    """WarpPredictor that inherits from BasePredictor."""
 
     entities = MODELS
 
@@ -71,11 +70,7 @@ class WarpPredictor(BasicPredictor):
         preprocessors["ToCHW"] = ToCHWImage()
         preprocessors["ToBatch"] = ToBatch()
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         postprocessors = {"DocTrPostProcess": DocTrPostProcess()}
         return preprocessors, infer, postprocessors

+ 1 - 6
paddlex/inference/models/instance_segmentation/predictor.py

@@ -17,7 +17,6 @@ import numpy as np
 
 from ....modules.instance_segmentation.model_list import MODELS
 from ...common.batch_sampler import ImageBatchSampler
-from ..common import StaticInfer
 from ..object_detection.processors import (
     ReadImage,
     ToBatch,
@@ -85,11 +84,7 @@ class InstanceSegPredictor(DetPredictor):
         pre_ops.append(self.build_to_batch())
 
         # build infer
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         # build postprocess op
         post_op = self.build_postprocess()

+ 2 - 2
paddlex/inference/models/multilingual_speech_recognition/predictor.py

@@ -18,14 +18,14 @@ import numpy as np
 from ....utils.func_register import FuncRegister
 from ...common.batch_sampler import AudioBatchSampler
 
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .result import WhisperResult
 from ...utils.io import AudioReader
 from ....modules.multilingual_speech_recognition.model_list import MODELS
 from ....utils.download import download_and_extract
 
 
-class WhisperPredictor(BasicPredictor):
+class WhisperPredictor(BasePredictor):
 
     entities = MODELS
 

+ 3 - 8
paddlex/inference/models/object_detection/predictor.py

@@ -20,8 +20,7 @@ from ....utils.func_register import FuncRegister
 from ....modules.object_detection.model_list import MODELS
 from ...common.batch_sampler import ImageBatchSampler
 
-from ..common import StaticInfer
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .processors import (
     DetPad,
     DetPostProcess,
@@ -37,7 +36,7 @@ from .result import DetResult
 from .utils import STATIC_SHAPE_MODEL_LIST
 
 
-class DetPredictor(BasicPredictor):
+class DetPredictor(BasePredictor):
 
     entities = MODELS
 
@@ -142,11 +141,7 @@ class DetPredictor(BasicPredictor):
             pre_ops.insert(1, self.build_resize(self.img_size, False, 2))
 
         # build infer
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         # build postprocess op
         post_op = self.build_postprocess()

+ 3 - 7
paddlex/inference/models/open_vocabulary_detection/predictor.py

@@ -27,11 +27,11 @@ from .processors import (
     YOLOWorldPostProcessor,
 )
 from ..common import StaticInfer
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from ..object_detection.result import DetResult
 
 
-class OVDetPredictor(BasicPredictor):
+class OVDetPredictor(BasePredictor):
 
     entities = MODELS
 
@@ -73,11 +73,7 @@ class OVDetPredictor(BasicPredictor):
                 pre_ops.append(op)
 
         # build infer
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         # build postprocess op
         post_op = self.build_postprocess(pre_ops=pre_ops)

+ 1 - 1
paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py

@@ -19,8 +19,8 @@ import numpy as np
 import PIL
 
 from ...common.tokenizer.bert_tokenizer import BertTokenizer
-from .....utils.lazy_loader import LazyLoader
 from ....utils.benchmark import benchmark
+from .....utils.lazy_loader import LazyLoader
 
 # NOTE: LazyLoader is used to avoid conflicts between ultra-infer and Paddle
 paddle = LazyLoader("lazy_paddle", globals(), "paddle")

+ 3 - 7
paddlex/inference/models/open_vocabulary_segmentation/predictor.py

@@ -23,11 +23,11 @@ from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
 from .processors import SAMProcessor
 from ..common import StaticInfer
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .results import SAMSegResult
 
 
-class OVSegPredictor(BasicPredictor):
+class OVSegPredictor(BasePredictor):
 
     entities = MODELS
 
@@ -62,11 +62,7 @@ class OVSegPredictor(BasicPredictor):
                 pre_ops.append(op)
 
         # build infer
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         # build model specific processor, it's required for a OV model.
         processor_cfg = self.config["Processor"]

+ 2 - 1
paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py

@@ -20,13 +20,14 @@ import PIL
 from copy import deepcopy
 
 from .....utils.lazy_loader import LazyLoader
-from ....utils.benchmark import benchmark
 
 # NOTE: LazyLoader is used to avoid conflicts between ultra-infer and Paddle
 paddle = LazyLoader("lazy_paddle", globals(), "paddle")
 T = LazyLoader("T", globals(), "paddle.vision.transforms")
 F = LazyLoader("F", globals(), "paddle.nn.functional")
 
+from ....utils.benchmark import benchmark
+
 
 def _get_preprocess_shape(
     oldh: int, oldw: int, long_side_length: int

+ 4 - 9
paddlex/inference/models/semantic_segmentation/predictor.py

@@ -24,15 +24,14 @@ from ..common import (
     Normalize,
     ToCHWImage,
     ToBatch,
-    StaticInfer,
 )
 from .processors import Resize, SegPostProcess
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .result import SegResult
 
 
-class SegPredictor(BasicPredictor):
-    """SegPredictor that inherits from BasicPredictor."""
+class SegPredictor(BasePredictor):
+    """SegPredictor that inherits from BasePredictor."""
 
     entities = MODELS
 
@@ -95,11 +94,7 @@ class SegPredictor(BasicPredictor):
             _, op = self._FUNC_MAP["Resize"](self, target_size=self.target_size)
             preprocessors["Resize"] = op
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         postprocessers = SegPostProcess()
 

+ 3 - 8
paddlex/inference/models/table_structure_recognition/predictor.py

@@ -25,14 +25,13 @@ from ..common import (
     Normalize,
     ToCHWImage,
     ToBatch,
-    StaticInfer,
 )
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .processors import Pad, TableLabelDecode
 from .result import TableRecResult
 
 
-class TablePredictor(BasicPredictor):
+class TablePredictor(BasePredictor):
     entities = MODELS
 
     _FUNC_MAP = {}
@@ -59,11 +58,7 @@ class TablePredictor(BasicPredictor):
                 preprocessors.append(op)
         preprocessors.append(ToBatch())
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         postprocessors = TableLabelDecode(
             model_name=self.config["Global"]["model_name"],

+ 9 - 9
paddlex/inference/models/text_detection/predictor.py

@@ -25,14 +25,13 @@ from ..common import (
     Normalize,
     ToCHWImage,
     ToBatch,
-    StaticInfer,
 )
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .processors import DetResizeForTest, NormalizeImage, DBPostProcess
 from .result import TextDetResult
 
 
-class TextDetPredictor(BasicPredictor):
+class TextDetPredictor(BasePredictor):
 
     entities = MODELS
 
@@ -46,6 +45,7 @@ class TextDetPredictor(BasicPredictor):
         thresh: Union[float, None] = None,
         box_thresh: Union[float, None] = None,
         unclip_ratio: Union[float, None] = None,
+        input_shape=None,
         *args,
         **kwargs
     ):
@@ -56,6 +56,7 @@ class TextDetPredictor(BasicPredictor):
         self.thresh = thresh
         self.box_thresh = box_thresh
         self.unclip_ratio = unclip_ratio
+        self.input_shape = input_shape
         self.pre_tfs, self.infer, self.post_op = self._build()
 
     def _build_batch_sampler(self):
@@ -76,11 +77,7 @@ class TextDetPredictor(BasicPredictor):
                 pre_tfs[name] = op
         pre_tfs["ToBatch"] = ToBatch()
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         post_op = self.build_postprocess(**self.config["PostProcess"])
         return pre_tfs, infer, post_op
@@ -147,7 +144,10 @@ class TextDetPredictor(BasicPredictor):
             limit_type = self.limit_type or kwargs.get("limit_type", "min")
 
         return "Resize", DetResizeForTest(
-            limit_side_len=limit_side_len, limit_type=limit_type, **kwargs
+            limit_side_len=limit_side_len,
+            limit_type=limit_type,
+            input_shape=self.input_shape,
+            **kwargs
         )
 
     @register("NormalizeImage")

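The new `input_shape` argument switches text detection from the ratio-preserving resize to a fixed network input, which is what static-shape deployments (e.g. the OM/NPU path added in this commit) need. A hedged usage sketch; the model name and directory are placeholders, and forwarding of the extra keyword through `create_predictor` relies on the `**kwargs` plumbing shown earlier in this commit:

```python
from paddlex.inference import create_predictor

det = create_predictor(
    "PP-OCRv4_mobile_det",
    model_dir="PP-OCRv4_mobile_det_infer",
    input_shape=(3, 960, 960),   # (C, H, W); every image is resized to 960x960
)
for res in det("doc_page.jpg"):
    print(res)
```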
+ 16 - 2
paddlex/inference/models/text_detection/processors.py

@@ -33,11 +33,14 @@ from ...utils.benchmark import benchmark
 class DetResizeForTest:
     """DetResizeForTest"""
 
-    def __init__(self, **kwargs):
+    def __init__(self, input_shape=None, **kwargs):
         super().__init__()
         self.resize_type = 0
         self.keep_ratio = False
-        if "image_shape" in kwargs:
+        if input_shape is not None:
+            self.input_shape = input_shape
+            self.resize_type = 3
+        elif "image_shape" in kwargs:
             self.image_shape = kwargs["image_shape"]
             self.resize_type = 1
             if "keep_ratio" in kwargs:
@@ -80,6 +83,8 @@ class DetResizeForTest:
             )
         elif self.resize_type == 2:
             img, [ratio_h, ratio_w] = self.resize_image_type2(img)
+        elif self.resize_type == 3:
+            img, [ratio_h, ratio_w] = self.resize_image_type3(img)
         else:
             # img, shape = self.resize_image_type1(img)
             img, [ratio_h, ratio_w] = self.resize_image_type1(img)
@@ -182,6 +187,15 @@ class DetResizeForTest:
 
         return img, [ratio_h, ratio_w]
 
+    def resize_image_type3(self, img):
+        """resize the image"""
+        resize_c, resize_h, resize_w = self.input_shape  # (c, h, w)
+        ori_h, ori_w = img.shape[:2]  # (h, w, c)
+        ratio_h = float(resize_h) / ori_h
+        ratio_w = float(resize_w) / ori_w
+        img = cv2.resize(img, (int(resize_w), int(resize_h)))
+        return img, [ratio_h, ratio_w]
+
 
 @benchmark.timeit
 class NormalizeImage:

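For reference, a small numeric check of the `resize_type == 3` branch added above (values are illustrative; `DetResizeForTest` is imported from the module shown in this diff):

```python
import numpy as np

from paddlex.inference.models.text_detection.processors import DetResizeForTest

resize = DetResizeForTest(input_shape=(3, 960, 960))
img = np.zeros((480, 640, 3), dtype=np.uint8)      # (h, w, c)
resized, (ratio_h, ratio_w) = resize.resize_image_type3(img)
print(resized.shape)    # (960, 960, 3): stretched to the fixed shape
print(ratio_h, ratio_w) # 2.0, 1.5: used to map boxes back to the original image
```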
+ 8 - 10
paddlex/inference/models/text_recognition/predictor.py

@@ -21,22 +21,22 @@ from ..common import (
     ResizeByShort,
     Normalize,
     ToCHWImage,
-    StaticInfer,
 )
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .processors import OCRReisizeNormImg, CTCLabelDecode, ToBatch
 from .result import TextRecResult
 
 
-class TextRecPredictor(BasicPredictor):
+class TextRecPredictor(BasePredictor):
 
     entities = MODELS
 
     _FUNC_MAP = {}
     register = FuncRegister(_FUNC_MAP)
 
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, input_shape=None, **kwargs):
         super().__init__(*args, **kwargs)
+        self.input_shape = input_shape
         self.pre_tfs, self.infer, self.post_op = self._build()
 
     def _build_batch_sampler(self):
@@ -57,11 +57,7 @@ class TextRecPredictor(BasicPredictor):
                 pre_tfs[name] = op
         pre_tfs["ToBatch"] = ToBatch()
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         post_op = self.build_postprocess(**self.config["PostProcess"])
         return pre_tfs, infer, post_op
@@ -87,7 +83,9 @@ class TextRecPredictor(BasicPredictor):
 
     @register("RecResizeImg")
     def build_resize(self, image_shape):
-        return "ReisizeNorm", OCRReisizeNormImg(rec_image_shape=image_shape)
+        return "ReisizeNorm", OCRReisizeNormImg(
+            rec_image_shape=image_shape, input_shape=self.input_shape
+        )
 
     def build_postprocess(self, **kwargs):
         if kwargs.get("name") == "CTCLabelDecode":

+ 14 - 2
paddlex/inference/models/text_recognition/processors.py

@@ -34,9 +34,10 @@ from ...utils.benchmark import benchmark
 class OCRReisizeNormImg:
     """for ocr image resize and normalization"""
 
-    def __init__(self, rec_image_shape=[3, 48, 320]):
+    def __init__(self, rec_image_shape=[3, 48, 320], input_shape=None):
         super().__init__()
         self.rec_image_shape = rec_image_shape
+        self.input_shape = input_shape
         self.max_imgW = 3200
 
     def resize_norm_img(self, img, max_wh_ratio):
@@ -66,7 +67,10 @@ class OCRReisizeNormImg:
 
     def __call__(self, imgs):
         """apply"""
-        return [self.resize(img) for img in imgs]
+        if self.input_shape is None:
+            return [self.resize(img) for img in imgs]
+        else:
+            return [self.staticResize(img) for img in imgs]
 
     def resize(self, img):
         imgC, imgH, imgW = self.rec_image_shape
@@ -77,6 +81,14 @@ class OCRReisizeNormImg:
         img = self.resize_norm_img(img, max_wh_ratio)
         return img
 
+    def staticResize(self, img):
+        imgC, imgH, imgW = self.input_shape
+        resized_image = cv2.resize(img, (int(imgW), int(imgH)))
+        resized_image = resized_image.transpose((2, 0, 1)) / 255
+        resized_image -= 0.5
+        resized_image /= 0.5
+        return resized_image
+
 
 @benchmark.timeit
 class BaseRecLabelDecode:
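For recognition, `staticResize` bypasses the width-adaptive resize entirely: the text crop is warped to the fixed (imgH, imgW), converted to CHW, and normalized to [-1, 1] with mean 0.5 / std 0.5. The equivalent arithmetic spelled out (crop size is illustrative):

```python
import cv2
import numpy as np

crop = np.random.randint(0, 256, (60, 200, 3), dtype=np.uint8)  # text crop, (H, W, C)
imgC, imgH, imgW = 3, 48, 320                                   # fixed input_shape

resized = cv2.resize(crop, (int(imgW), int(imgH)))
chw = resized.transpose((2, 0, 1)) / 255.0  # HWC -> CHW, scaled to [0, 1]
chw = (chw - 0.5) / 0.5                     # normalized to [-1, 1]

assert chw.shape == (imgC, imgH, imgW)
```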

+ 4 - 9
paddlex/inference/models/ts_anomaly_detection/predictor.py

@@ -27,15 +27,14 @@ from ..common import (
     TimeFeature,
     TStoArray,
     TStoBatch,
-    StaticInfer,
 )
 from .processors import GetAnomaly
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .result import TSAdResult
 
 
-class TSAdPredictor(BasicPredictor):
-    """TSAdPredictor that inherits from BasicPredictor."""
+class TSAdPredictor(BasePredictor):
+    """TSAdPredictor that inherits from BasePredictor."""
 
     entities = MODELS
 
@@ -94,11 +93,7 @@ class TSAdPredictor(BasicPredictor):
             )
         preprocessors["TStoArray"] = TStoArray(self.config["input_data"])
         preprocessors["TStoBatch"] = TStoBatch()
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
         postprocessors = {}
         postprocessors["GetAnomaly"] = GetAnomaly(
             self.config["model_threshold"], self.config["info_params"]

+ 4 - 9
paddlex/inference/models/ts_classification/predictor.py

@@ -28,16 +28,15 @@ from ..common import (
     TimeFeature,
     TStoArray,
     TStoBatch,
-    StaticInfer,
 )
 
 from .processors import GetCls, BuildPadMask
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .result import TSClsResult
 
 
-class TSClsPredictor(BasicPredictor):
-    """TSClsPredictor that inherits from BasicPredictor."""
+class TSClsPredictor(BasePredictor):
+    """TSClsPredictor that inherits from BasePredictor."""
 
     entities = MODELS
 
@@ -90,11 +89,7 @@ class TSClsPredictor(BasicPredictor):
         preprocessors["BuildPadMask"] = BuildPadMask(self.config["input_data"])
         preprocessors["TStoArray"] = TStoArray(self.config["input_data"])
         preprocessors["TStoBatch"] = TStoBatch()
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
         postprocessors = {}
         postprocessors["GetCls"] = GetCls()
         return preprocessors, infer, postprocessors

+ 4 - 9
paddlex/inference/models/ts_forecasting/predictor.py

@@ -28,15 +28,14 @@ from ..common import (
     TimeFeature,
     TStoArray,
     TStoBatch,
-    StaticInfer,
 )
 from .processors import ArraytoTS, TSDeNormalize
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .result import TSFcResult
 
 
-class TSFcPredictor(BasicPredictor):
-    """TSFcPredictor that inherits from BasicPredictor."""
+class TSFcPredictor(BasePredictor):
+    """TSFcPredictor that inherits from BasePredictor."""
 
     entities = MODELS
 
@@ -95,11 +94,7 @@ class TSFcPredictor(BasicPredictor):
             )
         preprocessors["TStoArray"] = TStoArray(self.config["input_data"])
         preprocessors["TStoBatch"] = TStoBatch()
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
         postprocessors = {}
         postprocessors["ArraytoTS"] = ArraytoTS(self.config["info_params"])
         if self.config.get("scale", None):

+ 3 - 10
paddlex/inference/models/video_classification/predictor.py

@@ -17,10 +17,7 @@ from ....utils.func_register import FuncRegister
 from ....modules.video_classification.model_list import MODELS
 from ...common.batch_sampler import VideoBatchSampler
 from ...common.reader import ReadVideo
-from ..common import (
-    StaticInfer,
-)
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .processors import (
     Scale,
     CenterCrop,
@@ -32,7 +29,7 @@ from .processors import (
 from .result import TopkVideoResult
 
 
-class VideoClasPredictor(BasicPredictor):
+class VideoClasPredictor(BasePredictor):
 
     entities = MODELS
 
@@ -62,11 +59,7 @@ class VideoClasPredictor(BasicPredictor):
                 pre_tfs[name] = op
         pre_tfs["ToBatch"] = ToBatch()
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
 
         post_op = {}
         for key in self.config["PostProcess"]:

+ 3 - 7
paddlex/inference/models/video_detection/predictor.py

@@ -21,12 +21,12 @@ from ..common import (
     ToBatch,
     StaticInfer,
 )
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from .processors import ResizeVideo, Image2Array, NormalizeVideo, DetVideoPostProcess
 from .result import DetVideoResult
 
 
-class VideoDetPredictor(BasicPredictor):
+class VideoDetPredictor(BasePredictor):
 
     entities = MODELS
 
@@ -62,11 +62,7 @@ class VideoDetPredictor(BasicPredictor):
             if op:
                 pre_tfs[name] = op
 
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
         post_op = {}
         for cfg in self.config["PostProcess"]["transform_ops"]:
             tf_key = list(cfg.keys())[0]
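Every predictor in this commit replaces the inline `StaticInfer(...)` construction with `self.create_static_infer()`, so backend selection (plain Paddle Inference vs. an HPI backend) lives in one place in the base class. The base method itself is not shown in these hunks; the following is only a generic factory-method sketch with stub classes, not the PaddleX implementation:

```python
# Generic factory-method sketch with stub engines; names other than
# "create_static_infer" are invented for illustration.
class PaddleEngine:
    def __init__(self, model_dir, option):
        self.model_dir, self.option = model_dir, option


class HPIEngine:
    def __init__(self, model_dir, config):
        self.model_dir, self.config = model_dir, config


class PredictorBase:
    def __init__(self, model_dir, use_hpip=False, hpi_config=None, pp_option=None):
        self.model_dir = model_dir
        self.use_hpip = use_hpip
        self.hpi_config = hpi_config
        self.pp_option = pp_option

    def create_static_infer(self):
        # Single decision point: high-performance plugin or plain Paddle Inference.
        if self.use_hpip:
            return HPIEngine(self.model_dir, self.hpi_config)
        return PaddleEngine(self.model_dir, self.pp_option)


print(type(PredictorBase("model", use_hpip=True).create_static_infer()).__name__)
```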

+ 10 - 2
paddlex/inference/pipelines/3d_bev_detection/pipeline.py

@@ -16,6 +16,7 @@ from typing import Any, Dict, Optional, Union, List
 import numpy as np
 from importlib import import_module
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 module_3d_bev_detection_result = import_module(
@@ -35,6 +36,7 @@ class BEVDet3DPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -43,9 +45,15 @@ class BEVDet3DPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         bev_detection_3d_model_config = config["SubModules"]["3DBEVDetection"]
         self.bev_detection_3d_model = self.create_model(bev_detection_3d_model_config)

+ 17 - 4
paddlex/inference/pipelines/__init__.py

@@ -13,10 +13,11 @@
 # limitations under the License.
 
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 from importlib import import_module
 from .base import BasePipeline
 from ..utils.pp_option import PaddlePredictorOption
+from ..utils.hpi import HPIConfig
 from .components import BaseChat, BaseRetriever, BaseGeneratePrompt
 from ...utils import logging
 from ...utils.config import parse_config
@@ -109,7 +110,8 @@ def create_pipeline(
     config: Optional[Dict[str, Any]] = None,
     device: Optional[str] = None,
     pp_option: Optional[PaddlePredictorOption] = None,
-    use_hpip: bool = False,
+    use_hpip: Optional[bool] = None,
+    hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     *args: Any,
     **kwargs: Any,
 ) -> BasePipeline:
@@ -128,8 +130,12 @@ def create_pipeline(
             Defaults to None.
         pp_option (Optional[PaddlePredictorOption], optional): The options for
             the PaddlePredictor. Defaults to None.
-        use_hpip (bool, optional): Whether to use high-performance inference
-            plugin (HPIP) for prediction. Defaults to False.
+        use_hpip (Optional[bool], optional): Whether to use the high-performance
+            inference plugin (HPIP) for prediction by default.
+            Defaults to None.
+        hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional): The
+            default high-performance inference configuration dictionary.
+            Defaults to None.
         *args: Additional positional arguments.
         **kwargs: Additional keyword arguments.
 
@@ -151,12 +157,19 @@ def create_pipeline(
                 config["pipeline_name"],
             )
     pipeline_name = config["pipeline_name"]
+    if device is None:
+        device = config.get("device", None)
+    if use_hpip is None:
+        use_hpip = config.get("use_hpip", False)
+    if hpi_config is None:
+        hpi_config = config.get("hpi_config", None)
 
     pipeline = BasePipeline.get(pipeline_name)(
         config=config,
         device=device,
         pp_option=pp_option,
         use_hpip=use_hpip,
+        hpi_config=hpi_config,
         *args,
         **kwargs,
     )
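`create_pipeline` now treats `device`, `use_hpip`, and `hpi_config` as overridable defaults: an explicit argument wins, otherwise the value stored in the pipeline config is used, and only then the hard-coded fallback. A hedged usage sketch (the pipeline name and `hpi_config` keys are illustrative):

```python
from paddlex import create_pipeline

# Explicit arguments take precedence over the `device` / `use_hpip` /
# `hpi_config` keys in the pipeline config file; passing None defers to it.
pipeline = create_pipeline(
    pipeline="OCR",                         # assumed pipeline name
    device="gpu:0",
    use_hpip=True,
    hpi_config={"backend": "onnxruntime"},  # assumed key/value
)

for res in pipeline.predict("doc.png"):
    res.print()
```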

+ 10 - 2
paddlex/inference/pipelines/anomaly_detection/pipeline.py

@@ -16,6 +16,7 @@ from typing import Any, Dict, Optional, Union, List
 import numpy as np
 
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.anomaly_detection.result import UadResult
@@ -32,6 +33,7 @@ class AnomalyDetectionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the image anomaly detection pipeline.
 
@@ -39,10 +41,16 @@ class AnomalyDetectionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         anomaly_detetion_model_config = config["SubModules"]["AnomalyDetection"]
         self.anomaly_detetion_model = self.create_model(anomaly_detetion_model_config)

+ 5 - 1
paddlex/inference/pipelines/attribute_recognition/pipeline.py

@@ -19,6 +19,7 @@ from pathlib import Path
 import numpy as np
 
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
 from ..components import CropByBoxes
@@ -35,8 +36,11 @@ class AttributeRecPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ):
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         self.det_model = self.create_model(config["SubModules"]["Detection"])
         self.cls_model = self.create_model(config["SubModules"]["Classification"])

+ 28 - 9
paddlex/inference/pipelines/base.py

@@ -12,12 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 from abc import ABC, abstractmethod
-import yaml
-import codecs
+from ...utils import logging
 from ...utils.subclass_register import AutoRegisterABCMetaClass
+from ..utils.hpi import HPIConfig
 from ..utils.pp_option import PaddlePredictorOption
 from ..models import BasePredictor
 
@@ -37,6 +36,7 @@ class BasePipeline(ABC, metaclass=AutoRegisterABCMetaClass):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
         *args,
         **kwargs,
     ) -> None:
@@ -46,12 +46,17 @@ class BasePipeline(ABC, metaclass=AutoRegisterABCMetaClass):
         Args:
             device (str, optional): The device to use for prediction. Defaults to None.
             pp_option (PaddlePredictorOption, optional): The options for PaddlePredictor. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
         super().__init__()
         self.device = device
         self.pp_option = pp_option
         self.use_hpip = use_hpip
+        self.hpi_config = hpi_config
 
     @abstractmethod
     def predict(self, input, **kwargs):
@@ -79,18 +84,25 @@ class BasePipeline(ABC, metaclass=AutoRegisterABCMetaClass):
             raise ValueError(config["model_config_error"])
 
         model_dir = config.get("model_dir", None)
-        hpi_params = config.get("hpi_params", None)
+        # Should we log if the actual parameter to use is different from the default?
+        use_hpip = config.get("use_hpip", self.use_hpip)
+        hpi_config = config.get("hpi_config", None)
+        if self.hpi_config is not None:
+            hpi_config = hpi_config or {}
+            hpi_config = {**self.hpi_config, **hpi_config}
 
         from .. import create_predictor
 
+        logging.info("Creating model: %s", (config["model_name"], model_dir))
+
         model = create_predictor(
             model_name=config["model_name"],
             model_dir=model_dir,
             device=self.device,
             batch_size=config.get("batch_size", 1),
             pp_option=self.pp_option,
-            use_hpip=self.use_hpip,
-            hpi_params=hpi_params,
+            use_hpip=use_hpip,
+            hpi_config=hpi_config,
             **kwargs,
         )
         return model
@@ -110,11 +122,18 @@ class BasePipeline(ABC, metaclass=AutoRegisterABCMetaClass):
 
         from . import create_pipeline
 
+        use_hpip = config.get("use_hpip", self.use_hpip)
+        hpi_config = config.get("hpi_config", None)
+        if self.hpi_config is not None:
+            hpi_config = hpi_config or {}
+            hpi_config = {**self.hpi_config, **hpi_config}
+
         pipeline = create_pipeline(
             config=config,
             device=self.device,
             pp_option=self.pp_option,
-            use_hpip=self.use_hpip,
+            use_hpip=use_hpip,
+            hpi_config=hpi_config,
         )
         return pipeline
 

+ 10 - 2
paddlex/inference/pipelines/doc_preprocessor/pipeline.py

@@ -21,6 +21,7 @@ from ....utils import logging
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 
 
 class DocPreprocessorPipeline(BasePipeline):
@@ -34,6 +35,7 @@ class DocPreprocessorPipeline(BasePipeline):
         device: Optional[str] = None,
         pp_option: Optional[PaddlePredictorOption] = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the doc preprocessor pipeline.
 
@@ -41,10 +43,16 @@ class DocPreprocessorPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         self.use_doc_orientation_classify = config.get(
             "use_doc_orientation_classify", True

+ 10 - 2
paddlex/inference/pipelines/formula_recognition/pipeline.py

@@ -25,6 +25,7 @@ from ...models.formula_recognition.result import (
 )
 from ....utils import logging
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
 from ..ocr.result import OCRResult
@@ -44,6 +45,7 @@ class FormulaRecognitionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the formula recognition pipeline.
 
@@ -51,10 +53,16 @@ class FormulaRecognitionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         self.use_doc_preprocessor = config.get("use_doc_preprocessor", True)
         if self.use_doc_preprocessor:

+ 10 - 2
paddlex/inference/pipelines/image_classification/pipeline.py

@@ -17,6 +17,7 @@ import numpy as np
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.image_classification.result import TopkResult
@@ -33,6 +34,7 @@ class ImageClassificationPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -41,9 +43,15 @@ class ImageClassificationPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         image_classification_model_config = config["SubModules"]["ImageClassification"]
         model_kwargs = {}

+ 10 - 2
paddlex/inference/pipelines/image_multilabel_classification/pipeline.py

@@ -17,6 +17,7 @@ import numpy as np
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.image_multilabel_classification.result import MLClassResult
@@ -33,6 +34,7 @@ class ImageMultiLabelClassificationPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -41,9 +43,15 @@ class ImageMultiLabelClassificationPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         self.threshold = config["SubModules"]["ImageMultiLabelClassification"].get(
             "threshold", None

+ 10 - 2
paddlex/inference/pipelines/instance_segmentation/pipeline.py

@@ -15,6 +15,7 @@
 from typing import Any, Dict, Optional, Union, List
 import numpy as np
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.instance_segmentation.result import InstanceSegResult
@@ -31,6 +32,7 @@ class InstanceSegmentationPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -39,9 +41,15 @@ class InstanceSegmentationPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         instance_segmentation_model_config = config["SubModules"][
             "InstanceSegmentation"

+ 10 - 2
paddlex/inference/pipelines/keypoint_detection/pipeline.py

@@ -15,6 +15,7 @@
 from typing import Any, Dict, Optional, Union, Tuple, List
 import numpy as np
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.keypoint_detection.result import KptResult
@@ -33,6 +34,7 @@ class KeypointDetectionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -41,9 +43,15 @@ class KeypointDetectionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         # create object detection model
         model_cfg = config["SubModules"]["ObjectDetection"]

+ 11 - 3
paddlex/inference/pipelines/layout_parsing/pipeline.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Optional, Union, List, Tuple
+from typing import Any, Dict, Optional, Union, List, Tuple
 import numpy as np
 from ..base import BasePipeline
 from .utils import get_sub_regions_ocr_res, sorted_layout_boxes
@@ -20,6 +20,7 @@ from ..components import CropByBoxes
 from .result import LayoutParsingResult
 from ....utils import logging
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
 from ..ocr.result import OCRResult
@@ -38,6 +39,7 @@ class LayoutParsingPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the layout parsing pipeline.
 
@@ -45,10 +47,16 @@ class LayoutParsingPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         self.inintial_predictor(config)
 

+ 9 - 2
paddlex/inference/pipelines/layout_parsing/pipeline_v2.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 from __future__ import annotations
 
-from typing import Optional, Union, Tuple, Iterator
+from typing import Any, Dict, Optional, Union, Tuple
 import numpy as np
 import re
 import copy
@@ -23,6 +23,7 @@ from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
 from ...models.object_detection.result import DetResult
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 from ..ocr.result import OCRResult
 from .result_v2 import LayoutParsingResultV2
@@ -40,6 +41,7 @@ class LayoutParsingPipelineV2(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the layout parsing pipeline.
 
@@ -47,13 +49,18 @@ class LayoutParsingPipelineV2(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
         super().__init__(
             device=device,
             pp_option=pp_option,
             use_hpip=use_hpip,
+            hpi_config=hpi_config,
         )
 
         self.inintial_predictor(config)

+ 10 - 2
paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py

@@ -16,6 +16,7 @@ from typing import Any, Dict, Optional, Union, List
 import numpy as np
 
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 from ...models.multilingual_speech_recognition.result import WhisperResult
 
@@ -31,6 +32,7 @@ class MultilingualSpeechRecognitionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -39,9 +41,15 @@ class MultilingualSpeechRecognitionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         multilingual_speech_recognition_model_config = config["SubModules"][
             "MultilingualSpeechRecognition"

+ 10 - 2
paddlex/inference/pipelines/object_detection/pipeline.py

@@ -16,6 +16,7 @@ from typing import Any, Dict, Optional, Union, Tuple, List
 import numpy as np
 
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.object_detection.result import DetResult
@@ -32,6 +33,7 @@ class ObjectDetectionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -40,9 +42,15 @@ class ObjectDetectionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
         model_cfg = config["SubModules"]["ObjectDetection"]
         model_kwargs = {}
         if "threshold" in model_cfg:

+ 17 - 3
paddlex/inference/pipelines/ocr/pipeline.py

@@ -18,6 +18,7 @@ from scipy.ndimage import rotate
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 from ..components import (
     CropByPolys,
@@ -41,6 +42,7 @@ class OCRPipeline(BasePipeline):
         device: Optional[str] = None,
         pp_option: Optional[PaddlePredictorOption] = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -49,9 +51,15 @@ class OCRPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         self.use_doc_preprocessor = config.get("use_doc_preprocessor", True)
         if self.use_doc_preprocessor:
@@ -84,6 +92,7 @@ class OCRPipeline(BasePipeline):
             self.text_det_limit_type = text_det_config.get("limit_type", "max")
             self.text_det_thresh = text_det_config.get("thresh", 0.3)
             self.text_det_box_thresh = text_det_config.get("box_thresh", 0.6)
+            self.input_shape = text_det_config.get("input_shape", None)
             self.text_det_unclip_ratio = text_det_config.get("unclip_ratio", 2.0)
             self._sort_boxes = SortQuadBoxes()
             self._crop_by_polys = CropByPolys(det_box_type="quad")
@@ -93,6 +102,7 @@ class OCRPipeline(BasePipeline):
             self.text_det_thresh = text_det_config.get("thresh", 0.2)
             self.text_det_box_thresh = text_det_config.get("box_thresh", 0.6)
             self.text_det_unclip_ratio = text_det_config.get("unclip_ratio", 0.5)
+            self.input_shape = text_det_config.get("input_shape", None)
             self._sort_boxes = SortPolyBoxes()
             self._crop_by_polys = CropByPolys(det_box_type="poly")
         else:
@@ -105,6 +115,7 @@ class OCRPipeline(BasePipeline):
             thresh=self.text_det_thresh,
             box_thresh=self.text_det_box_thresh,
             unclip_ratio=self.text_det_unclip_ratio,
+            input_shape=self.input_shape,
         )
 
         text_rec_config = config.get("SubModules", {}).get(
@@ -112,7 +123,10 @@ class OCRPipeline(BasePipeline):
             {"model_config_error": "config error for text_rec_model!"},
         )
         self.text_rec_score_thresh = text_rec_config.get("score_thresh", 0)
-        self.text_rec_model = self.create_model(text_rec_config)
+        self.input_shape = text_rec_config.get("input_shape", None)
+        self.text_rec_model = self.create_model(
+            text_rec_config, input_shape=self.input_shape
+        )
 
         self.batch_sampler = ImageBatchSampler(batch_size=1)
         self.img_reader = ReadImage(format="BGR")

+ 10 - 2
paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py

@@ -15,6 +15,7 @@
 from typing import Any, Dict, Optional, Union, List
 import numpy as np
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.object_detection.result import DetResult
@@ -31,6 +32,7 @@ class OpenVocabularyDetectionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -39,9 +41,15 @@ class OpenVocabularyDetectionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         open_vocabulary_detection_model_config = config.get("SubModules", {}).get(
             "OpenVocabularyDetection",

+ 10 - 2
paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py

@@ -15,6 +15,7 @@
 from typing import Any, Dict, Optional, Union, Tuple, List
 import numpy as np
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.open_vocabulary_segmentation.results import SAMSegResult
@@ -33,6 +34,7 @@ class OpenVocabularySegmentationPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -41,9 +43,15 @@ class OpenVocabularySegmentationPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         # create box-prompted SAM-H
         box_prompted_model_cfg = config.get("SubModules", {}).get(

+ 11 - 3
paddlex/inference/pipelines/pp_chatocr/pipeline_base.py

@@ -12,10 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 from ..base import BasePipeline
 from ....utils import logging
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 
 
 class PP_ChatOCR_Pipeline(BasePipeline):
@@ -26,6 +27,7 @@ class PP_ChatOCR_Pipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the pp-chatocrv3-doc pipeline.
 
@@ -33,10 +35,16 @@ class PP_ChatOCR_Pipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
     def visual_predict(self):
         """

+ 10 - 3
paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py

@@ -24,6 +24,7 @@ from ...common.batch_sampler import ImageBatchSampler
 from ....utils import logging
 from ....utils.file_interface import custom_open
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..layout_parsing.result import LayoutParsingResult
 from ..components.chat_server import BaseChat
 
@@ -39,6 +40,7 @@ class PP_ChatOCRv3_Pipeline(PP_ChatOCR_Pipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
         initial_predictor: bool = True,
     ) -> None:
         """Initializes the pp-chatocrv3-doc pipeline.
@@ -47,12 +49,17 @@ class PP_ChatOCRv3_Pipeline(PP_ChatOCR_Pipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
-            use_layout_parsing (bool, optional): Whether to use layout parsing. Defaults to True.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
             initial_predictor (bool, optional): Whether to initialize the predictor. Defaults to True.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         self.pipeline_name = config["pipeline_name"]
         self.config = config

+ 10 - 3
paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py

@@ -26,6 +26,7 @@ from ...common.batch_sampler import ImageBatchSampler
 from ....utils import logging
 from ....utils.file_interface import custom_open
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..layout_parsing.result import LayoutParsingResult
 from ..components.chat_server import BaseChat
 
@@ -41,6 +42,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
         initial_predictor: bool = True,
     ) -> None:
         """Initializes the pp-chatocrv3-doc pipeline.
@@ -49,12 +51,17 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
-            use_layout_parsing (bool, optional): Whether to use layout parsing. Defaults to True.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
             initial_predictor (bool, optional): Whether to initialize the predictor. Defaults to True.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         self.pipeline_name = config["pipeline_name"]
         self.config = config

+ 6 - 2
paddlex/inference/pipelines/pp_shitu_v2/pipeline.py

@@ -12,11 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 
 import numpy as np
 
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
 from ..components import CropByBoxes, FaissIndexer, FaissBuilder, IndexData
@@ -35,8 +36,11 @@ class ShiTuV2Pipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ):
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         self._topk, self._rec_threshold, self._hamming_radius, self._det_threshold = (
             config.get("rec_topk", 5),

+ 10 - 2
paddlex/inference/pipelines/rotated_object_detection/pipeline.py

@@ -15,6 +15,7 @@
 from typing import Any, Dict, Optional, Union, List
 import numpy as np
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.object_detection.result import DetResult
@@ -31,6 +32,7 @@ class RotatedObjectDetectionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -39,9 +41,15 @@ class RotatedObjectDetectionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         rotated_object_detection_model_config = config["SubModules"][
             "RotatedObjectDetection"

+ 10 - 2
paddlex/inference/pipelines/seal_recognition/pipeline.py

@@ -21,6 +21,7 @@ from ..components import CropByBoxes
 from .result import SealRecognitionResult
 from ....utils import logging
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
 from ..doc_preprocessor.result import DocPreprocessorResult
@@ -39,6 +40,7 @@ class SealRecognitionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the seal recognition pipeline.
 
@@ -46,10 +48,16 @@ class SealRecognitionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         self.use_doc_preprocessor = config.get("use_doc_preprocessor", True)
         if self.use_doc_preprocessor:

+ 10 - 2
paddlex/inference/pipelines/semantic_segmentation/pipeline.py

@@ -15,6 +15,7 @@
 from typing import Union, Any, Tuple, List, Dict, Optional, Literal
 import numpy as np
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.semantic_segmentation.result import SegResult
@@ -31,6 +32,7 @@ class SemanticSegmentationPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -39,9 +41,15 @@ class SemanticSegmentationPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         semantic_segmentation_model_config = config["SubModules"][
             "SemanticSegmentation"

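Note: every pipeline touched in this change gains the same use_hpip / hpi_config pair and forwards both to BasePipeline.__init__. A minimal usage sketch, not part of the diff; the module path follows the file path shown above, and the sub-module config is a hypothetical stand-in for the real pipeline YAML:

# Illustrative sketch only (assumed module path, hypothetical config).
from paddlex.inference.pipelines.semantic_segmentation.pipeline import (
    SemanticSegmentationPipeline,
)

config = {
    "SubModules": {
        "SemanticSegmentation": {"model_name": "PP-LiteSeg-T"},  # hypothetical sub-module config
    }
}
pipeline = SemanticSegmentationPipeline(
    config=config,
    device="gpu:0",
    use_hpip=True,                          # enable the high-performance inference plugin
    hpi_config={"backend": "onnxruntime"},  # a plain dict or an HPIConfig instance
)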
+ 10 - 2
paddlex/inference/pipelines/small_object_detection/pipeline.py

@@ -15,6 +15,7 @@
 from typing import Any, Dict, Optional, Union, List
 import numpy as np
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.object_detection.result import DetResult
@@ -31,6 +32,7 @@ class SmallObjectDetectionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -39,9 +41,15 @@ class SmallObjectDetectionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         small_object_detection_model_config = config["SubModules"][
             "SmallObjectDetection"

+ 25 - 10
paddlex/inference/pipelines/table_recognition/pipeline.py

@@ -24,6 +24,7 @@ from .table_recognition_post_processing import get_table_recognition_res
 from .result import SingleTableRecognitionResult, TableRecognitionResult
 from ....utils import logging
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
 from ..ocr.result import OCRResult
@@ -43,6 +44,7 @@ class TableRecognitionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the layout parsing pipeline.
 
@@ -50,10 +52,16 @@ class TableRecognitionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         self.use_doc_preprocessor = config.get("use_doc_preprocessor", True)
         if self.use_doc_preprocessor:
@@ -90,8 +98,7 @@ class TableRecognitionPipeline(BasePipeline):
             self.general_ocr_pipeline = self.create_pipeline(general_ocr_config)
         else:
             self.general_ocr_config_bak = config.get("SubPipelines", {}).get(
-                "GeneralOCR",
-                None
+                "GeneralOCR", None
             )
 
         self._crop_by_boxes = CropByBoxes()
@@ -222,7 +229,7 @@ class TableRecognitionPipeline(BasePipeline):
             doc_preprocessor_res = {}
             doc_preprocessor_image = image_array
         return doc_preprocessor_res, doc_preprocessor_image
-    
+
     def split_ocr_bboxes_by_table_cells(self, ori_img, cells_bboxes):
         """
         Splits OCR bounding boxes by table cells and retrieves text.
@@ -246,7 +253,7 @@ class TableRecognitionPipeline(BasePipeline):
             # Perform OCR on the defined region of the image and get the recognized text.
             rec_te = next(self.general_ocr_pipeline(ori_img[y1:y2, x1:x2, :]))
             # Concatenate the texts and append them to the texts_list.
-            texts_list.append(''.join(rec_te["rec_texts"]))
+            texts_list.append("".join(rec_te["rec_texts"]))
         # Return the list of recognized texts from each cell.
         return texts_list
 
@@ -302,9 +309,15 @@ class TableRecognitionPipeline(BasePipeline):
         """
         table_structure_pred = next(self.table_structure_model(image_array))
         if use_table_cells_ocr_results == True:
-            table_cells_result = list(map(lambda arr: arr.tolist(), table_structure_pred["bbox"]))
-            table_cells_result = [[rect[0], rect[1], rect[4], rect[5]] for rect in table_cells_result]
-            cells_texts_list = self.split_ocr_bboxes_by_table_cells(image_array, table_cells_result)
+            table_cells_result = list(
+                map(lambda arr: arr.tolist(), table_structure_pred["bbox"])
+            )
+            table_cells_result = [
+                [rect[0], rect[1], rect[4], rect[5]] for rect in table_cells_result
+            ]
+            cells_texts_list = self.split_ocr_bboxes_by_table_cells(
+                image_array, table_cells_result
+            )
         else:
             cells_texts_list = []
         single_table_recognition_res = get_table_recognition_res(
@@ -409,7 +422,9 @@ class TableRecognitionPipeline(BasePipeline):
                 )
             elif use_table_cells_ocr_results == True:
                 assert self.general_ocr_config_bak != None
-                self.general_ocr_pipeline = self.create_pipeline(self.general_ocr_config_bak)
+                self.general_ocr_pipeline = self.create_pipeline(
+                    self.general_ocr_config_bak
+                )
 
             table_res_list = []
             table_region_id = 1

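Note: the cell boxes returned by the table structure model are consumed here as 8-value quadrilaterals, and the reformatted list comprehension above keeps only the first and third corner points as an axis-aligned rectangle. A small sketch of that conversion with made-up coordinates, assuming the (x1, y1, ..., x4, y4) corner layout:

# Illustrative only: a single made-up cell box, mirroring the
# rect[0], rect[1], rect[4], rect[5] indexing used in the pipeline.
import numpy as np

pred_bboxes = [np.array([10, 20, 110, 20, 110, 60, 10, 60])]  # hypothetical quadrilateral
cells = [arr.tolist() for arr in pred_bboxes]
cells = [[rect[0], rect[1], rect[4], rect[5]] for rect in cells]
print(cells)  # [[10, 20, 110, 60]] -> [x_min, y_min, x_max, y_max]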
+ 40 - 18
paddlex/inference/pipelines/table_recognition/pipeline_v2.py

@@ -12,11 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os, sys
 from typing import Any, Dict, Optional, Union, List, Tuple
 import numpy as np
 import math
-import cv2
 from sklearn.cluster import KMeans
 from ..base import BasePipeline
 from ..components import CropByBoxes
@@ -28,6 +26,7 @@ from .table_recognition_post_processing import (
 from .result import SingleTableRecognitionResult, TableRecognitionResult
 from ....utils import logging
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
 from ..ocr.result import OCRResult
@@ -47,7 +46,7 @@ class TableRecognitionPipelineV2(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
-        hpi_params: Optional[Dict[str, Any]] = None,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the layout parsing pipeline.
 
@@ -55,12 +54,15 @@ class TableRecognitionPipelineV2(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
-            hpi_params (Optional[Dict[str, Any]], optional): HPIP parameters. Defaults to None.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
         super().__init__(
-            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_params=hpi_params
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
         )
 
         self.use_doc_preprocessor = config.get("use_doc_preprocessor", True)
@@ -130,8 +132,7 @@ class TableRecognitionPipelineV2(BasePipeline):
             self.general_ocr_pipeline = self.create_pipeline(general_ocr_config)
         else:
             self.general_ocr_config_bak = config.get("SubPipelines", {}).get(
-                "GeneralOCR",
-                None
+                "GeneralOCR", None
             )
 
         self._crop_by_boxes = CropByBoxes()
@@ -600,15 +601,25 @@ class TableRecognitionPipelineV2(BasePipeline):
                 use_e2e_model = True
             else:
                 table_cells_pred = next(
-                    self.wireless_table_cells_detection_model(image_array, threshold=0.3)
+                    self.wireless_table_cells_detection_model(
+                        image_array, threshold=0.3
+                    )
                 )  # Setting the threshold to 0.3 can improve the accuracy of table cells detection.
                 # If you really want more or fewer table cells detection boxes, the threshold can be adjusted.
 
         if use_e2e_model == False:
-            table_structure_result = self.extract_results(table_structure_pred, "table_stru")
-            table_cells_result, table_cells_score = self.extract_results(table_cells_pred, "det")
-            table_cells_result, table_cells_score = self.cells_det_results_nms(table_cells_result, table_cells_score)
-            ocr_det_boxes = self.get_region_ocr_det_boxes(overall_ocr_res["rec_boxes"].tolist(), table_box)
+            table_structure_result = self.extract_results(
+                table_structure_pred, "table_stru"
+            )
+            table_cells_result, table_cells_score = self.extract_results(
+                table_cells_pred, "det"
+            )
+            table_cells_result, table_cells_score = self.cells_det_results_nms(
+                table_cells_result, table_cells_score
+            )
+            ocr_det_boxes = self.get_region_ocr_det_boxes(
+                overall_ocr_res["rec_boxes"].tolist(), table_box
+            )
             table_cells_result = self.cells_det_results_reprocessing(
                 table_cells_result,
                 table_cells_score,
@@ -616,7 +627,9 @@ class TableRecognitionPipelineV2(BasePipeline):
                 len(table_structure_pred["bbox"]),
             )
             if use_table_cells_ocr_results == True:
-                cells_texts_list = self.split_ocr_bboxes_by_table_cells(image_array, table_cells_result)
+                cells_texts_list = self.split_ocr_bboxes_by_table_cells(
+                    image_array, table_cells_result
+                )
             else:
                 cells_texts_list = []
             single_table_recognition_res = get_table_recognition_res(
@@ -629,9 +642,16 @@ class TableRecognitionPipelineV2(BasePipeline):
             )
         else:
             if use_table_cells_ocr_results == True:
-                table_cells_result_e2e = list(map(lambda arr: arr.tolist(), table_structure_pred["bbox"]))
-                table_cells_result_e2e = [[rect[0], rect[1], rect[4], rect[5]]for rect in table_cells_result_e2e]
-                cells_texts_list = self.split_ocr_bboxes_by_table_cells(image_array, table_cells_result_e2e)
+                table_cells_result_e2e = list(
+                    map(lambda arr: arr.tolist(), table_structure_pred["bbox"])
+                )
+                table_cells_result_e2e = [
+                    [rect[0], rect[1], rect[4], rect[5]]
+                    for rect in table_cells_result_e2e
+                ]
+                cells_texts_list = self.split_ocr_bboxes_by_table_cells(
+                    image_array, table_cells_result_e2e
+                )
             else:
                 cells_texts_list = []
             single_table_recognition_res = get_table_recognition_res_e2e(
@@ -737,7 +757,9 @@ class TableRecognitionPipelineV2(BasePipeline):
                 )
             elif use_table_cells_ocr_results == True:
                 assert self.general_ocr_config_bak != None
-                self.general_ocr_pipeline = self.create_pipeline(self.general_ocr_config_bak)
+                self.general_ocr_pipeline = self.create_pipeline(
+                    self.general_ocr_config_bak
+                )
 
             table_res_list = []
             table_region_id = 1

+ 10 - 2
paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py

@@ -16,6 +16,7 @@ from typing import Any, Dict, Optional, Union, List
 import pandas as pd
 
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.ts_anomaly_detection.result import TSAdResult
@@ -32,6 +33,7 @@ class TSAnomalyDetPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the Time Series ad pipeline.
 
@@ -39,10 +41,16 @@ class TSAnomalyDetPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         ts_ad_model_config = config["SubModules"]["TSAnomalyDetection"]
         self.ts_ad_model = self.create_model(ts_ad_model_config)

+ 10 - 2
paddlex/inference/pipelines/ts_classification/pipeline.py

@@ -16,6 +16,7 @@ from typing import Any, Dict, Optional, Union, List
 import pandas as pd
 
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.ts_classification.result import TSClsResult
@@ -32,6 +33,7 @@ class TSClsPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the Time Series classification pipeline.
 
@@ -39,10 +41,16 @@ class TSClsPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         ts_classification_model_config = config["SubModules"]["TSClassification"]
         self.ts_classification_model = self.create_model(ts_classification_model_config)

+ 10 - 2
paddlex/inference/pipelines/ts_forecasting/pipeline.py

@@ -16,6 +16,7 @@ from typing import Any, Dict, Optional, Union, List
 import pandas as pd
 
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.ts_forecasting.result import TSFcResult
@@ -32,6 +33,7 @@ class TSFcPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the Time Series Forecast pipeline.
 
@@ -39,10 +41,16 @@ class TSFcPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
 
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         ts_forecast_model_config = config["SubModules"]["TSForecast"]
         self.ts_forecast_model = self.create_model(ts_forecast_model_config)

+ 10 - 2
paddlex/inference/pipelines/video_classification/pipeline.py

@@ -15,6 +15,7 @@
 from typing import Any, Dict, Optional, Union, List
 import numpy as np
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.video_classification.result import TopkVideoResult
@@ -31,6 +32,7 @@ class VideoClassificationPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -39,9 +41,15 @@ class VideoClassificationPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         video_classification_model_config = config["SubModules"]["VideoClassification"]
         self.video_classification_model = self.create_model(

+ 13 - 3
paddlex/inference/pipelines/video_detection/pipeline.py

@@ -15,6 +15,7 @@
 from typing import Any, Dict, Optional, Union, List
 import numpy as np
 from ...utils.pp_option import PaddlePredictorOption
+from ...utils.hpi import HPIConfig
 from ..base import BasePipeline
 
 from ...models.video_detection.result import DetVideoResult
@@ -31,6 +32,7 @@ class VideoDetectionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -39,9 +41,15 @@ class VideoDetectionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
 
         video_detection_model_config = config["SubModules"]["VideoDetection"]
         model_kwargs = {}
@@ -49,7 +57,9 @@ class VideoDetectionPipeline(BasePipeline):
             model_kwargs["nms_thresh"] = video_detection_model_config["nms_thresh"]
         if "score_thresh" in video_detection_model_config:
             model_kwargs["score_thresh"] = video_detection_model_config["score_thresh"]
-        self.video_detection_model = self.create_model(video_detection_model_config, **model_kwargs)
+        self.video_detection_model = self.create_model(
+            video_detection_model_config, **model_kwargs
+        )
 
     def predict(
         self,

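Note: nms_thresh and score_thresh are forwarded to create_model only when they are present in the sub-module config. A hedged sketch of such an entry; the model name and values are illustrative, not taken from the diff:

# Hypothetical VideoDetection sub-module config; only the keys that are
# present end up in model_kwargs.
video_detection_model_config = {
    "model_name": "YOWO",   # assumed model name, for illustration only
    "nms_thresh": 0.45,     # forwarded as model_kwargs["nms_thresh"]
    "score_thresh": 0.6,    # forwarded as model_kwargs["score_thresh"]
}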
+ 11 - 13
paddlex/inference/utils/benchmark.py

@@ -162,11 +162,9 @@ class Benchmark:
         # 3. Operations do not nest, except that the entry point operation
         #    contains all other operations.
         # 4. The input batch size for each operation is `batch_size`.
-        # 5. Inference operations are always performed, while preprocessing and
-        #    postprocessing operations are optional.
-        # 6. If present, preprocessing operations are always performed before
-        #    inference operations, and inference operations are completed before
-        #    any postprocessing operations. There is no interleaving among these
+        # 5. Preprocessing operations are always performed before inference
+        #    operations, and inference operations are completed before
+        #    postprocessing operations. There is no interleaving among these
         #    stages.
 
         logs = {k: v for k, v in self.logs.items()}
@@ -275,8 +273,8 @@ class Benchmark:
                 i[:4] + (f"{i[4]:.8f}", f"{i[5]:.8f}") for i in summary_list
             ]
             table.add_rows(summary_list)
-            table_name = "WarmUp Data".center(len(str(table).split("\n")[0]), " ")
-            logging.info(table_name)
+            table_title = "Warmup Data".center(len(str(table).split("\n")[0]), " ")
+            logging.info(table_title)
             logging.info(table)
 
         else:
@@ -286,8 +284,8 @@ class Benchmark:
             ]
             table = PrettyTable(operation_head)
             table.add_rows(operation_list)
-            table_name = "Operation Info".center(len(str(table).split("\n")[0]), " ")
-            logging.info(table_name)
+            table_title = "Operation Info".center(len(str(table).split("\n")[0]), " ")
+            logging.info(table_title)
             logging.info(table)
 
             detail_head = [
@@ -301,8 +299,8 @@ class Benchmark:
             table = PrettyTable(detail_head)
             detail_list = [i[:4] + (f"{i[4]:.8f}", f"{i[5]:.8f}") for i in detail_list]
             table.add_rows(detail_list)
-            table_name = "Detail Data".center(len(str(table).split("\n")[0]), " ")
-            logging.info(table_name)
+            table_title = "Detail Data".center(len(str(table).split("\n")[0]), " ")
+            logging.info(table_title)
             logging.info(table)
 
             summary_head = [
@@ -318,8 +316,8 @@ class Benchmark:
                 i[:4] + (f"{i[4]:.8f}", f"{i[5]:.8f}") for i in summary_list
             ]
             table.add_rows(summary_list)
-            table_name = "Summary Data".center(len(str(table).split("\n")[0]), " ")
-            logging.info(table_name)
+            table_title = "Summary Data".center(len(str(table).split("\n")[0]), " ")
+            logging.info(table_title)
             logging.info(table)
 
             if INFER_BENCHMARK_OUTPUT_DIR:

+ 155 - 3
paddlex/inference/utils/hpi.py

@@ -12,12 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import importlib.resources
+import json
+import platform
+from functools import lru_cache
 from os import PathLike
 from pathlib import Path
-from typing import Dict, List, Literal, Optional, Tuple, TypedDict, Union
+from typing import Any, Dict, Final, List, Literal, Optional, Tuple, TypedDict, Union
 
-from pydantic import BaseModel
-from typing_extensions import TypeAlias
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated, TypeAlias
 
 from ...utils.flags import FLAGS_json_format_model
 
@@ -47,6 +51,42 @@ InferenceBackend: TypeAlias = Literal[
 ]
 
 
+class OpenVINOConfig(BaseModel):
+    cpu_num_threads: int = 8
+
+
+class ONNXRuntimeConfig(BaseModel):
+    cpu_num_threads: int = 8
+
+
+class TensorRTConfig(BaseModel):
+    precision: Literal["fp32", "fp16"] = "fp32"
+    use_dynamic_shapes: bool = True
+    dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None
+    # TODO: Control caching behavior
+
+
+class OMConfig(BaseModel):
+    pass
+
+
+class HPIConfig(BaseModel):
+    pdx_model_name: Annotated[str, Field(alias="model_name")]
+    device_type: str
+    device_id: Optional[int] = None
+    auto_config: bool = True
+    backend: Optional[InferenceBackend] = None
+    backend_config: Optional[Dict[str, Any]] = None
+    hpi_info: Optional[HPIInfo] = None
+    auto_paddle2onnx: bool = True
+    # TODO: Add more validation logic here
+
+
+class ModelInfo(BaseModel):
+    name: str
+    hpi_info: Optional[HPIInfo] = None
+
+
 ModelFormat: TypeAlias = Literal["paddle", "onnx", "om"]
 
 
@@ -80,3 +120,115 @@ def get_model_paths(
     if (model_dir / f"{model_file_prefix}.om").exists():
         model_paths["om"] = model_dir / f"{model_file_prefix}.om"
     return model_paths
+
+
+@lru_cache(1)
+def _get_hpi_model_info_collection():
+    with importlib.resources.open_text(
+        __package__, "hpi_model_info_collection.json", encoding="utf-8"
+    ) as f:
+        hpi_model_info_collection = json.load(f)
+    return hpi_model_info_collection
+
+
+def suggest_inference_backend_and_config(
+    hpi_config: HPIConfig,
+    available_backends: Optional[List[InferenceBackend]] = None,
+) -> Union[Tuple[InferenceBackend, Dict[str, Any]], Tuple[None, str]]:
+    # TODO: The current strategy is naive. It would be better to consider
+    # additional important factors, such as NVIDIA GPU compute capability and
+    # device manufacturers. We should also allow users to provide hints.
+
+    import lazy_paddle as paddle
+
+    if available_backends is not None and not available_backends:
+        return None, "No inference backends are available."
+
+    paddle_version = paddle.__version__
+    if paddle_version != "3.0.0-rc0":
+        return None, f"{repr(paddle_version)} is not a supported Paddle version."
+
+    if hpi_config.device_type == "cpu":
+        uname = platform.uname()
+        arch = uname.machine.lower()
+        if arch == "x86_64":
+            key = "cpu_x64"
+        else:
+            return None, f"{repr(arch)} is not a supported architecture."
+    elif hpi_config.device_type == "gpu":
+        # FIXME: We should not rely on the PaddlePaddle library to determine CUDA
+        # and cuDNN versions.
+        # Should we inject environment info from the outside?
+        import lazy_paddle.version
+
+        cuda_version = lazy_paddle.version.cuda()
+        cuda_version = cuda_version.replace(".", "")
+        cudnn_version = lazy_paddle.version.cudnn().rsplit(".", 1)[0]
+        cudnn_version = cudnn_version.replace(".", "")
+        key = f"gpu_cuda{cuda_version}_cudnn{cudnn_version}"
+    else:
+        return None, f"{repr(hpi_config.device_type)} is not a supported device type."
+
+    hpi_model_info_collection = _get_hpi_model_info_collection()
+
+    if key not in hpi_model_info_collection:
+        return None, "No prior knowledge can be utilized."
+    hpi_model_info_collection_for_env = hpi_model_info_collection[key]
+
+    if hpi_config.pdx_model_name not in hpi_model_info_collection_for_env:
+        return None, f"{repr(hpi_config.pdx_model_name)} is not a known model."
+    supported_pseudo_backends = hpi_model_info_collection_for_env[
+        hpi_config.pdx_model_name
+    ]
+
+    candidate_backends = []
+    backend_to_pseudo_backend = {}
+    for pb in supported_pseudo_backends:
+        if pb.startswith("paddle"):
+            backend = "paddle"
+        elif pb.startswith("tensorrt"):
+            backend = "tensorrt"
+        else:
+            backend = pb
+        if available_backends is not None and backend not in available_backends:
+            continue
+        candidate_backends.append(backend)
+        backend_to_pseudo_backend[backend] = pb
+
+    if not candidate_backends:
+        return None, "No inference backend can be selected."
+
+    if hpi_config.backend is not None:
+        if hpi_config.backend not in candidate_backends:
+            return (
+                None,
+                f"{repr(hpi_config.backend)} is not a supported inference backend.",
+            )
+        suggested_backend = hpi_config.backend
+    else:
+        # The first backend is the preferred one.
+        suggested_backend = candidate_backends[0]
+
+    suggested_backend_config = {}
+    if suggested_backend == "paddle":
+        pseudo_backend = backend_to_pseudo_backend["paddle"]
+        assert pseudo_backend in (
+            "paddle",
+            "paddle_tensorrt_fp32",
+            "paddle_tensorrt_fp16",
+        ), pseudo_backend
+        if pseudo_backend == "paddle_tensorrt_fp32":
+            suggested_backend_config.update({"run_mode": "trt_fp32"})
+        elif pseudo_backend == "paddle_tensorrt_fp16":
+            # TODO: Check if the target device supports FP16.
+            suggested_backend_config.update({"run_mode": "trt_fp16"})
+    elif suggested_backend == "tensorrt":
+        pseudo_backend = backend_to_pseudo_backend["tensorrt"]
+        assert pseudo_backend in ("tensorrt", "tensorrt_fp16"), pseudo_backend
+        if pseudo_backend == "tensorrt_fp16":
+            suggested_backend_config.update({"precision": "fp16"})
+
+    if hpi_config.backend_config is not None:
+        suggested_backend_config.update(hpi_config.backend_config)
+
+    return suggested_backend, suggested_backend_config

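Note: a minimal sketch, not part of the diff, of how the new helper is expected to be called. HPIConfig and the (backend, config) vs. (None, reason) return contract come from this file; the import path and model name are assumptions:

# Illustrative only; assumes this module is importable as
# paddlex.inference.utils.hpi and that "ResNet50" has an entry in
# hpi_model_info_collection.json (it appears in the cpu_x64 section below).
from paddlex.inference.utils.hpi import (
    HPIConfig,
    suggest_inference_backend_and_config,
)

hpi_config = HPIConfig(model_name="ResNet50", device_type="cpu")  # model_name aliases pdx_model_name
backend, config_or_reason = suggest_inference_backend_and_config(
    hpi_config,
    available_backends=["paddle", "openvino", "onnxruntime"],  # optional filter
)
if backend is None:
    print("Auto-configuration failed:", config_or_reason)  # human-readable reason
else:
    print("Suggested backend:", backend, "with config:", config_or_reason)

The helper also checks the installed Paddle version at this commit, so the sketch assumes a matching environment.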
+ 1874 - 0
paddlex/inference/utils/hpi_model_info_collection.json

@@ -0,0 +1,1874 @@
+{
+  "cpu_x64": {
+    "PP-DocLayout-L": [
+      "paddle"
+    ],
+    "PP-DocLayout-M": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PP-DocLayout-S": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "RT-DETR-L_wired_table_cell_det": [
+      "paddle"
+    ],
+    "RT-DETR-L_wireless_table_cell_det": [
+      "paddle"
+    ],
+    "PP-LCNet_x1_0_table_cls": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "STFPM": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-LCNet_x1_0_doc_ori": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "BlazeFace": [
+      "paddle"
+    ],
+    "BlazeFace-FPN-SSH": [
+      "paddle"
+    ],
+    "PicoDet_LCNet_x2_5_face": [
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-YOLOE_plus-S_face": [
+      "paddle"
+    ],
+    "MobileFaceNet": [
+      "paddle",
+      "onnxruntime"
+    ],
+    "ResNet50_face": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "LaTeX_OCR_rec": [
+      "paddle"
+    ],
+    "PP-ShiTuV2_rec": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "PP-ShiTuV2_rec_CLIP_vit_base": [
+      "paddle",
+      "onnxruntime",
+      "openvino"
+    ],
+    "PP-ShiTuV2_rec_CLIP_vit_large": [
+      "paddle",
+      "onnxruntime",
+      "openvino"
+    ],
+    "PP-YOLOE-L_human": [
+      "paddle",
+      "openvino"
+    ],
+    "PP-YOLOE-S_human": [
+      "openvino",
+      "paddle"
+    ],
+    "CLIP_vit_base_patch16_224": [
+      "paddle",
+      "onnxruntime",
+      "openvino"
+    ],
+    "CLIP_vit_large_patch14_224": [
+      "paddle",
+      "onnxruntime",
+      "openvino"
+    ],
+    "ConvNeXt_base_224": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ConvNeXt_base_384": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ConvNeXt_large_224": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ConvNeXt_large_384": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ConvNeXt_small": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "ConvNeXt_tiny": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV1_x0_5": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV1_x0_25": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "MobileNetV1_x0_75": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV1_x1_0": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV2_x0_5": [
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV2_x0_25": [
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV2_x1_0": [
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV2_x1_5": [
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV2_x2_0": [
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV3_large_x0_5": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "MobileNetV3_large_x0_35": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "MobileNetV3_large_x0_75": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "MobileNetV3_large_x1_0": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "MobileNetV3_large_x1_25": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "MobileNetV3_small_x0_5": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "MobileNetV3_small_x0_35": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "MobileNetV3_small_x0_75": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "MobileNetV3_small_x1_0": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "MobileNetV3_small_x1_25": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-HGNet_base": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PP-HGNet_small": [
+      "paddle",
+      "onnxruntime",
+      "openvino"
+    ],
+    "PP-HGNet_tiny": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "PP-HGNetV2-B0": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-HGNetV2-B1": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-HGNetV2-B2": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-HGNetV2-B3": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-HGNetV2-B4": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "PP-HGNetV2-B5": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B6": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x0_5": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x0_25": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x0_35": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x0_75": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x1_0": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x1_5": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x2_0": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x2_5": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-LCNetV2_base": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "PP-LCNetV2_large": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-LCNetV2_small": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "ResNet18_vd": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ResNet18": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ResNet34_vd": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ResNet34": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ResNet50_vd": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ResNet50": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ResNet101_vd": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "ResNet101": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ResNet152_vd": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "ResNet152": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "ResNet200_vd": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "SwinTransformer_base_patch4_window7_224": [
+      "paddle"
+    ],
+    "SwinTransformer_base_patch4_window12_384": [
+      "paddle"
+    ],
+    "SwinTransformer_large_patch4_window7_224": [
+      "paddle"
+    ],
+    "SwinTransformer_large_patch4_window12_384": [
+      "paddle"
+    ],
+    "SwinTransformer_small_patch4_window7_224": [
+      "paddle"
+    ],
+    "SwinTransformer_tiny_patch4_window7_224": [
+      "paddle"
+    ],
+    "FasterNet-L": [
+      "paddle",
+      "onnxruntime",
+      "openvino"
+    ],
+    "FasterNet-M": [
+      "paddle",
+      "onnxruntime",
+      "openvino"
+    ],
+    "FasterNet-S": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "FasterNet-T0": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "FasterNet-T1": [
+      "openvino",
+      "onnxruntime",
+      "paddle"
+    ],
+    "FasterNet-T2": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV4_conv_large": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "MobileNetV4_conv_medium": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV4_conv_small": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "MobileNetV4_hybrid_large": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "MobileNetV4_hybrid_medium": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "StarNet-S1": [
+      "openvino",
+      "paddle"
+    ],
+    "StarNet-S2": [
+      "openvino",
+      "paddle"
+    ],
+    "StarNet-S3": [
+      "openvino",
+      "paddle"
+    ],
+    "StarNet-S4": [
+      "openvino",
+      "paddle"
+    ],
+    "UVDoc": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "Mask-RT-DETR-M": [],
+    "Mask-RT-DETR-S": [],
+    "Mask-RT-DETR-X": [
+      "paddle"
+    ],
+    "Cascade-MaskRCNN-ResNet50-FPN": [],
+    "Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN": [],
+    "MaskRCNN-ResNet50-FPN": [],
+    "MaskRCNN-ResNet50-vd-FPN": [],
+    "MaskRCNN-ResNet50": [],
+    "MaskRCNN-ResNet101-FPN": [],
+    "MaskRCNN-ResNet101-vd-FPN": [],
+    "MaskRCNN-ResNeXt101-vd-FPN": [],
+    "PP-YOLOE_seg-S": [],
+    "Mask-RT-DETR-H": [
+      "paddle"
+    ],
+    "Mask-RT-DETR-L": [
+      "paddle"
+    ],
+    "SOLOv2": [],
+    "PP-ShiTuV2_det": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "CLIP_vit_base_patch16_448_ML": [
+      "paddle",
+      "onnxruntime",
+      "openvino"
+    ],
+    "PP-HGNetV2-B0_ML": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B4_ML": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "PP-HGNetV2-B6_ML": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x1_0_ML": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "ResNet50_ML": [
+      "onnxruntime",
+      "paddle",
+      "openvino"
+    ],
+    "PicoDet-L": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PicoDet-S": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-YOLOE_plus-L": [
+      "paddle",
+      "openvino"
+    ],
+    "PP-YOLOE_plus-M": [
+      "paddle",
+      "openvino"
+    ],
+    "PP-YOLOE_plus-S": [
+      "openvino",
+      "paddle"
+    ],
+    "PP-YOLOE_plus-X": [
+      "paddle",
+      "openvino"
+    ],
+    "RT-DETR-H": [
+      "paddle"
+    ],
+    "RT-DETR-L": [
+      "paddle"
+    ],
+    "RT-DETR-R18": [
+      "paddle"
+    ],
+    "RT-DETR-R50": [
+      "paddle"
+    ],
+    "RT-DETR-X": [
+      "paddle"
+    ],
+    "YOLOv3-DarkNet53": [
+      "paddle",
+      "onnxruntime",
+      "openvino"
+    ],
+    "YOLOv3-MobileNetV3": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "YOLOv3-ResNet50_vd_DCN": [
+      "openvino",
+      "paddle"
+    ],
+    "YOLOX-L": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "YOLOX-M": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "YOLOX-N": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "YOLOX-S": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "YOLOX-T": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "YOLOX-X": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "Cascade-FasterRCNN-ResNet50-FPN": [],
+    "Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN": [],
+    "CenterNet-DLA-34": [],
+    "CenterNet-ResNet50": [],
+    "DETR-R50": [
+      "paddle"
+    ],
+    "FasterRCNN-ResNet34-FPN": [],
+    "FasterRCNN-ResNet50-FPN": [],
+    "FasterRCNN-ResNet50-vd-FPN": [],
+    "FasterRCNN-ResNet50-vd-SSLDv2-FPN": [],
+    "FasterRCNN-ResNet50": [],
+    "FasterRCNN-ResNet101-FPN": [],
+    "FasterRCNN-ResNet101": [],
+    "FasterRCNN-ResNeXt101-vd-FPN": [],
+    "FasterRCNN-Swin-Tiny-FPN": [],
+    "FCOS-ResNet50": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PicoDet-M": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PicoDet-XS": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x1_0_pedestrian_attribute": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "Deeplabv3_Plus-R50": [
+      "paddle",
+      "onnxruntime"
+    ],
+    "Deeplabv3_Plus-R101": [
+      "paddle",
+      "onnxruntime"
+    ],
+    "Deeplabv3-R50": [
+      "paddle",
+      "onnxruntime"
+    ],
+    "Deeplabv3-R101": [
+      "paddle",
+      "onnxruntime"
+    ],
+    "OCRNet_HRNet-W18": [
+      "paddle",
+      "onnxruntime"
+    ],
+    "OCRNet_HRNet-W48": [
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-LiteSeg-T": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "SegFormer-B0": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "SegFormer-B1": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "SegFormer-B2": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "SegFormer-B3": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "SegFormer-B4": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "SegFormer-B5": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "SeaFormer_base": [
+      "paddle"
+    ],
+    "SeaFormer_large": [
+      "paddle"
+    ],
+    "SeaFormer_small": [
+      "paddle"
+    ],
+    "SeaFormer_tiny": [
+      "paddle"
+    ],
+    "PP-LiteSeg-B": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "MaskFormer_small": [],
+    "MaskFormer_tiny": [],
+    "PP-YOLOE_plus_SOD-S": [
+      "openvino",
+      "paddle"
+    ],
+    "PP-YOLOE_plus_SOD-L": [
+      "paddle",
+      "openvino"
+    ],
+    "PP-YOLOE_plus_SOD-largesize-L": [
+      "paddle",
+      "openvino"
+    ],
+    "PicoDet-S_layout_3cls": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PicoDet-S_layout_17cls": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PicoDet-L_layout_3cls": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PicoDet-L_layout_17cls": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "RT-DETR-H_layout_3cls": [
+      "paddle"
+    ],
+    "RT-DETR-H_layout_17cls": [
+      "paddle"
+    ],
+    "PicoDet_layout_1x": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PicoDet_layout_1x_table": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "SLANet": [
+      "paddle"
+    ],
+    "SLANet_plus": [
+      "paddle"
+    ],
+    "PP-OCRv4_mobile_det": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-OCRv4_server_det": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PP-OCRv3_mobile_det": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PP-OCRv3_server_det": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PP-OCRv4_mobile_seal_det": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-OCRv4_server_seal_det": [
+      "paddle",
+      "openvino",
+      "onnxruntime"
+    ],
+    "PP-OCRv4_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "PP-OCRv4_server_rec": [
+      "paddle",
+      "openvino"
+    ],
+    "ch_SVTRv2_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "ch_RepSVTR_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "PP-OCRv4_server_rec_doc": [
+      "paddle",
+      "openvino"
+    ],
+    "ta_PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "latin_PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "chinese_cht_PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "ka_PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "korean_PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "en_PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "devanagari_PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "te_PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "en_PP-OCRv4_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "arabic_PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "japan_PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "cyrillic_PP-OCRv3_mobile_rec": [
+      "openvino",
+      "paddle"
+    ],
+    "AutoEncoder_ad": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "DLinear_ad": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "Nonstationary_ad": [
+      "paddle"
+    ],
+    "PatchTST_ad": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "TimesNet_ad": [],
+    "TimesNet_cls": [
+      "paddle"
+    ],
+    "DLinear": [
+      "onnxruntime",
+      "openvino",
+      "paddle"
+    ],
+    "NLinear": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "Nonstationary": [
+      "paddle"
+    ],
+    "PatchTST": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "RLinear": [
+      "onnxruntime",
+      "paddle"
+    ],
+    "TiDE": [
+      "paddle"
+    ],
+    "TimesNet": [
+      "paddle"
+    ],
+    "PP-LCNet_x1_0_vehicle_attribute": [
+      "openvino",
+      "paddle",
+      "onnxruntime"
+    ],
+    "PP-YOLOE-S_vehicle": [
+      "openvino",
+      "paddle"
+    ],
+    "PP-YOLOE-L_vehicle": [
+      "paddle",
+      "openvino"
+    ]
+  },
+  "gpu_cuda118_cudnn86": {
+    "PP-DocLayout-L": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-DocLayout-M": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-DocLayout-S": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "RT-DETR-L_wired_table_cell_det": [
+      "paddle_tensorrt_fp16"
+    ],
+    "RT-DETR-L_wireless_table_cell_det": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-LCNet_x1_0_table_cls": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "STFPM": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x1_0_doc_ori": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "BlazeFace": [
+      "paddle_tensorrt_fp16"
+    ],
+    "BlazeFace-FPN-SSH": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PicoDet_LCNet_x2_5_face": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-YOLOE_plus-S_face": [
+      "paddle_tensorrt_fp16"
+    ],
+    "MobileFaceNet": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet50_face": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "LaTeX_OCR_rec": [
+      "paddle"
+    ],
+    "PP-ShiTuV2_rec": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-ShiTuV2_rec_CLIP_vit_base": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-ShiTuV2_rec_CLIP_vit_large": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-YOLOE-L_human": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-YOLOE-S_human": [
+      "paddle_tensorrt_fp16"
+    ],
+    "CLIP_vit_base_patch16_224": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "CLIP_vit_large_patch14_224": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ConvNeXt_base_224": [
+      "paddle_tensorrt",
+      "tensorrt",
+      "onnxruntime"
+    ],
+    "ConvNeXt_base_384": [
+      "paddle_tensorrt",
+      "tensorrt",
+      "onnxruntime"
+    ],
+    "ConvNeXt_large_224": [
+      "paddle_tensorrt_fp16",
+      "tensorrt",
+      "onnxruntime"
+    ],
+    "ConvNeXt_large_384": [
+      "paddle_tensorrt",
+      "tensorrt",
+      "onnxruntime"
+    ],
+    "ConvNeXt_small": [
+      "paddle_tensorrt",
+      "tensorrt",
+      "onnxruntime"
+    ],
+    "ConvNeXt_tiny": [
+      "paddle_tensorrt_fp16",
+      "tensorrt",
+      "onnxruntime"
+    ],
+    "MobileNetV1_x0_5": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV1_x0_25": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV1_x0_75": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV1_x1_0": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV2_x0_5": [
+      "paddle_tensorrt_fp16"
+    ],
+    "MobileNetV2_x0_25": [
+      "paddle_tensorrt_fp16"
+    ],
+    "MobileNetV2_x1_0": [
+      "paddle_tensorrt_fp16"
+    ],
+    "MobileNetV2_x1_5": [
+      "paddle_tensorrt_fp16"
+    ],
+    "MobileNetV2_x2_0": [
+      "paddle_tensorrt_fp16"
+    ],
+    "MobileNetV3_large_x0_5": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV3_large_x0_35": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV3_large_x0_75": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV3_large_x1_0": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV3_large_x1_25": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV3_small_x0_5": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV3_small_x0_35": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV3_small_x0_75": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV3_small_x1_0": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV3_small_x1_25": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNet_base": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNet_small": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNet_tiny": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B0": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B1": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B2": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B3": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B4": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B5": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B6": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x0_5": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x0_25": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x0_35": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x0_75": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x1_0": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x1_5": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x2_0": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x2_5": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNetV2_base": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNetV2_large": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNetV2_small": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet18_vd": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet18": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet34_vd": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet34": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet50_vd": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet50": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet101_vd": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet101": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet152_vd": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet152": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet200_vd": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "SwinTransformer_base_patch4_window7_224": [
+      "paddle_tensorrt_fp16"
+    ],
+    "SwinTransformer_base_patch4_window12_384": [
+      "paddle_tensorrt_fp16"
+    ],
+    "SwinTransformer_large_patch4_window7_224": [
+      "paddle_tensorrt_fp16"
+    ],
+    "SwinTransformer_large_patch4_window12_384": [
+      "paddle_tensorrt_fp16"
+    ],
+    "SwinTransformer_small_patch4_window7_224": [
+      "paddle_tensorrt_fp16"
+    ],
+    "SwinTransformer_tiny_patch4_window7_224": [
+      "paddle_tensorrt_fp16"
+    ],
+    "FasterNet-L": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "FasterNet-M": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "FasterNet-S": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "FasterNet-T0": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "FasterNet-T1": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "FasterNet-T2": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV4_conv_large": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV4_conv_medium": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV4_conv_small": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV4_hybrid_large": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MobileNetV4_hybrid_medium": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "StarNet-S1": [
+      "paddle_tensorrt_fp16"
+    ],
+    "StarNet-S2": [
+      "paddle_tensorrt_fp16"
+    ],
+    "StarNet-S3": [
+      "paddle_tensorrt_fp16"
+    ],
+    "StarNet-S4": [
+      "paddle_tensorrt_fp16"
+    ],
+    "UVDoc": [
+      "tensorrt_fp16",
+      "paddle",
+      "onnxruntime"
+    ],
+    "Mask-RT-DETR-M": [
+      "paddle"
+    ],
+    "Mask-RT-DETR-S": [],
+    "Mask-RT-DETR-X": [
+      "paddle"
+    ],
+    "Cascade-MaskRCNN-ResNet50-FPN": [
+      "paddle"
+    ],
+    "Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN": [
+      "paddle"
+    ],
+    "MaskRCNN-ResNet50-FPN": [
+      "paddle"
+    ],
+    "MaskRCNN-ResNet50-vd-FPN": [
+      "paddle"
+    ],
+    "MaskRCNN-ResNet50": [
+      "paddle"
+    ],
+    "MaskRCNN-ResNet101-FPN": [
+      "paddle"
+    ],
+    "MaskRCNN-ResNet101-vd-FPN": [
+      "paddle"
+    ],
+    "MaskRCNN-ResNeXt101-vd-FPN": [
+      "paddle"
+    ],
+    "PP-YOLOE_seg-S": [],
+    "Mask-RT-DETR-H": [
+      "paddle"
+    ],
+    "Mask-RT-DETR-L": [
+      "paddle"
+    ],
+    "SOLOv2": [],
+    "PP-ShiTuV2_det": [
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "CLIP_vit_base_patch16_448_ML": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B0_ML": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B4_ML": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-HGNetV2-B6_ML": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x1_0_ML": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "ResNet50_ML": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PicoDet-L": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PicoDet-S": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-YOLOE_plus-L": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-YOLOE_plus-M": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-YOLOE_plus-S": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-YOLOE_plus-X": [
+      "paddle_tensorrt_fp16"
+    ],
+    "RT-DETR-H": [
+      "paddle_tensorrt_fp16"
+    ],
+    "RT-DETR-L": [
+      "paddle_tensorrt_fp16"
+    ],
+    "RT-DETR-R18": [
+      "paddle_tensorrt_fp16"
+    ],
+    "RT-DETR-R50": [
+      "paddle_tensorrt_fp16"
+    ],
+    "RT-DETR-X": [
+      "paddle_tensorrt_fp16"
+    ],
+    "YOLOv3-DarkNet53": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "YOLOv3-MobileNetV3": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "YOLOv3-ResNet50_vd_DCN": [
+      "paddle_tensorrt_fp16"
+    ],
+    "YOLOX-L": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "YOLOX-M": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "YOLOX-N": [
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "YOLOX-S": [
+      "tensorrt",
+      "paddle_tensorrt",
+      "onnxruntime"
+    ],
+    "YOLOX-T": [
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "YOLOX-X": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "Cascade-FasterRCNN-ResNet50-FPN": [
+      "paddle"
+    ],
+    "Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN": [
+      "paddle"
+    ],
+    "CenterNet-DLA-34": [],
+    "CenterNet-ResNet50": [],
+    "DETR-R50": [
+      "paddle_tensorrt_fp16"
+    ],
+    "FasterRCNN-ResNet34-FPN": [
+      "paddle_tensorrt_fp16"
+    ],
+    "FasterRCNN-ResNet50-FPN": [
+      "paddle_tensorrt_fp16"
+    ],
+    "FasterRCNN-ResNet50-vd-FPN": [
+      "paddle_tensorrt_fp16"
+    ],
+    "FasterRCNN-ResNet50-vd-SSLDv2-FPN": [
+      "paddle_tensorrt_fp16"
+    ],
+    "FasterRCNN-ResNet50": [
+      "paddle_tensorrt_fp16"
+    ],
+    "FasterRCNN-ResNet101-FPN": [
+      "paddle_tensorrt_fp16"
+    ],
+    "FasterRCNN-ResNet101": [
+      "paddle_tensorrt_fp16"
+    ],
+    "FasterRCNN-ResNeXt101-vd-FPN": [
+      "paddle_tensorrt_fp16"
+    ],
+    "FasterRCNN-Swin-Tiny-FPN": [],
+    "FCOS-ResNet50": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PicoDet-M": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PicoDet-XS": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-LCNet_x1_0_pedestrian_attribute": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "Deeplabv3_Plus-R50": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "Deeplabv3_Plus-R101": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "Deeplabv3-R50": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "Deeplabv3-R101": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "OCRNet_HRNet-W18": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16"
+    ],
+    "OCRNet_HRNet-W48": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-LiteSeg-T": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "SegFormer-B0": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "SegFormer-B1": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "SegFormer-B2": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "SegFormer-B3": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "SegFormer-B4": [
+      "paddle_tensorrt_fp16",
+      "onnxruntime",
+      "tensorrt"
+    ],
+    "SegFormer-B5": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "SeaFormer_base": [
+      "paddle"
+    ],
+    "SeaFormer_large": [
+      "paddle"
+    ],
+    "SeaFormer_small": [
+      "paddle"
+    ],
+    "SeaFormer_tiny": [
+      "paddle"
+    ],
+    "PP-LiteSeg-B": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "MaskFormer_small": [
+      "paddle"
+    ],
+    "MaskFormer_tiny": [
+      "paddle"
+    ],
+    "PP-YOLOE_plus_SOD-S": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-YOLOE_plus_SOD-L": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-YOLOE_plus_SOD-largesize-L": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PicoDet-S_layout_3cls": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PicoDet-S_layout_17cls": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PicoDet-L_layout_3cls": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PicoDet-L_layout_17cls": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "RT-DETR-H_layout_3cls": [
+      "paddle_tensorrt_fp16"
+    ],
+    "RT-DETR-H_layout_17cls": [
+      "paddle_tensorrt"
+    ],
+    "PicoDet_layout_1x": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PicoDet_layout_1x_table": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "SLANet": [
+      "paddle_tensorrt_fp16"
+    ],
+    "SLANet_plus": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-OCRv4_mobile_det": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-OCRv4_server_det": [
+      "tensorrt",
+      "paddle_tensorrt",
+      "onnxruntime"
+    ],
+    "PP-OCRv3_mobile_det": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-OCRv3_server_det": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-OCRv4_mobile_seal_det": [
+      "paddle_tensorrt_fp16",
+      "tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-OCRv4_server_seal_det": [
+      "tensorrt",
+      "paddle_tensorrt",
+      "onnxruntime"
+    ],
+    "PP-OCRv4_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-OCRv4_server_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "ch_SVTRv2_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "ch_RepSVTR_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-OCRv4_server_rec_doc": [
+      "paddle_tensorrt_fp16"
+    ],
+    "ta_PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "latin_PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "chinese_cht_PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "ka_PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "korean_PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "en_PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "devanagari_PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "te_PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "en_PP-OCRv4_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "arabic_PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "japan_PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "cyrillic_PP-OCRv3_mobile_rec": [
+      "paddle_tensorrt_fp16"
+    ],
+    "AutoEncoder_ad": [
+      "tensorrt_fp16",
+      "onnxruntime",
+      "paddle_tensorrt_fp16"
+    ],
+    "DLinear_ad": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "Nonstationary_ad": [
+      "paddle_tensorrt"
+    ],
+    "PatchTST_ad": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "TimesNet_ad": [],
+    "TimesNet_cls": [
+      "paddle"
+    ],
+    "DLinear": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "NLinear": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "Nonstationary": [
+      "paddle"
+    ],
+    "PatchTST": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "RLinear": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "TiDE": [
+      "paddle"
+    ],
+    "TimesNet": [
+      "paddle"
+    ],
+    "PP-LCNet_x1_0_vehicle_attribute": [
+      "tensorrt_fp16",
+      "paddle_tensorrt_fp16",
+      "onnxruntime"
+    ],
+    "PP-YOLOE-S_vehicle": [
+      "paddle_tensorrt_fp16"
+    ],
+    "PP-YOLOE-L_vehicle": [
+      "paddle_tensorrt_fp16"
+    ]
+  }
+}
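
The mapping above is the tail of the new hpi_model_info_collection.json (packaged under paddlex/inference/utils/, as the setup.py change later in this diff shows). Each model name maps to an ordered list of candidate high-performance inference configurations; an entry such as "paddle_tensorrt_fp16" names a backend plus a precision variant, and an empty list means no validated configuration for that model. Below is a minimal sketch of how such a priority list could be consumed; the helper names are hypothetical and this is not the actual auto-configuration code.

def pick_backend_config(model_backend_priority, is_backend_available):
    # Walk the candidates in priority order and keep the first one the
    # installed ultra_infer build actually supports.
    for candidate in model_backend_priority:
        if is_backend_available(candidate):
            return candidate
    return None  # no validated HPI config; fall back to the default path


# ResNet50 above lists tensorrt_fp16, then paddle_tensorrt_fp16, then onnxruntime.
print(pick_backend_config(
    ["tensorrt_fp16", "paddle_tensorrt_fp16", "onnxruntime"],
    lambda candidate: candidate == "onnxruntime",
))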

+ 8 - 4
paddlex/inference/utils/pp_option.py

@@ -44,14 +44,18 @@ class PaddlePredictorOption(object):
     )
     SUPPORT_DEVICE = ("gpu", "cpu", "npu", "xpu", "mlu", "dcu", "gcu")
 
-    def __init__(self, model_name=None, **kwargs):
+    def __init__(self, model_name, **kwargs):
         super().__init__()
-        self.model_name = model_name
+        self._model_name = model_name
         self._cfg = {}
         self._init_option(**kwargs)
         self._changed = False
 
     @property
+    def model_name(self):
+        return self._model_name
+
+    @property
     def changed(self):
         return self._changed
 
@@ -121,9 +125,9 @@ class PaddlePredictorOption(object):
                 f"`run_mode` must be {support_run_mode_str}, but received {repr(run_mode)}."
             )
         # TRT Blocklist
-        if run_mode.startswith("trt") and self.model_name in TRT_BLOCKLIST:
+        if run_mode.startswith("trt") and self._model_name in TRT_BLOCKLIST:
             logging.warning(
-                f"The model({self.model_name}) is not supported to run in trt mode! Using `paddle` instead!"
+                f"The model({self._model_name}) is not supported to run in trt mode! Using `paddle` instead!"
             )
             run_mode = "paddle"
 

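With the constructor change above, PaddlePredictorOption can no longer be created without a model name, and the name is exposed through a read-only property afterwards. A minimal sketch, assuming the option is otherwise constructible with default settings:

from paddlex.inference.utils.pp_option import PaddlePredictorOption

opt = PaddlePredictorOption("ResNet50")
print(opt.model_name)  # "ResNet50"; the property has no setter, so assigning to it raises AttributeError
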
+ 19 - 4
paddlex/model.py

@@ -83,15 +83,30 @@ class _ModelBasedConfig(_BaseModel):
 
         model_dir = predict_kwargs.pop("model_dir", None)
 
-        device = self._config.Global.get("device")
-        kernel_option = predict_kwargs.pop("kernel_option", {})
-        pp_option = PaddlePredictorOption(self._model_name, **kernel_option)
+        UNSET = object()
+        device = self._config.Global.get("device", None)
+        kernel_option = predict_kwargs.pop("kernel_option", UNSET)
+        use_hpip = predict_kwargs.pop("use_hpip", UNSET)
+        hpi_config = predict_kwargs.pop("hpi_config", UNSET)
+
+        create_predictor_kwargs = {}
+        if kernel_option is not UNSET:
+            kernel_option.setdefault("model_name", self._model_name)
+            create_predictor_kwargs["pp_option"] = PaddlePredictorOption(
+                **kernel_option
+            )
+        if use_hpip is not UNSET:
+            create_predictor_kwargs["use_hpip"] = use_hpip
+        else:
+            create_predictor_kwargs["use_hpip"] = False
+        if hpi_config is not UNSET:
+            create_predictor_kwargs["hpi_config"] = hpi_config
 
         predictor = create_predictor(
             self._model_name,
             model_dir,
             device=device,
-            pp_option=pp_option,
+            **create_predictor_kwargs,
         )
         assert "input" in predict_kwargs
         return predict_kwargs, predictor

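The UNSET sentinel above forwards use_hpip and hpi_config to create_predictor only when the caller actually provides them (with use_hpip falling back to False), and kernel_option no longer needs to carry the model name because setdefault fills it in. A minimal sketch of the equivalent direct call, using the keyword names visible in this hunk; the shape of hpi_config (a "backend" key) is an assumption for illustration, not something shown in this diff.

from paddlex.inference import create_predictor

predictor = create_predictor(
    "PP-OCRv4_mobile_det",
    None,  # model_dir, as in the hunk when no local directory is supplied
    device="gpu:0",
    use_hpip=True,
    hpi_config={"backend": "onnxruntime"},  # assumed shape, for illustration only
)
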
+ 23 - 13
paddlex/paddlex_cli.py

@@ -21,6 +21,7 @@ import shutil
 from pathlib import Path
 
 from . import create_pipeline
+from .constants import MODEL_FILE_PREFIX
 from .inference.pipelines import load_pipeline_config
 from .repo_manager import setup, get_all_supported_repo_names
 from .utils.flags import FLAGS_json_format_model
@@ -123,7 +124,9 @@ def args_cfg():
         help="Device to run the pipeline on (e.g., 'cpu', 'gpu:0').",
     )
     pipeline_group.add_argument(
-        "--use_hpip", action="store_true", help="Enable HPIP acceleration if available."
+        "--use_hpip",
+        action="store_true",
+        help="Enable HPIP acceleration by default.",
     )
     pipeline_group.add_argument(
         "--get_pipeline_config",
@@ -154,15 +157,18 @@ def args_cfg():
 
     ################# paddle2onnx #################
     paddle2onnx_group.add_argument(
-        "--paddle2onnx", action="store_true", help="Convert Paddle model to ONNX format"
+        "--paddle2onnx",
+        action="store_true",
+        help="Convert PaddlePaddle model to ONNX format",
     )
     paddle2onnx_group.add_argument(
-        "--paddle_model_dir", type=str, help="Directory containing the Paddle model"
+        "--paddle_model_dir",
+        type=str,
+        help="Directory containing the PaddlePaddle model",
     )
     paddle2onnx_group.add_argument(
         "--onnx_model_dir",
         type=str,
-        default="onnx",
         help="Output directory for the ONNX model",
     )
     paddle2onnx_group.add_argument(
@@ -223,21 +229,23 @@ def install(args):
             )
 
     def _install_hpi_deps(device_type):
-        support_device_type = ["cpu", "gpu"]
-        if device_type not in support_device_type:
+        supported_device_types = ["cpu", "gpu", "npu"]
+        if device_type not in supported_device_types:
             logging.error(
                 "HPI installation failed!\n"
                 "Supported device_type: %s. Your input device_type: %s.\n"
                 "Please ensure the device_type is correct.",
-                support_device_type,
+                supported_device_types,
                 device_type,
             )
             sys.exit(2)
 
         if device_type == "cpu":
-            packages = ["ultra-infer-python", "paddlex-hpi"]
+            packages = ["ultra-infer-python"]
         elif device_type == "gpu":
-            packages = ["ultra-infer-gpu-python", "paddlex-hpi"]
+            packages = ["ultra-infer-gpu-python"]
+        elif device_type == "npu":
+            packages = ["ultra-infer-npu-python"]
 
         with importlib.resources.path("paddlex", "hpip_links.html") as f:
             return subprocess.check_call(
@@ -336,10 +344,10 @@ def serve(pipeline, *, device, use_hpip, host, port):
 
 # TODO: Move to another module
 def paddle_to_onnx(paddle_model_dir, onnx_model_dir, *, opset_version):
-    PD_MODEL_FILE_PREFIX = "inference"
-    PD_PARAMS_FILENAME = "inference.pdiparams"
-    ONNX_MODEL_FILENAME = "inference.onnx"
-    CONFIG_FILENAME = "inference.yml"
+    PD_MODEL_FILE_PREFIX = MODEL_FILE_PREFIX
+    PD_PARAMS_FILENAME = f"{MODEL_FILE_PREFIX}.pdiparams"
+    ONNX_MODEL_FILENAME = f"{MODEL_FILE_PREFIX}.onnx"
+    CONFIG_FILENAME = f"{MODEL_FILE_PREFIX}.yml"
     ADDITIONAL_FILENAMES = ["scaler.pkl"]
 
     def _check_input_dir(input_dir, pd_model_file_ext):
@@ -407,6 +415,8 @@ def paddle_to_onnx(paddle_model_dir, onnx_model_dir, *, opset_version):
             logging.info(f"Copied {src_path} to {dst_path}")
 
     paddle_model_dir = Path(paddle_model_dir)
+    if not onnx_model_dir:
+        onnx_model_dir = paddle_model_dir
     onnx_model_dir = Path(onnx_model_dir)
     logging.info(f"Input dir: {paddle_model_dir}")
     logging.info(f"Output dir: {onnx_model_dir}")

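Besides switching the filenames to the shared MODEL_FILE_PREFIX constant, the paddle_to_onnx changes drop the hard-coded "onnx" output directory: when --onnx_model_dir is omitted, the ONNX files now land next to the Paddle model. A minimal sketch of that fallback, assuming MODEL_FILE_PREFIX is "inference" (the literal it replaces above):

from pathlib import Path

MODEL_FILE_PREFIX = "inference"  # assumed value, matching the literals it replaces

def resolve_onnx_path(paddle_model_dir, onnx_model_dir=None):
    # Mirrors the new fallback: no output directory means convert in place.
    out_dir = Path(onnx_model_dir) if onnx_model_dir else Path(paddle_model_dir)
    return out_dir / f"{MODEL_FILE_PREFIX}.onnx"

print(resolve_onnx_path("models/PP-OCRv4_mobile_det"))
# models/PP-OCRv4_mobile_det/inference.onnx
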
+ 0 - 2
paddlex/serving_requirements.txt

@@ -2,8 +2,6 @@ aiohttp>=3.9
 bce-python-sdk>=0.9
 fastapi>=0.110
 filetype>=1.2
-pydantic>=2
 starlette>=0.36
-typing_extensions>=4.11
 uvicorn>=0.16
 yarl>=1.9

+ 1 - 2
paddlex/utils/config.py

@@ -17,7 +17,6 @@ import copy
 import argparse
 import yaml
 from . import logging
-from .errors import raise_key_not_found_error
 from .file_interface import custom_open
 
 __all__ = ["get_config"]
@@ -30,7 +29,7 @@ class AttrDict(dict):
         if key in self:
             return self[key]
         else:
-            raise raise_key_not_found_error(key, self)
+            raise AttributeError(key)
 
     def __setattr__(self, key, value):
         if key in self.__dict__:

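Raising a plain AttributeError instead of the custom key-not-found error matters because Python's attribute protocol only treats AttributeError as "missing": getattr with a default and hasattr now behave as expected on AttrDict. A self-contained sketch of the behavior:

class AttrDict(dict):
    def __getattr__(self, key):
        if key in self:
            return self[key]
        raise AttributeError(key)

cfg = AttrDict(device="gpu:0")
print(getattr(cfg, "device", None))     # gpu:0
print(getattr(cfg, "use_hpip", False))  # False, instead of an unrelated exception
print(hasattr(cfg, "use_hpip"))         # False
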
+ 2 - 0
requirements.txt

@@ -32,6 +32,8 @@ premailer
 PyMuPDF
 ujson
 Pillow
+pydantic>=2
+typing_extensions>=4.11
 ######## For Chatocrv3 #######
 langchain==0.2.17
 langchain-openai==0.1.25

+ 1 - 0
setup.py

@@ -97,6 +97,7 @@ def packages_and_package_data():
     pkg_data.append("serving_requirements.txt")
     pkg_data.append("paddle2onnx_requirements.txt")
     pkg_data.append("hpip_links.html")
+    pkg_data.append("inference/utils/hpi_model_info_collection.json")
     ops_file_dir = "paddlex/ops"
     ops_file_types = ["h", "hpp", "cpp", "cc", "cu"]
     return pkgs, {