
[Feat][HPI] Enable MKL-DNN as default and suggest backend configurations more intelligently (#4169)

* HPI supports mkldnn by default and allows finer config selection

* Update HPI prior knowledge

Co-authored-by: zhang-prog <zhang-prog@users.noreply.github.com>

* BEVFusion does not support CPU inference

* Fix missing deps

---------

Co-authored-by: zhang-prog <zhang-prog@users.noreply.github.com>
Lin Manhui 5 months ago
parent
commit
06f4b06c85

+ 18 - 16
paddlex/inference/models/common/static_infer.py

@@ -33,7 +33,7 @@ from ...utils.hpi import (
     suggest_inference_backend_and_config,
 )
 from ...utils.model_paths import get_model_paths
-from ...utils.pp_option import PaddlePredictorOption
+from ...utils.pp_option import PaddlePredictorOption, get_default_run_mode
 from ...utils.trt_config import DISABLE_TRT_HALF_OPS_CONFIG
 
 CACHE_DIR = ".cache"
@@ -407,17 +407,10 @@ class PaddleInfer(StaticInfer):
                 assert self._option.device_type == "cpu"
                 config.disable_gpu()
                 if "mkldnn" in self._option.run_mode:
-                    if hasattr(config, "set_mkldnn_cache_capacity"):
-                        config.enable_mkldnn()
-                        if "bf16" in self._option.run_mode:
-                            config.enable_mkldnn_bfloat16()
-                        config.set_mkldnn_cache_capacity(
-                            self._option.mkldnn_cache_capacity
-                        )
-                    else:
-                        logging.warning(
-                            "MKL-DNN is not available. We will disable MKL-DNN."
-                        )
+                    config.enable_mkldnn()
+                    if "bf16" in self._option.run_mode:
+                        config.enable_mkldnn_bfloat16()
+                    config.set_mkldnn_cache_capacity(self._option.mkldnn_cache_capacity)
                 else:
                     if hasattr(config, "disable_mkldnn"):
                         config.disable_mkldnn()
@@ -641,10 +634,19 @@ class HPInfer(StaticInfer):
                 )
             backend_config = self._config.backend_config or {}
 
-        if backend == "paddle" and not backend_config:
-            logging.warning(
-                "The Paddle Inference backend is selected with the default configuration. This may not provide optimal performance."
-            )
+        if backend == "paddle":
+            if not backend_config:
+                is_default_config = True
+            elif backend_config.keys() != {"run_mode"}:
+                is_default_config = False
+            else:
+                is_default_config = backend_config["run_mode"] == get_default_run_mode(
+                    self._config.pdx_model_name, self._config.device_type
+                )
+            if is_default_config:
+                logging.warning(
+                    "The Paddle Inference backend is selected with the default configuration. This may not provide optimal performance."
+                )
 
         return backend, backend_config
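
The two hunks above are connected: the first drops the runtime hasattr probe and enables MKL-DNN unconditionally (availability is now validated upstream, when run_mode is set; see the pp_option.py diff below), while the second widens the "default configuration" warning so that it also fires when a user-supplied backend_config merely pins run_mode to the value PaddleX would pick anyway. A minimal sketch of the new warning condition, using only the helper the PR imports, get_default_run_mode(model_name, device_type):

    from paddlex.inference.utils.pp_option import get_default_run_mode

    def is_effectively_default(backend_config, model_name, device_type):
        # No config at all: the defaults apply.
        if not backend_config:
            return True
        # Any key other than run_mode marks a deliberate custom config.
        if backend_config.keys() != {"run_mode"}:
            return False
        # Only run_mode is set: default iff it matches the suggested default.
        return backend_config["run_mode"] == get_default_run_mode(
            model_name, device_type
        )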
 

+ 20 - 16
paddlex/inference/utils/hpi.py

@@ -17,6 +17,7 @@ import importlib.resources
 import importlib.util
 import json
 import platform
+from collections import defaultdict
 from functools import lru_cache
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 
@@ -30,6 +31,7 @@ from ...utils.env import (
     get_paddle_version,
 )
 from ...utils.flags import USE_PIR_TRT
+from .misc import is_mkldnn_available
 from .model_paths import ModelPaths
 
 
@@ -186,24 +188,23 @@ def suggest_inference_backend_and_config(
         hpi_config.pdx_model_name
     ].copy()
 
+    if not is_mkldnn_available():
+        if "paddle_mkldnn" in supported_pseudo_backends:
+            supported_pseudo_backends.remove("paddle_mkldnn")
+
     # XXX
     if not (
         USE_PIR_TRT
         and importlib.util.find_spec("tensorrt")
         and ctypes.util.find_library("nvinfer")
     ):
-        if (
-            "paddle_tensorrt" in supported_pseudo_backends
-            or "paddle_tensorrt_fp16" in supported_pseudo_backends
-        ):
-            supported_pseudo_backends.append("paddle")
         if "paddle_tensorrt" in supported_pseudo_backends:
             supported_pseudo_backends.remove("paddle_tensorrt")
         if "paddle_tensorrt_fp16" in supported_pseudo_backends:
             supported_pseudo_backends.remove("paddle_tensorrt_fp16")
 
-    candidate_backends = []
-    backend_to_pseudo_backend = {}
+    supported_backends = []
+    backend_to_pseudo_backends = defaultdict(list)
     for pb in supported_pseudo_backends:
         if pb.startswith("paddle"):
             backend = "paddle"
@@ -213,26 +214,28 @@ def suggest_inference_backend_and_config(
             backend = pb
         if available_backends is not None and backend not in available_backends:
             continue
-        candidate_backends.append(backend)
-        backend_to_pseudo_backend[backend] = pb
+        supported_backends.append(backend)
+        backend_to_pseudo_backends[backend].append(pb)
 
-    if not candidate_backends:
+    if not supported_backends:
         return None, "No inference backend can be selected."
 
     if hpi_config.backend is not None:
-        if hpi_config.backend not in candidate_backends:
+        if hpi_config.backend not in supported_backends:
             return (
                 None,
                 f"{repr(hpi_config.backend)} is not a supported inference backend.",
             )
         suggested_backend = hpi_config.backend
+        pseudo_backends = backend_to_pseudo_backends[suggested_backend]
+        pseudo_backend = pseudo_backends[0]
     else:
-        # The first backend is the preferred one.
-        suggested_backend = candidate_backends[0]
+        # Prefer the first one.
+        suggested_backend = supported_backends[0]
+        pseudo_backend = supported_pseudo_backends[0]
 
     suggested_backend_config = {}
     if suggested_backend == "paddle":
-        pseudo_backend = backend_to_pseudo_backend["paddle"]
         assert pseudo_backend in (
             "paddle",
             "paddle_fp16",
@@ -240,7 +243,9 @@ def suggest_inference_backend_and_config(
             "paddle_tensorrt",
             "paddle_tensorrt_fp16",
         ), pseudo_backend
-        if pseudo_backend == "paddle_fp16":
+        if pseudo_backend == "paddle":
+            suggested_backend_config.update({"run_mode": "paddle"})
+        elif pseudo_backend == "paddle_fp16":
             suggested_backend_config.update({"run_mode": "paddle_fp16"})
         elif pseudo_backend == "paddle_mkldnn":
             suggested_backend_config.update({"run_mode": "mkldnn"})
@@ -250,7 +255,6 @@ def suggest_inference_backend_and_config(
             # TODO: Check if the target device supports FP16.
             suggested_backend_config.update({"run_mode": "trt_fp16"})
     elif suggested_backend == "tensorrt":
-        pseudo_backend = backend_to_pseudo_backend["tensorrt"]
         assert pseudo_backend in ("tensorrt", "tensorrt_fp16"), pseudo_backend
         if pseudo_backend == "tensorrt_fp16":
             suggested_backend_config.update({"precision": "fp16"})
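
The old backend_to_pseudo_backend dict kept only the last pseudo-backend seen for each backend, so a lower-priority entry could overwrite a higher-priority one. The defaultdict(list) preserves every pseudo-backend in priority order, and an explicitly requested backend now resolves to the first (most preferred) entry in its list. An illustration with a hypothetical priority list (the real lists come from hpi_model_info_collection.json):

    from collections import defaultdict

    # Hypothetical, most-preferred-first; simplified to the "paddle" prefix only.
    supported_pseudo_backends = ["paddle_mkldnn", "paddle", "onnxruntime"]

    backend_to_pseudo_backends = defaultdict(list)
    for pb in supported_pseudo_backends:
        backend = "paddle" if pb.startswith("paddle") else pb
        backend_to_pseudo_backends[backend].append(pb)

    print(dict(backend_to_pseudo_backends))
    # {'paddle': ['paddle_mkldnn', 'paddle'], 'onnxruntime': ['onnxruntime']}
    # With backend="paddle" requested, the first entry ("paddle_mkldnn") wins,
    # yielding run_mode="mkldnn"; the old dict would have kept only "paddle".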

File diff suppressed because it is too large
+ 286 - 139
paddlex/inference/utils/hpi_model_info_collection.json


+ 36 - 4
paddlex/inference/utils/mkldnn_blocklist.py

@@ -13,15 +13,47 @@
 # limitations under the License.
 
 MKLDNN_BLOCKLIST = [
-    "SLANeXt_wired",
-    "SLANeXt_wireless",
     "LaTeX_OCR_rec",
     "PP-FormulaNet-L",
     "PP-FormulaNet-S",
     "UniMERNet",
+    "UVDoc",
+    "Cascade-MaskRCNN-ResNet50-FPN",
+    "Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN",
+    "Mask-RT-DETR-M",
+    "Mask-RT-DETR-S",
+    "MaskRCNN-ResNeXt101-vd-FPN",
+    "MaskRCNN-ResNet101-FPN",
+    "MaskRCNN-ResNet101-vd-FPN",
+    "MaskRCNN-ResNet50-FPN",
+    "MaskRCNN-ResNet50-vd-FPN",
+    "MaskRCNN-ResNet50",
+    "SOLOv2",
+    "PP-TinyPose_128x96",
+    "PP-TinyPose_256x192",
+    "Cascade-FasterRCNN-ResNet50-FPN",
+    "Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN",
+    "Co-DINO-Swin-L",
+    "Co-Deformable-DETR-Swin-T",
+    "FasterRCNN-ResNeXt101-vd-FPN",
+    "FasterRCNN-ResNet101-FPN",
+    "FasterRCNN-ResNet101",
+    "FasterRCNN-ResNet34-FPN",
+    "FasterRCNN-ResNet50-FPN",
+    "FasterRCNN-ResNet50-vd-FPN",
+    "FasterRCNN-ResNet50-vd-SSLDv2-FPN",
+    "FasterRCNN-ResNet50",
+    "FasterRCNN-Swin-Tiny-FPN",
+    "MaskFormer_small",
+    "MaskFormer_tiny",
+    "SLANeXt_wired",
+    "SLANeXt_wireless",
+    "SLANet",
+    "SLANet_plus",
+    "YOWO",
+    "SAM-H_box",
+    "SAM-H_point",
     "PP-FormulaNet_plus-L",
     "PP-FormulaNet_plus-M",
     "PP-FormulaNet_plus-S",
-    "SLANet",
-    "SLANet_plus",
 ]
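
With MKL-DNN on by default, the blocklist grows from 11 to 43 entries, mostly R-CNN detection and instance-segmentation variants, table-structure models (SLANet/SLANeXt), SAM, and a few others known to fail or regress under oneDNN. An illustrative helper (not part of the PR) showing how such a blocklist is typically consulted:

    from paddlex.inference.utils.mkldnn_blocklist import MKLDNN_BLOCKLIST

    def resolve_run_mode(run_mode, model_name):
        # Blocklisted models fall back from MKL-DNN run modes to plain "paddle".
        if run_mode.startswith("mkldnn") and model_name in MKLDNN_BLOCKLIST:
            return "paddle"
        return run_mode

    assert resolve_run_mode("mkldnn", "SLANet") == "paddle"
    assert resolve_run_mode("mkldnn", "PP-OCRv4_mobile_det") == "mkldnn"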

+ 6 - 0
paddlex/inference/utils/pp_option.py

@@ -161,6 +161,12 @@ class PaddlePredictorOption(object):
                 f"`run_mode` must be {support_run_mode_str}, but received {repr(run_mode)}."
             )
 
+        if run_mode.startswith("mkldnn") and not is_mkldnn_available():
+            logging.warning("MKL-DNN is not available. Using `paddle` instead.")
+            run_mode = "paddle"
+
+        # TODO: Check if trt is available
+
         if self._model_name is not None:
             # TRT Blocklist
             if (
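
In practice this means requesting an MKL-DNN run mode on a Paddle build without oneDNN no longer fails later inside Paddle Inference; the setter warns and falls back. A usage sketch, assuming a PaddlePredictorOption can be constructed standalone and that run_mode is a settable property:

    from paddlex.inference.utils.pp_option import PaddlePredictorOption

    option = PaddlePredictorOption()
    option.run_mode = "mkldnn"
    # On a build compiled without MKL-DNN this logs
    # "MKL-DNN is not available. Using `paddle` instead."
    # and the stored value becomes "paddle".
    print(option.run_mode)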

+ 4 - 0
setup.py

@@ -137,16 +137,20 @@ EXTRAS = {
             "tokenizers",
         ],
         "ocr": [
+            "einops",
             "ftfy",
             "imagesize",
+            "Jinja2",
             "lxml",
             "opencv-contrib-python",
             "openpyxl",
             "premailer",
             "pyclipper",
             "pypdfium2",
+            "regex",
             "scikit-learn",
             "shapely",
+            "tiktoken",
             "tokenizers",
         ],
         "speech": [

Some files were not shown because too many files changed in this diff