# Source repository: zhengchun/PaddleX
# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings
from pathlib import Path
from typing import Any, Dict, Literal, List, Mapping, Optional, Tuple, Type, Union

import ultra_infer as ui
from paddlex.utils import logging
from pydantic import BaseModel, ConfigDict, Field, field_validator
from typing_extensions import Annotated, TypeAlias, TypedDict, assert_never

from paddlex_hpi._model_info import get_model_info
from paddlex_hpi._utils.typing import Backend, DeviceType


class _BackendConfig(BaseModel):
    """Common base for the per-backend configuration models in this module."""

    def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None:
        """Apply this configuration to ``option``.

        This base implementation is a placeholder that subclasses override.

        Args:
            option: The runtime option object to configure.
            model_dir: Directory of the model being configured.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError


class PaddleInferConfig(_BackendConfig):
    """Settings applied when the Paddle Inference backend is selected.

    The ``trt_*`` fields are only consulted when ``enable_trt`` is true.
    """

    cpu_num_threads: int = 8
    enable_mkldnn: bool = True
    enable_trt: bool = False
    trt_dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None
    trt_dynamic_shape_input_data: Optional[Dict[str, List[List[float]]]] = None
    trt_precision: Literal["FP32", "FP16"] = "FP32"
    enable_log_info: bool = False

    def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None:
        """Select the Paddle Inference backend on ``option`` and apply the fields.

        Args:
            option: Runtime option object that is configured in place.
            model_dir: Directory in which the ``trt_serialized.trt`` path is
                placed when TensorRT is enabled.
        """
        option.use_paddle_infer_backend()
        option.set_cpu_thread_num(self.cpu_num_threads)

        paddle_opt = option.paddle_infer_option
        paddle_opt.enable_mkldnn = self.enable_mkldnn
        paddle_opt.enable_trt = self.enable_trt

        if self.enable_trt:
            trt_opt = option.trt_option
            trt_opt.serialize_file = str(model_dir / "trt_serialized.trt")
            paddle_opt.collect_trt_shape = True
            paddle_opt.collect_trt_shape_by_device = True
            # Each mapping value is unpacked into positional arguments
            # (presumably min/opt/max entries; confirm against ultra_infer).
            for input_name, shape_set in (self.trt_dynamic_shapes or {}).items():
                trt_opt.set_shape(input_name, *shape_set)
            for input_name, values in (
                self.trt_dynamic_shape_input_data or {}
            ).items():
                trt_opt.set_input_data(input_name, *values)
            if self.trt_precision == "FP16":
                trt_opt.enable_fp16 = True

        paddle_opt.enable_log_info = self.enable_log_info


class OpenVINOConfig(_BackendConfig):
    """Settings applied when the OpenVINO backend is selected."""

    cpu_num_threads: int = 8

    def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None:
        """Select the OpenVINO backend on ``option`` and set its CPU thread count.

        ``model_dir`` is accepted for interface compatibility and is not used.
        """
        option.use_openvino_backend()
        thread_count = self.cpu_num_threads
        option.set_cpu_thread_num(thread_count)


class ONNXRuntimeConfig(_BackendConfig):
    """Settings applied when the ONNX Runtime backend is selected."""

    cpu_num_threads: int = 8

    def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None:
        """Select the ONNX Runtime backend on ``option`` and set its CPU thread count.

        ``model_dir`` is accepted for interface compatibility and is not used.
        """
        option.use_ort_backend()
        thread_count = self.cpu_num_threads
        option.set_cpu_thread_num(thread_count)


class TensorRTConfig(_BackendConfig):
    """Settings applied when the TensorRT backend is selected."""

    precision: Literal["FP32", "FP16"] = "FP32"
    dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None

    def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None:
        """Select the TensorRT backend on ``option`` and apply the fields.

        Args:
            option: Runtime option object that is configured in place.
            model_dir: Directory in which the ``trt_serialized.trt`` path is
                placed.
        """
        option.use_trt_backend()

        trt_opt = option.trt_option
        trt_opt.serialize_file = str(model_dir / "trt_serialized.trt")
        # FP16 is only ever switched on here; "FP32" leaves the flag untouched.
        if self.precision == "FP16":
            trt_opt.enable_fp16 = True
        # Each mapping value is unpacked into positional arguments of set_shape.
        for input_name, shape_set in (self.dynamic_shapes or {}).items():
            trt_opt.set_shape(input_name, *shape_set)


class PaddleTensorRTConfig(_BackendConfig):
    """Settings for running Paddle Inference with TensorRT enabled.

    ``dynamic_shapes`` is a required field.
    """

    dynamic_shapes: Dict[str, List[List[int]]]
    dynamic_shape_input_data: Optional[Dict[str, List[List[float]]]] = None
    enable_log_info: bool = False

    def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None:
        """Select Paddle Inference on ``option``, enable TensorRT, apply the fields.

        Args:
            option: Runtime option object that is configured in place.
            model_dir: Directory in which the ``trt_serialized.trt`` path is
                placed.
        """
        option.use_paddle_infer_backend()

        paddle_opt = option.paddle_infer_option
        trt_opt = option.trt_option
        paddle_opt.enable_trt = True
        trt_opt.serialize_file = str(model_dir / "trt_serialized.trt")

        shapes_by_input = self.dynamic_shapes
        if shapes_by_input is not None:
            paddle_opt.collect_trt_shape = True
            # TODO: Support setting collect_trt_shape_by_device
            for input_name, shape_set in shapes_by_input.items():
                trt_opt.set_shape(input_name, *shape_set)

        for input_name, values in (self.dynamic_shape_input_data or {}).items():
            trt_opt.set_input_data(input_name, *values)

        paddle_opt.enable_log_info = self.enable_log_info


# Union of the config model classes that `get_backend_config_type` can return.
# NOTE: `PaddleTensorRTConfig` is not a member of this union.
# Should we use tagged unions?
BackendConfig: TypeAlias = Union[
    PaddleInferConfig,
    OpenVINOConfig,
    ONNXRuntimeConfig,
    TensorRTConfig,
]


def get_backend_config_type(backend: Backend, /) -> Type[BackendConfig]:
    """Return the config model class that corresponds to ``backend``.

    Args:
        backend: Backend name (positional-only). The recognised values are
            ``"paddle_infer"``, ``"openvino"``, ``"onnx_runtime"`` and
            ``"tensorrt"``.

    Returns:
        The matching config class. Any other value is handed to
        ``assert_never``.
    """
    if backend == "paddle_infer":
        return PaddleInferConfig
    if backend == "openvino":
        return OpenVINOConfig
    if backend == "onnx_runtime":
        return ONNXRuntimeConfig
    if backend == "tensorrt":
        return TensorRTConfig
    # Every supported name has returned above; this is the exhaustiveness check.
    assert_never(backend)


# Can I create this dynamically and automatically?
class BackendConfigs(TypedDict, total=False):
    """Per-backend configuration entries keyed by backend name.

    ``total=False`` makes every key optional.
    """

    paddle_infer: PaddleInferConfig
    openvino: OpenVINOConfig
    onnx_runtime: ONNXRuntimeConfig
    tensorrt: TensorRTConfig
    paddle_tensorrt: PaddleTensorRTConfig


class HPIConfig(BaseModel):
    """Backend selection and per-backend option overrides.

    Attributes:
        selected_backends: Optional mapping from device type to the backend
            to use on that device.
        backend_configs: Optional per-backend overrides. The legacy key
            ``backend_config`` is accepted as a validation alias.
    """

    model_config = ConfigDict(populate_by_name=True)

    selected_backends: Optional[Dict[DeviceType, Backend]] = None
    # For backward compatibility
    backend_configs: Annotated[
        Optional[BackendConfigs], Field(validation_alias="backend_config")
    ] = None

    def get_backend_and_config(
        self, model_name: str, device_type: DeviceType
    ) -> Tuple[Backend, BackendConfig]:
        """Pick the backend for a model/device and build its validated config.

        Backend choice, in order of precedence:

        1. the entry for ``device_type`` in ``selected_backends``;
        2. the first backend listed in the model info from ``get_model_info``;
        3. ``"paddle_infer"``.

        The config starts from the model-info config paired with the chosen
        backend (first matching pair, if any). The explicitly-set fields of
        the matching ``backend_configs`` entry are then laid on top.

        Args:
            model_name: Name passed to ``get_model_info``.
            device_type: Device type passed to ``get_model_info`` and used to
                look up ``selected_backends``.

        Returns:
            A ``(backend, backend_config)`` tuple.

        Raises:
            pydantic.ValidationError: If the merged options do not validate
                against the backend's config model.
        """
        # Do we need an extensible selector?
        model_info = get_model_info(model_name, device_type)
        backend_config_pairs = model_info["backend_config_pairs"] if model_info else []

        config_dict: Dict[str, Any] = {}
        if self.selected_backends and device_type in self.selected_backends:
            backend = self.selected_backends[device_type]
            for pair in backend_config_pairs:
                # Use the first one
                if pair[0] == backend:
                    config_dict.update(pair[1])
                    break
        elif backend_config_pairs:
            # Currently we select the first one
            backend = backend_config_pairs[0][0]
            config_dict.update(backend_config_pairs[0][1])
        else:
            backend = "paddle_infer"

        if self.backend_configs and backend in self.backend_configs:
            # Only fields the user actually set override the model-info values.
            config_dict.update(
                self.backend_configs[backend].model_dump(exclude_unset=True)
            )
        backend_config_type = get_backend_config_type(backend)
        backend_config = backend_config_type.model_validate(config_dict)
        return backend, backend_config

    # XXX: For backward compatibility
    @field_validator("selected_backends", mode="before")
    @classmethod
    def _hack_selected_backends(cls, data: Any) -> Any:
        """Rewrite the deprecated ``paddle_tensorrt`` backend to ``paddle_infer``.

        Non-mapping input (for example an explicit ``None``) is returned
        unchanged so that regular field validation handles it. The input
        mapping itself is never modified; a copy is returned.
        """
        if not isinstance(data, Mapping):
            return data
        new_data = dict(data)
        for device_type, backend in data.items():
            if backend == "paddle_tensorrt":
                warnings.warn(
                    "`paddle_tensorrt` is deprecated. Please use `paddle_infer` instead.",
                    FutureWarning,
                )
                new_data[device_type] = "paddle_infer"
        return new_data

    @field_validator("backend_configs", mode="before")
    @classmethod
    def _hack_backend_configs(cls, data: Any) -> Any:
        """Fold a deprecated ``paddle_tensorrt`` entry into ``paddle_infer``.

        When ``paddle_tensorrt`` is present, the ``paddle_infer`` entry gets
        ``enable_trt=True`` and ``trt_dynamic_shapes`` from the legacy
        ``dynamic_shapes``. ``trt_dynamic_shape_input_data`` is also copied
        when the legacy entry provides ``dynamic_shape_input_data``.

        Non-mapping input (for example an explicit ``None``) is returned
        unchanged. Neither the input mapping nor its nested ``paddle_infer``
        mapping is modified; copies are returned.

        Raises:
            KeyError: If the ``paddle_tensorrt`` entry has no
                ``dynamic_shapes`` key.
        """
        if not isinstance(data, Mapping):
            return data
        new_data = dict(data)
        if "paddle_tensorrt" not in new_data:
            return new_data

        warnings.warn(
            "`paddle_tensorrt` is deprecated. Please use `paddle_infer` instead.",
            FutureWarning,
        )
        # Copy the nested mapping so the caller's object is left untouched.
        paddle_infer_cfg = dict(new_data.get("paddle_infer") or {})
        pptrt_cfg = new_data["paddle_tensorrt"]
        logging.warning("`paddle_infer.enable_trt` will be set to `True`.")
        paddle_infer_cfg["enable_trt"] = True
        paddle_infer_cfg["trt_dynamic_shapes"] = pptrt_cfg["dynamic_shapes"]
        if "dynamic_shape_input_data" in pptrt_cfg:
            paddle_infer_cfg["trt_dynamic_shape_input_data"] = pptrt_cfg[
                "dynamic_shape_input_data"
            ]
        logging.warning("`paddle_tensorrt.enable_log_info` will be ignored.")
        new_data["paddle_infer"] = paddle_infer_cfg
        return new_data