zhengchun
/
PaddleX


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
							# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import ast
import importlib.resources
import os
import shutil
import subprocess
import sys
from pathlib import Path

from . import create_pipeline
from .constants import MODEL_FILE_PREFIX
from .inference.pipelines import load_pipeline_config
from .inference.utils.model_paths import get_model_paths
from .repo_manager import get_all_supported_repo_names, setup
from .utils import logging
from .utils.deps import (
    get_dep_version,
    get_genai_dep_specs,
    get_genai_fastdeploy_spec,
    get_paddle2onnx_dep_specs,
    get_serving_dep_specs,
    is_dep_available,
    is_paddle2onnx_plugin_available,
)
from .utils.env import get_paddle_cuda_version
from .utils.install import install_packages, uninstall_packages
from .utils.interactive_get_pipeline import interactive_get_pipeline
from .utils.pipeline_arguments import PIPELINE_ARGUMENTS


def args_cfg():
    """parse cli arguments"""

    def parse_str(s):
        """convert str type value
        to None type if it is "None",
        to bool type if it means True or False.
        """
        if s in ("None", "none", "NONE"):
            return None
        elif s in ("TRUE", "True", "true", "T", "t"):
            return True
        elif s in ("FALSE", "False", "false", "F", "f"):
            return False
        return s

    parser = argparse.ArgumentParser(
        "Command-line interface for PaddleX. Use the options below to install plugins, run pipeline predictions, or start the serving application."
    )

    install_group = parser.add_argument_group("Install PaddleX Options")
    pipeline_group = parser.add_argument_group("Pipeline Predict Options")
    serving_group = parser.add_argument_group("Serving Options")
    paddle2onnx_group = parser.add_argument_group("Paddle2ONNX Options")

    ################# install pdx #################
    install_group.add_argument(
        "--install",
        nargs="*",
        metavar="PLUGIN",
        help="Install specified PaddleX plugins.",
    )
    install_group.add_argument(
        "--no_deps",
        action="store_true",
        help="Install custom development plugins without their dependencies.",
    )
    install_group.add_argument(
        "--platform",
        type=str,
        choices=["github.com", "gitee.com"],
        default="github.com",
        help="Platform to use for installation (default: github.com).",
    )
    install_group.add_argument(
        "-y",
        "--yes",
        dest="update_repos",
        action="store_true",
        help="Automatically confirm prompts and update repositories.",
    )
    install_group.add_argument(
        "--use_local_repos",
        action="store_true",
        default=False,
        help="Use local repositories if they exist.",
    )
    install_group.add_argument(
        "--deps_to_replace",
        type=str,
        nargs="+",
        default=None,
        help="Replace dependency version when installing from repositories.",
    )

    ################# pipeline predict #################
    pipeline_group.add_argument(
        "--pipeline", type=str, help="Name of the pipeline to execute for prediction."
    )
    pipeline_group.add_argument(
        "--input",
        type=str,
        default=None,
        help="Input data or path for the pipeline, supports specific file and directory.",
    )
    pipeline_group.add_argument(
        "--save_path",
        type=str,
        default=None,
        help="Path to save the prediction results.",
    )
    pipeline_group.add_argument(
        "--device",
        type=str,
        default=None,
        help="Device to run the pipeline on (e.g., 'cpu', 'gpu:0').",
    )
    pipeline_group.add_argument(
        "--use_hpip",
        action="store_true",
        help="Use high-performance inference plugin.",
    )
    pipeline_group.add_argument(
        "--hpi_config",
        type=ast.literal_eval,
        help="High-performance inference configuration.",
    )
    pipeline_group.add_argument(
        "--get_pipeline_config",
        type=str,
        default=None,
        help="Retrieve the configuration for a specified pipeline.",
    )

    ################# serving #################
    serving_group.add_argument(
        "--serve",
        action="store_true",
        help="Start the serving application to handle requests.",
    )
    serving_group.add_argument(
        "--host",
        type=str,
        default="0.0.0.0",
        help="Host address to serve on (default: 0.0.0.0).",
    )
    serving_group.add_argument(
        "--port",
        type=int,
        default=8080,
        help="Port number to serve on (default: 8080).",
    )
    # Serving also uses `--pipeline`, `--device`, `--use_hpip`, and `--hpi_config`

    ################# paddle2onnx #################
    paddle2onnx_group.add_argument(
        "--paddle2onnx",
        action="store_true",
        help="Convert PaddlePaddle model to ONNX format.",
    )
    paddle2onnx_group.add_argument(
        "--paddle_model_dir",
        type=str,
        help="Directory containing the PaddlePaddle model.",
    )
    paddle2onnx_group.add_argument(
        "--onnx_model_dir",
        type=str,
        help="Output directory for the ONNX model.",
    )
    paddle2onnx_group.add_argument(
        "--opset_version", type=int, default=7, help="Version of the ONNX opset to use."
    )

    # Parse known arguments to get the pipeline name
    args, remaining_args = parser.parse_known_args()
    pipeline = args.pipeline
    pipeline_args = []

    if (
        not (args.install is not None or args.serve or args.paddle2onnx)
        and pipeline is not None
    ):
        if os.path.isfile(pipeline):
            pipeline_name = load_pipeline_config(pipeline)["pipeline_name"]
        else:
            pipeline_name = pipeline

        if pipeline_name not in PIPELINE_ARGUMENTS:
            support_pipelines = ", ".join(PIPELINE_ARGUMENTS.keys())
            logging.error(
                f"Unsupported pipeline: {pipeline_name}, CLI predict only supports these pipelines: {support_pipelines}\n"
            )
            sys.exit(1)

        pipeline_args = PIPELINE_ARGUMENTS[pipeline_name]
        if pipeline_args is None:
            pipeline_args = []
        pipeline_specific_group = parser.add_argument_group(
            f"{pipeline_name.capitalize()} Pipeline Options"
        )
        for arg in pipeline_args:
            pipeline_specific_group.add_argument(
                arg["name"],
                type=parse_str if arg["type"] is bool else arg["type"],
                help=arg.get("help", f"Argument for {pipeline_name} pipeline."),
            )

    return parser, pipeline_args


def install(args):
    """install paddlex"""

    def _install_serving_deps():
        try:
            install_packages(get_serving_dep_specs())
        except Exception:
            logging.error("Installation failed", exc_info=True)
            sys.exit(1)
        logging.info("Successfully installed the serving plugin")

    def _install_paddle2onnx_deps():
        try:
            install_packages(get_paddle2onnx_dep_specs())
        except Exception:
            logging.error("Installation failed", exc_info=True)
            sys.exit(1)
        logging.info("Successfully installed the Paddle2ONNX plugin")

    def _install_hpi_deps(device_type):
        SUPPORTED_DEVICE_TYPES = ["cpu", "gpu", "npu"]
        if device_type not in SUPPORTED_DEVICE_TYPES:
            logging.error(
                "Failed to install the high-performance plugin.\n"
                "Supported device types: %s. Your input device type: %s.\n",
                SUPPORTED_DEVICE_TYPES,
                device_type,
            )
            sys.exit(2)

        hpip_links_file = "hpip_links.html"
        if device_type == "gpu":
            cuda_version = get_paddle_cuda_version()
            if not cuda_version:
                sys.exit(
                    "No CUDA version found. Please make sure you have installed PaddlePaddle with CUDA enabled."
                )
            if cuda_version[0] == 12:
                hpip_links_file = "hpip_links_cu12.html"
            elif cuda_version[0] != 11:
                sys.exit(
                    "Currently, only CUDA versions 11.x and 12.x are supported by the high-performance inference plugin."
                )

        package_mapping = {
            "cpu": "ultra-infer-python",
            "gpu": "ultra-infer-gpu-python",
            "npu": "ultra-infer-npu-python",
        }
        package = package_mapping[device_type]
        other_packages = set(package_mapping.values()) - {package}
        for other_package in other_packages:
            version = get_dep_version(other_package)
            if version is not None:
                logging.info(
                    f"The high-performance inference plugin '{package}' is mutually exclusive with '{other_package}' (version {version} installed). Uninstalling '{other_package}'..."
                )
                try:
                    uninstall_packages([other_package])
                except Exception:
                    logging.error("Failed to uninstall packages", exc_info=True)
                    sys.exit(1)

        with importlib.resources.path("paddlex", hpip_links_file) as f:
            version = get_dep_version(package)
            try:
                if version is None:
                    install_packages(
                        [package], pip_install_opts=["--find-links", str(f)]
                    )
                else:
                    response = input(
                        f"The high-performance inference plugin is already installed (version {repr(version)}). Do you want to reinstall it? (y/n):"
                    )
                    if response.lower() in ["y", "yes"]:
                        uninstall_packages([package])
                        install_packages(
                            [package],
                            pip_install_opts=[
                                "--find-links",
                                str(f),
                            ],
                        )
                    else:
                        return
            except Exception:
                logging.error("Installation failed", exc_info=True)
                sys.exit(1)

        logging.info("Successfully installed the high-performance inference plugin")

        if not is_paddle2onnx_plugin_available():
            logging.info(
                "The Paddle2ONNX plugin is not available. It is recommended to run `paddlex --install paddle2onnx` to install the Paddle2ONNX plugin to use the full functionality of high-performance inference."
            )

    def _install_genai_deps(plugin_types):
        fd_plugin_types = []
        not_fd_plugin_types = []
        for plugin_type in plugin_types:
            if "fastdeploy" in plugin_type:
                fd_plugin_types.append(plugin_type)
            else:
                not_fd_plugin_types.append(plugin_type)
        if fd_plugin_types:
            if not is_dep_available("paddlepaddle"):
                sys.exit("Please install PaddlePaddle first.")
            import paddle.device

            if not paddle.device.is_compiled_with_cuda():
                sys.exit("Currently, only the GPU version of FastDeploy is supported.")
            cap = paddle.device.cuda.get_device_capability()
            if cap in ((8, 0), (9, 0)):
                index_url = "https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-gpu-80_90/"
            elif cap in ((8, 6), (8, 9)):
                index_url = "https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-gpu-86_89/"
            else:
                sys.exit(
                    f"The compute capability of the GPU is {cap[0]}.{cap[1]}, which is not supported. The supported compute capabilities are 8.0, 8.6, 8.9, and 9.0."
                )
            try:
                install_packages(
                    [get_genai_fastdeploy_spec("gpu")],
                    pip_install_opts=["--extra-index-url", index_url],
                )
            except Exception:
                logging.error("Installation failed", exc_info=True)
                sys.exit(1)

        reqs = []
        for plugin_type in not_fd_plugin_types:
            try:
                r = get_genai_dep_specs(plugin_type)
            except ValueError:
                logging.error("Invalid generative AI plugin type: %s", plugin_type)
                sys.exit(2)
            reqs += r
        try:
            install_packages(reqs, constraints="required")
        except Exception:
            logging.error("Installation failed", exc_info=True)
            sys.exit(1)

        for plugin_type in plugin_types:
            if "vllm" in plugin_type or "sglang" in plugin_type:
                try:
                    install_packages(["wheel"], constraints="required")
                    install_packages(["flash-attn == 2.8.2"], constraints="required")
                except Exception:
                    logging.error("Installation failed", exc_info=True)
                    sys.exit(1)
                break

        logging.info(
            "Successfully installed the generative AI plugin"
            + ("s" if len(plugin_types) > 1 else "")
        )

    # Enable debug info
    os.environ["PADDLE_PDX_DEBUG"] = "True"
    # Disable eager initialization
    os.environ["PADDLE_PDX_EAGER_INIT"] = "False"

    plugins = args.install[:]

    if "serving" in plugins:
        plugins.remove("serving")
        if plugins:
            logging.error("`serving` cannot be used together with other plugins.")
            sys.exit(2)
        _install_serving_deps()
        return

    if "paddle2onnx" in plugins:
        plugins.remove("paddle2onnx")
        if plugins:
            logging.error("`paddle2onnx` cannot be used together with other plugins.")
            sys.exit(2)
        _install_paddle2onnx_deps()
        return

    hpi_plugins = list(filter(lambda name: name.startswith("hpi-"), plugins))
    if hpi_plugins:
        for p in hpi_plugins:
            plugins.remove(p)
        if plugins:
            logging.error("`hpi-xxx` cannot be used together with other plugins.")
            sys.exit(2)
        if len(hpi_plugins) > 1 or len(hpi_plugins[0].split("-")) != 2:
            logging.error(
                "Invalid HPI plugin installation format detected.\n"
                "Correct format: paddlex --install hpi-<device_type>\n"
                "Example: paddlex --install hpi-gpu"
            )
            sys.exit(2)
        device_type = hpi_plugins[0].split("-")[1]
        _install_hpi_deps(device_type=device_type)
        return

    genai_plugins = list(filter(lambda name: name.startswith("genai-"), plugins))
    if genai_plugins:
        for p in genai_plugins:
            plugins.remove(p)
        if plugins:
            logging.error("`genai-xxx` cannot be used together with other plugins.")
            sys.exit(2)
        genai_plugin_types = [p[len("genai-") :] for p in genai_plugins]
        _install_genai_deps(genai_plugin_types)
        return

    all_repo_names = get_all_supported_repo_names()
    unknown_plugins = []
    for p in plugins:
        if p not in all_repo_names:
            unknown_plugins.append(p)
    if unknown_plugins:
        logging.error("Unknown plugins: %s", unknown_plugins)
        sys.exit(2)
    if plugins:
        repo_names = plugins
    elif len(plugins) == 0:
        repo_names = all_repo_names
    setup(
        repo_names=repo_names,
        no_deps=args.no_deps,
        platform=args.platform,
        update_repos=args.update_repos,
        use_local_repos=args.use_local_repos,
        deps_to_replace=args.deps_to_replace,
    )
    return


def pipeline_predict(
    pipeline,
    input,
    device,
    save_path,
    use_hpip,
    hpi_config,
    **pipeline_args,
):
    """pipeline predict"""
    pipeline = create_pipeline(
        pipeline, device=device, use_hpip=use_hpip, hpi_config=hpi_config
    )
    result = pipeline.predict(input, **pipeline_args)
    for res in result:
        res.print()
        if save_path:
            res.save_all(save_path=save_path)


def serve(pipeline, *, device, use_hpip, hpi_config, host, port):
    try:
        from .inference.serving.basic_serving import create_pipeline_app, run_server
    except RuntimeError:
        logging.error("Failed to load the serving module", exc_info=True)
        sys.exit(1)

    pipeline_config = load_pipeline_config(pipeline)
    try:
        pipeline = create_pipeline(
            config=pipeline_config,
            device=device,
            use_hpip=use_hpip,
            hpi_config=hpi_config,
        )
    except Exception:
        logging.error("Failed to create the pipeline", exc_info=True)
        sys.exit(1)
    app = create_pipeline_app(pipeline, pipeline_config)
    run_server(app, host=host, port=port)


# TODO: Move to another module
def paddle_to_onnx(paddle_model_dir, onnx_model_dir, *, opset_version):
    if not is_paddle2onnx_plugin_available():
        sys.exit("Please install the Paddle2ONNX plugin first.")

    ONNX_MODEL_FILENAME = f"{MODEL_FILE_PREFIX}.onnx"
    CONFIG_FILENAME = f"{MODEL_FILE_PREFIX}.yml"
    ADDITIONAL_FILENAMES = ["scaler.pkl"]

    def _check_input_dir(input_dir):
        if input_dir is None:
            sys.exit("Input directory must be specified")
        if not input_dir.exists():
            sys.exit(f"{input_dir} does not exist")
        if not input_dir.is_dir():
            sys.exit(f"{input_dir} is not a directory")
        model_paths = get_model_paths(input_dir)
        if "paddle" not in model_paths:
            sys.exit("PaddlePaddle model does not exist")
        config_path = input_dir / CONFIG_FILENAME
        if not config_path.exists():
            sys.exit(f"{config_path} does not exist")

    def _check_paddle2onnx():
        if shutil.which("paddle2onnx") is None:
            sys.exit("Paddle2ONNX is not available. Please install the plugin first.")

    def _run_paddle2onnx(input_dir, output_dir, opset_version):
        model_paths = get_model_paths(input_dir)
        logging.info("Paddle2ONNX conversion starting...")
        # XXX: To circumvent Paddle2ONNX's bug
        cmd = [
            "paddle2onnx",
            "--model_dir",
            str(model_paths["paddle"][0].parent),
            "--model_filename",
            str(model_paths["paddle"][0].name),
            "--params_filename",
            str(model_paths["paddle"][1].name),
            "--save_file",
            str(output_dir / ONNX_MODEL_FILENAME),
            "--opset_version",
            str(opset_version),
        ]
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError as e:
            sys.exit(f"Paddle2ONNX conversion failed with exit code {e.returncode}")
        logging.info("Paddle2ONNX conversion succeeded")

    def _copy_config_file(input_dir, output_dir):
        src_path = input_dir / CONFIG_FILENAME
        dst_path = output_dir / CONFIG_FILENAME
        shutil.copy(src_path, dst_path)
        logging.info(f"Copied {src_path} to {dst_path}")

    def _copy_additional_files(input_dir, output_dir):
        for filename in ADDITIONAL_FILENAMES:
            src_path = input_dir / filename
            if not src_path.exists():
                continue
            dst_path = output_dir / filename
            shutil.copy(src_path, dst_path)
            logging.info(f"Copied {src_path} to {dst_path}")

    if not paddle_model_dir:
        sys.exit("PaddlePaddle model directory must be specified")

    paddle_model_dir = Path(paddle_model_dir)
    if not onnx_model_dir:
        onnx_model_dir = paddle_model_dir
    onnx_model_dir = Path(onnx_model_dir)
    logging.info(f"Input dir: {paddle_model_dir}")
    logging.info(f"Output dir: {onnx_model_dir}")
    _check_input_dir(paddle_model_dir)
    _check_paddle2onnx()
    _run_paddle2onnx(paddle_model_dir, onnx_model_dir, opset_version)
    if not (onnx_model_dir.exists() and onnx_model_dir.samefile(paddle_model_dir)):
        _copy_config_file(paddle_model_dir, onnx_model_dir)
        _copy_additional_files(paddle_model_dir, onnx_model_dir)
    logging.info("Done")


# for CLI
def main():
    """API for command line"""
    parser, pipeline_args = args_cfg()
    args = parser.parse_args()

    if len(sys.argv) == 1:
        logging.warning("No arguments provided. Displaying help information:")
        parser.print_help()
        sys.exit(2)

    if args.install is not None:
        install(args)
    elif args.serve:
        serve(
            args.pipeline,
            device=args.device,
            use_hpip=args.use_hpip or None,
            hpi_config=args.hpi_config,
            host=args.host,
            port=args.port,
        )
    elif args.paddle2onnx:
        paddle_to_onnx(
            args.paddle_model_dir,
            args.onnx_model_dir,
            opset_version=args.opset_version,
        )
    else:
        if args.get_pipeline_config is not None:
            interactive_get_pipeline(args.get_pipeline_config, args.save_path)
        else:
            pipeline_args_dict = {}

            for arg in pipeline_args:
                arg_name = arg["name"].lstrip("-")
                if hasattr(args, arg_name):
                    pipeline_args_dict[arg_name] = getattr(args, arg_name)
                else:
                    logging.warning(f"Argument {arg_name} is missing in args")
            try:
                pipeline_predict(
                    args.pipeline,
                    args.input,
                    args.device,
                    args.save_path,
                    use_hpip=args.use_hpip or None,
                    hpi_config=args.hpi_config,
                    **pipeline_args_dict,
                )
            except Exception:
                logging.error("Pipeline prediction failed", exc_info=True)
                sys.exit(1)