# sglang.py
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. import json
  15. import os
  16. import subprocess
  17. import sys
  18. import tempfile
  19. import textwrap
  20. from ....utils.deps import require_genai_engine_plugin
  21. def run_sglang_server(host, port, model_name, model_dir, config, chat_template_path):
  22. require_genai_engine_plugin("sglang-server")
  23. data = json.dumps(
  24. {
  25. "host": host,
  26. "port": port,
  27. "model_name": model_name,
  28. "model_dir": model_dir,
  29. "config": config,
  30. "chat_template_path": str(chat_template_path),
  31. }
  32. )
  33. # HACK
  34. code = textwrap.dedent(
  35. f"""
  36. import json
  37. import os
  38. from paddlex.inference.genai.configs.utils import (
  39. backend_config_to_args,
  40. set_config_defaults,
  41. update_backend_config,
  42. )
  43. from paddlex.inference.genai.models import get_model_components
  44. from sglang.srt.configs.model_config import multimodal_model_archs
  45. from sglang.srt.entrypoints.http_server import launch_server
  46. from sglang.srt.managers.multimodal_processor import PROCESSOR_MAPPING
  47. from sglang.srt.models.registry import ModelRegistry
  48. from sglang.srt.server_args import prepare_server_args
  49. from sglang.srt.utils import kill_process_tree
  50. data = json.loads({repr(data)})
  51. host = data["host"]
  52. port = data["port"]
  53. model_name = data["model_name"]
  54. model_dir = data["model_dir"]
  55. config = data["config"]
  56. chat_template_path = data["chat_template_path"]
  57. network_class, processor_class = get_model_components(model_name, "sglang")
  58. ModelRegistry.models[network_class.__name__] = network_class
  59. multimodal_model_archs.append(network_class.__name__)
  60. PROCESSOR_MAPPING[network_class] = processor_class
  61. set_config_defaults(config, {{"served-model-name": model_name}})
  62. if chat_template_path:
  63. set_config_defaults(config, {{"chat-template": chat_template_path}})
  64. set_config_defaults(config, {{"enable-metrics": True}})
  65. update_backend_config(
  66. config,
  67. {{
  68. "model-path": model_dir,
  69. "host": host,
  70. "port": port,
  71. }},
  72. )
  73. if __name__ == "__main__":
  74. args = backend_config_to_args(config)
  75. server_args = prepare_server_args(args)
  76. try:
  77. launch_server(server_args)
  78. finally:
  79. kill_process_tree(os.getpid(), include_parent=False)
  80. """
  81. )
  82. with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f:
  83. f.write(code)
  84. script_path = f.name
  85. try:
  86. subprocess.check_call([sys.executable, script_path])
  87. finally:
  88. os.unlink(script_path)