| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import os
- import GPUtil
- import lazy_paddle as paddle
- from . import logging
- from .errors import raise_unsupported_device_error
- SUPPORTED_DEVICE_TYPE = ["cpu", "gpu", "xpu", "npu", "mlu", "gcu"]
- def _constr_device(device_type, device_ids):
- if device_ids:
- device_ids = ",".join(map(str, device_ids))
- return f"{device_type}:{device_ids}"
- else:
- return f"{device_type}"
- def get_default_device():
- avail_gpus = GPUtil.getAvailable()
- if not avail_gpus:
- return "cpu"
- else:
- return _constr_device("gpu", [avail_gpus[0]])
- def parse_device(device):
- """parse_device"""
- # According to https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/device/set_device_cn.html
- parts = device.split(":")
- if len(parts) > 2:
- raise ValueError(f"Invalid device: {device}")
- if len(parts) == 1:
- device_type, device_ids = parts[0], None
- else:
- device_type, device_ids = parts
- device_ids = device_ids.split(",")
- for device_id in device_ids:
- if not device_id.isdigit():
- raise ValueError(
- f"Device ID must be an integer. Invalid device ID: {device_id}"
- )
- device_ids = list(map(int, device_ids))
- device_type = device_type.lower()
- # raise_unsupported_device_error(device_type, SUPPORTED_DEVICE_TYPE)
- assert device_type.lower() in SUPPORTED_DEVICE_TYPE
- return device_type, device_ids
- def update_device_num(device, num):
- device_type, device_ids = parse_device(device)
- if device_ids:
- assert len(device_ids) >= num
- return _constr_device(device_type, device_ids[:num])
- else:
- return _constr_device(device_type, device_ids)
- def set_env_for_device(device):
- def _set(envs):
- for key, val in envs.items():
- os.environ[key] = val
- logging.debug(f"{key} has been set to {val}.")
- device_type, device_ids = parse_device(device)
- if device_type.lower() in ["gpu", "xpu", "npu", "mlu", "gcu"]:
- if device_type.lower() == "gpu" and paddle.is_compiled_with_rocm():
- envs = {"FLAGS_conv_workspace_size_limit": "2000"}
- _set(envs)
- if device_type.lower() == "npu":
- envs = {
- "FLAGS_npu_jit_compile": "0",
- "FLAGS_use_stride_kernel": "0",
- "FLAGS_allocator_strategy": "auto_growth",
- "CUSTOM_DEVICE_BLACK_LIST": "pad3d,pad3d_grad,set_value,set_value_with_tensor",
- "FLAGS_npu_scale_aclnn": "True",
- "FLAGS_npu_split_aclnn": "True",
- }
- _set(envs)
- if device_type.lower() == "xpu":
- envs = {
- "BKCL_FORCE_SYNC": "1",
- "BKCL_TIMEOUT": "1800",
- "FLAGS_use_stride_kernel": "0",
- "XPU_BLACK_LIST": "pad3d",
- }
- _set(envs)
- if device_type.lower() == "mlu":
- envs = {"FLAGS_use_stride_kernel": "0"}
- _set(envs)
- if device_type.lower() == "gcu":
- envs = {"FLAGS_use_stride_kernel": "0"}
- _set(envs)
|