| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- import subprocess
- import sys
def check_nvidia_environment():
    """Print a diagnostic report of the local NVIDIA GPU software stack.

    Three independent probes run in order: the ``nvidia-smi`` command-line
    tool, the NVML bindings (``pynvml``), and PaddlePaddle's CUDA support.
    Every probe prints at least one line starting with a check mark or a
    cross, and a failing probe never prevents the following ones from
    running.

    Returns:
        None. All results are written to standard output.
    """
    print("=== NVIDIA环境检查 ===")
    _check_nvidia_smi()
    _check_nvml()
    _check_paddle()


def _as_text(value):
    """Return *value* as ``str``, decoding it first when it is ``bytes``."""
    # Some pynvml releases hand back bytes instead of str; without decoding
    # the report would show b'...' literals.
    if isinstance(value, bytes):
        return value.decode("utf-8", errors="replace")
    return value


def _check_nvidia_smi():
    """Run ``nvidia-smi`` and print its driver-version line, or the failure."""
    try:
        result = subprocess.run(
            ['nvidia-smi'], capture_output=True, text=True, timeout=10
        )
        if result.returncode != 0:
            print("✗ nvidia-smi 失败:")
            print(result.stderr)
            return

        print("✓ nvidia-smi 正常运行")
        # Only the first line mentioning the driver version is reported.
        driver_line = next(
            (
                line
                for line in result.stdout.split('\n')
                if 'Driver Version:' in line
            ),
            None,
        )
        if driver_line is not None:
            print(f" {driver_line.strip()}")
    except Exception as e:
        # Deliberately broad: a missing binary, a timeout or undecodable
        # output must all be reported without aborting the other probes.
        print(f"✗ nvidia-smi 错误: {e}")


def _check_nvml():
    """Initialise NVML through ``pynvml`` and list the detected GPUs."""
    try:
        import pynvml

        pynvml.nvmlInit()
        try:
            driver_version = _as_text(pynvml.nvmlSystemGetDriverVersion())
            print(f"✓ NVML初始化成功,驱动版本: {driver_version}")

            device_count = pynvml.nvmlDeviceGetCount()
            print(f"✓ 检测到 {device_count} 个GPU设备")

            for i in range(device_count):
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                name = _as_text(pynvml.nvmlDeviceGetName(handle))
                print(f" GPU {i}: {name}")
        finally:
            # Pair every successful nvmlInit() with nvmlShutdown() so the
            # NVML handle is released even when a query above fails.
            pynvml.nvmlShutdown()
    except Exception as e:
        # Deliberately broad: covers a missing pynvml package as well as
        # any NVML runtime error.
        print(f"✗ NVML错误: {e}")


def _check_paddle():
    """Report the PaddlePaddle version and the GPUs it can see."""
    try:
        import paddle

        print(f"✓ PaddlePaddle版本: {paddle.__version__}")
        # Query once and reuse the answer for both the report and the branch.
        cuda_enabled = paddle.device.is_compiled_with_cuda()
        print(f"✓ CUDA编译支持: {cuda_enabled}")
        if cuda_enabled:
            gpu_count = paddle.device.cuda.device_count()
            print(f"✓ PaddlePaddle检测到 {gpu_count} 个GPU")
    except Exception as e:
        # Deliberately broad: importing paddle can fail in many ways and
        # this probe must only report, never raise.
        print(f"✗ PaddlePaddle错误: {e}")
# Script entry point: run the environment check only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    check_nvidia_environment()
|