check_cuda_env.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. import subprocess
  2. import sys
  3. def check_nvidia_environment():
  4. print("=== NVIDIA环境检查 ===")
  5. # 检查nvidia-smi
  6. try:
  7. result = subprocess.run(['nvidia-smi'], capture_output=True, text=True, timeout=10)
  8. if result.returncode == 0:
  9. print("✓ nvidia-smi 正常运行")
  10. # 提取驱动版本
  11. lines = result.stdout.split('\n')
  12. for line in lines:
  13. if 'Driver Version:' in line:
  14. print(f" {line.strip()}")
  15. break
  16. else:
  17. print("✗ nvidia-smi 失败:")
  18. print(result.stderr)
  19. except Exception as e:
  20. print(f"✗ nvidia-smi 错误: {e}")
  21. # 检查NVML
  22. try:
  23. import pynvml
  24. pynvml.nvmlInit()
  25. driver_version = pynvml.nvmlSystemGetDriverVersion()
  26. print(f"✓ NVML初始化成功,驱动版本: {driver_version}")
  27. device_count = pynvml.nvmlDeviceGetCount()
  28. print(f"✓ 检测到 {device_count} 个GPU设备")
  29. for i in range(device_count):
  30. handle = pynvml.nvmlDeviceGetHandleByIndex(i)
  31. name = pynvml.nvmlDeviceGetName(handle)
  32. print(f" GPU {i}: {name}")
  33. except Exception as e:
  34. print(f"✗ NVML错误: {e}")
  35. # 检查PaddlePaddle
  36. try:
  37. import paddle
  38. print(f"✓ PaddlePaddle版本: {paddle.__version__}")
  39. print(f"✓ CUDA编译支持: {paddle.device.is_compiled_with_cuda()}")
  40. if paddle.device.is_compiled_with_cuda():
  41. gpu_count = paddle.device.cuda.device_count()
  42. print(f"✓ PaddlePaddle检测到 {gpu_count} 个GPU")
  43. except Exception as e:
  44. print(f"✗ PaddlePaddle错误: {e}")
  45. if __name__ == "__main__":
  46. check_nvidia_environment()