|
@@ -0,0 +1,54 @@
|
|
|
|
|
+import subprocess
|
|
|
|
|
+import sys
|
|
|
|
|
+
|
|
|
|
|
def _to_text(value):
    """Return *value* unchanged unless it is ``bytes``, which is decoded as UTF-8.

    Older pynvml releases return ``bytes`` from string-valued queries while
    newer ones return ``str``; normalising keeps the printed report free of
    ``b'...'`` wrappers.
    """
    if isinstance(value, bytes):
        return value.decode("utf-8", errors="replace")
    return value


def _check_nvidia_smi():
    """Run ``nvidia-smi`` and print whether it succeeded plus the driver line."""
    try:
        result = subprocess.run(
            ['nvidia-smi'], capture_output=True, text=True, timeout=10
        )
    except Exception as e:
        # Covers a missing binary (FileNotFoundError), a hang
        # (TimeoutExpired) and output that cannot be decoded.
        print(f"✗ nvidia-smi 错误: {e}")
        return

    if result.returncode != 0:
        print("✗ nvidia-smi 失败:")
        # nvidia-smi frequently reports its failure reason on stdout, so
        # fall back to it when stderr is empty.
        print(result.stderr or result.stdout)
        return

    print("✓ nvidia-smi 正常运行")
    # Extract the driver version: only the first matching line is reported.
    driver_line = next(
        (line for line in result.stdout.splitlines()
         if 'Driver Version:' in line),
        None,
    )
    if driver_line is not None:
        print(f"  {driver_line.strip()}")


def _check_nvml():
    """Initialise NVML via pynvml and print the driver version and GPU names."""
    try:
        import pynvml
        pynvml.nvmlInit()
        try:
            driver_version = _to_text(pynvml.nvmlSystemGetDriverVersion())
            print(f"✓ NVML初始化成功,驱动版本: {driver_version}")

            device_count = pynvml.nvmlDeviceGetCount()
            print(f"✓ 检测到 {device_count} 个GPU设备")

            for i in range(device_count):
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                name = _to_text(pynvml.nvmlDeviceGetName(handle))
                print(f"  GPU {i}: {name}")
        finally:
            # Every successful nvmlInit() must be paired with a shutdown,
            # even when one of the queries above raises.
            pynvml.nvmlShutdown()
    except Exception as e:
        # Best-effort diagnostic: a missing package or any NVML failure is
        # reported and the remaining checks still run.
        print(f"✗ NVML错误: {e}")


def _check_paddle():
    """Print the PaddlePaddle version, CUDA build flag and visible GPU count."""
    try:
        import paddle
        print(f"✓ PaddlePaddle版本: {paddle.__version__}")
        cuda_compiled = paddle.device.is_compiled_with_cuda()
        print(f"✓ CUDA编译支持: {cuda_compiled}")
        if cuda_compiled:
            gpu_count = paddle.device.cuda.device_count()
            print(f"✓ PaddlePaddle检测到 {gpu_count} 个GPU")
    except Exception as e:
        print(f"✗ PaddlePaddle错误: {e}")


def check_nvidia_environment():
    """Print a diagnostic report about the local NVIDIA / GPU software stack.

    Three independent probes run in order: the ``nvidia-smi`` command line
    tool, the NVML library through ``pynvml``, and PaddlePaddle. Each probe
    prints its findings to standard output and reports its own failure
    (prefixed with ``✗``) instead of raising, so one missing component never
    prevents the remaining checks from running.

    Returns:
        None. All results are written to standard output.
    """
    print("=== NVIDIA环境检查 ===")

    _check_nvidia_smi()
    _check_nvml()
    _check_paddle()
|
|
|
|
|
+
|
|
|
|
|
# Script entry point: run the environment report only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    check_nvidia_environment()
|