import subprocess
import sys


def _to_text(value):
    """Return *value* as ``str``, decoding it as UTF-8 when it is ``bytes``.

    Some pynvml releases return ``bytes`` from their query functions while
    newer ones return ``str``; normalising keeps the printed output free of
    ``b'...'`` wrappers.
    """
    if isinstance(value, bytes):
        return value.decode("utf-8", errors="replace")
    return value


def _check_nvidia_smi():
    """Run ``nvidia-smi`` and print the driver-version line or the failure."""
    try:
        result = subprocess.run(
            ['nvidia-smi'], capture_output=True, text=True, timeout=10
        )
        if result.returncode == 0:
            print("✓ nvidia-smi 正常运行")
            # Extract the driver version: only the first matching line is shown.
            for line in result.stdout.splitlines():
                if 'Driver Version:' in line:
                    print(f" {line.strip()}")
                    break
        else:
            print("✗ nvidia-smi 失败:")
            print(result.stderr)
    except Exception as e:
        # Best-effort probe: a missing binary or a timeout is reported,
        # not raised, so the remaining checks still run.
        print(f"✗ nvidia-smi 错误: {e}")


def _check_nvml():
    """Initialise NVML via pynvml and print the driver version and GPU names."""
    try:
        import pynvml

        pynvml.nvmlInit()
        try:
            driver_version = _to_text(pynvml.nvmlSystemGetDriverVersion())
            print(f"✓ NVML初始化成功,驱动版本: {driver_version}")

            device_count = pynvml.nvmlDeviceGetCount()
            print(f"✓ 检测到 {device_count} 个GPU设备")

            for i in range(device_count):
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                name = _to_text(pynvml.nvmlDeviceGetName(handle))
                print(f" GPU {i}: {name}")
        finally:
            # Pair every successful nvmlInit() with a shutdown, even when a
            # query above raised.
            pynvml.nvmlShutdown()
    except Exception as e:
        print(f"✗ NVML错误: {e}")


def _check_paddle():
    """Print the PaddlePaddle version, CUDA build flag, and visible GPU count."""
    try:
        import paddle

        print(f"✓ PaddlePaddle版本: {paddle.__version__}")

        # Query once and reuse the answer for both the report and the branch.
        compiled_with_cuda = paddle.device.is_compiled_with_cuda()
        print(f"✓ CUDA编译支持: {compiled_with_cuda}")

        if compiled_with_cuda:
            gpu_count = paddle.device.cuda.device_count()
            print(f"✓ PaddlePaddle检测到 {gpu_count} 个GPU")
    except Exception as e:
        print(f"✗ PaddlePaddle错误: {e}")


def check_nvidia_environment():
    """Print a diagnostic report of the NVIDIA / GPU software stack.

    Three independent probes run in order: the ``nvidia-smi`` command, the
    NVML bindings (``pynvml``), and PaddlePaddle. Each probe catches its own
    exceptions and prints them, so a failure in one does not prevent the
    others from running.

    Returns:
        None. All results are written to standard output.
    """
    print("=== NVIDIA环境检查 ===")

    # Check nvidia-smi
    _check_nvidia_smi()

    # Check NVML
    _check_nvml()

    # Check PaddlePaddle
    _check_paddle()


if __name__ == "__main__":
    check_nvidia_environment()