3 tháng trước cách đây · 657c41ae11
--- a/zhch/check_cuda_env.py
+++ b/zhch/check_cuda_env.py
@@ -0,0 +1,54 @@
 
															+import subprocess
														
 
															+import sys
														
 
															+
														
 
															+def check_nvidia_environment():
														
 
															+    print("=== NVIDIA环境检查 ===")
														
 
															+    
														
 
															+    # 检查nvidia-smi
														
 
															+    try:
														
 
															+        result = subprocess.run(['nvidia-smi'], capture_output=True, text=True, timeout=10)
														
 
															+        if result.returncode == 0:
														
 
															+            print("✓ nvidia-smi 正常运行")
														
 
															+            # 提取驱动版本
														
 
															+            lines = result.stdout.split('\n')
														
 
															+            for line in lines:
														
 
															+                if 'Driver Version:' in line:
														
 
															+                    print(f"  {line.strip()}")
														
 
															+                    break
														
 
															+        else:
														
 
															+            print("✗ nvidia-smi 失败:")
														
 
															+            print(result.stderr)
														
 
															+    except Exception as e:
														
 
															+        print(f"✗ nvidia-smi 错误: {e}")
														
 
															+    
														
 
															+    # 检查NVML
														
 
															+    try:
														
 
															+        import pynvml
														
 
															+        pynvml.nvmlInit()
														
 
															+        driver_version = pynvml.nvmlSystemGetDriverVersion()
														
 
															+        print(f"✓ NVML初始化成功，驱动版本: {driver_version}")
														
 
															+        
														
 
															+        device_count = pynvml.nvmlDeviceGetCount()
														
 
															+        print(f"✓ 检测到 {device_count} 个GPU设备")
														
 
															+        
														
 
															+        for i in range(device_count):
														
 
															+            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
														
 
															+            name = pynvml.nvmlDeviceGetName(handle)
														
 
															+            print(f"  GPU {i}: {name}")
														
 
															+            
														
 
															+    except Exception as e:
														
 
															+        print(f"✗ NVML错误: {e}")
														
 
															+    
														
 
															+    # 检查PaddlePaddle
														
 
															+    try:
														
 
															+        import paddle
														
 
															+        print(f"✓ PaddlePaddle版本: {paddle.__version__}")
														
 
															+        print(f"✓ CUDA编译支持: {paddle.device.is_compiled_with_cuda()}")
														
 
															+        if paddle.device.is_compiled_with_cuda():
														
 
															+            gpu_count = paddle.device.cuda.device_count()
														
 
															+            print(f"✓ PaddlePaddle检测到 {gpu_count} 个GPU")
														
 
															+    except Exception as e:
														
 
															+        print(f"✗ PaddlePaddle错误: {e}")
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    check_nvidia_environment()