# system.py
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import psutil
import platform
  18. def pkill(pid):
  19. try:
  20. parent = psutil.Process(pid)
  21. for child in parent.children(recursive=True):
  22. child.kill()
  23. parent.kill()
  24. except:
  25. print("Try to kill process {} failed.".format(pid))
  26. def get_system_info(machine_info={}):
  27. if machine_info:
  28. return {'status': 1, 'info': machine_info}
  29. from .utils import get_gpu_info
  30. gpu_info, message = get_gpu_info()
  31. cpu_num = os.environ.get('CPU_NUM', 1)
  32. sysstr = platform.system()
  33. machine_info['message'] = message
  34. machine_info['cpu_num'] = cpu_num
  35. machine_info['gpu_num'] = gpu_info['gpu_num']
  36. machine_info['sysstr'] = sysstr
  37. if gpu_info['gpu_num'] > 0:
  38. machine_info['driver_version'] = gpu_info['driver_version']
  39. machine_info['gpu_free_mem'] = gpu_info['mem_free']
  40. return {'status': 1, 'info': machine_info}
  41. def get_gpu_memory_info(machine_info):
  42. gpu_mem_infos = list()
  43. if machine_info['gpu_num'] == 0:
  44. pass
  45. else:
  46. from .utils import get_gpu_info
  47. gpu_info, message = get_gpu_info()
  48. for i in range(gpu_info['gpu_num']):
  49. attr = {
  50. 'free': gpu_info['mem_free'][i],
  51. 'used': gpu_info['mem_used'][i],
  52. 'total': gpu_info['mem_total'][i]
  53. }
  54. gpu_mem_infos.append(attr)
  55. return {'status': 1, 'gpu_mem_infos': gpu_mem_infos}
  56. def get_machine_info(data, machine_info):
  57. path = None
  58. if "path" in data:
  59. path = data['path']
  60. if path in machine_info:
  61. return {'status': 1, 'info': machine_info}
  62. from .utils import get_machine_info
  63. info = get_machine_info(path)
  64. machine_info = info
  65. return {'status': 1, 'info': machine_info}
  66. def get_gpu_memory_size(data):
  67. """获取显存大小
  68. Args:
  69. request(comm.Request): 其中request.params为dict, key包括
  70. 'path' 显卡驱动动态链接库路径
  71. """
  72. from .utils import PyNvml
  73. p = PyNvml()
  74. p.nvml_init(data['path'])
  75. count = p.nvml_device_get_count()
  76. gpu_mem_infos = []
  77. for i in range(count):
  78. handler = p.nvml_device_get_handle_by_index(i)
  79. mem = p.nvml_device_get_memory_info(handler)
  80. attr = {'free': mem.free, 'used': mem.used, 'total': mem.total}
  81. gpu_mem_infos.append(attr)
  82. return {'status': 1, 'gpu_mem_infos': gpu_mem_infos}
  83. def exit_system(monitored_processes):
  84. while not monitored_processes.empty():
  85. pid = monitored_processes.get(timeout=0.5)
  86. print("Try to kill process {}".format(pid))
  87. pkill(pid)
  88. return {'status': 1}