utils/gpu: add support for multiple GPUs

This commit is contained in:
Mathis FELARDOS 2017-02-16 12:36:32 -08:00 committed by enkore
parent 133643fc6d
commit 883ff41e60
3 changed files with 8 additions and 5 deletions

View File

@ -25,7 +25,7 @@ class GPUMemory(IntervalModule):
("warn_color", "defines the color used when warn percentage is exceeded"), ("warn_color", "defines the color used when warn percentage is exceeded"),
("alert_color", "defines the color used when alert percentage is exceeded"), ("alert_color", "defines the color used when alert percentage is exceeded"),
("round_size", "defines number of digits in round"), ("round_size", "defines number of digits in round"),
("gpu_number", "set the gpu number when you have several GPU"),
) )
format = "{avail_mem} MiB" format = "{avail_mem} MiB"
@ -36,9 +36,10 @@ class GPUMemory(IntervalModule):
warn_percentage = 50 warn_percentage = 50
alert_percentage = 80 alert_percentage = 80
round_size = 1 round_size = 1
gpu_number = 0
def run(self): def run(self):
info = gpu.query_nvidia_smi() info = gpu.query_nvidia_smi(self.gpu_number)
if info.used_mem is not None and info.total_mem is not None: if info.used_mem is not None and info.total_mem is not None:
mem_percent = 100 * info.used_mem / info.total_mem mem_percent = 100 * info.used_mem / info.total_mem

View File

@ -16,6 +16,7 @@ class GPUTemperature(IntervalModule):
settings = ( settings = (
("format", "format string used for output. {temp} is the temperature in integer degrees celsius"), ("format", "format string used for output. {temp} is the temperature in integer degrees celsius"),
("display_if", "snippet that gets evaluated. if true, displays the module output"), ("display_if", "snippet that gets evaluated. if true, displays the module output"),
("gpu_number", "set the gpu number when you have several GPU"),
"color", "color",
"alert_temp", "alert_temp",
"alert_color", "alert_color",
@ -25,9 +26,10 @@ class GPUTemperature(IntervalModule):
alert_temp = 90 alert_temp = 90
alert_color = "#FF0000" alert_color = "#FF0000"
display_if = 'True' display_if = 'True'
gpu_number = 0
def run(self): def run(self):
temp = gpu.query_nvidia_smi().temp temp = gpu.query_nvidia_smi(self.gpu_number).temp
temp_alert = temp is None or temp >= self.alert_temp temp_alert = temp is None or temp >= self.alert_temp
if eval(self.display_if): if eval(self.display_if):

View File

@ -6,7 +6,7 @@ GPUUsageInfo = namedtuple('GPUUsageInfo', ['total_mem', 'avail_mem', 'used_mem',
'usage_gpu', 'usage_mem']) 'usage_gpu', 'usage_mem'])
def query_nvidia_smi() -> GPUUsageInfo: def query_nvidia_smi(gpu_number) -> GPUUsageInfo:
""" """
:return: :return:
all memory fields are in megabytes, all memory fields are in megabytes,
@ -34,7 +34,7 @@ def query_nvidia_smi() -> GPUUsageInfo:
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
raise Exception("nvidia-smi call failed") raise Exception("nvidia-smi call failed")
output = output.decode('utf-8').strip() output = output.decode('utf-8').split("\n")[gpu_number].strip()
values = output.split(", ") values = output.split(", ")
# If value contains 'not' - it is not supported for this GPU (in fact, for now nvidia-smi returns '[Not Supported]') # If value contains 'not' - it is not supported for this GPU (in fact, for now nvidia-smi returns '[Not Supported]')