utils/gpu: add support for multiple GPUs
This commit is contained in:
parent
133643fc6d
commit
883ff41e60
@ -25,7 +25,7 @@ class GPUMemory(IntervalModule):
|
|||||||
("warn_color", "defines the color used when warn percentage is exceeded"),
|
("warn_color", "defines the color used when warn percentage is exceeded"),
|
||||||
("alert_color", "defines the color used when alert percentage is exceeded"),
|
("alert_color", "defines the color used when alert percentage is exceeded"),
|
||||||
("round_size", "defines number of digits in round"),
|
("round_size", "defines number of digits in round"),
|
||||||
|
("gpu_number", "set the gpu number when you have several GPU"),
|
||||||
)
|
)
|
||||||
|
|
||||||
format = "{avail_mem} MiB"
|
format = "{avail_mem} MiB"
|
||||||
@ -36,9 +36,10 @@ class GPUMemory(IntervalModule):
|
|||||||
warn_percentage = 50
|
warn_percentage = 50
|
||||||
alert_percentage = 80
|
alert_percentage = 80
|
||||||
round_size = 1
|
round_size = 1
|
||||||
|
gpu_number = 0
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
info = gpu.query_nvidia_smi()
|
info = gpu.query_nvidia_smi(self.gpu_number)
|
||||||
|
|
||||||
if info.used_mem is not None and info.total_mem is not None:
|
if info.used_mem is not None and info.total_mem is not None:
|
||||||
mem_percent = 100 * info.used_mem / info.total_mem
|
mem_percent = 100 * info.used_mem / info.total_mem
|
||||||
|
@ -16,6 +16,7 @@ class GPUTemperature(IntervalModule):
|
|||||||
settings = (
|
settings = (
|
||||||
("format", "format string used for output. {temp} is the temperature in integer degrees celsius"),
|
("format", "format string used for output. {temp} is the temperature in integer degrees celsius"),
|
||||||
("display_if", "snippet that gets evaluated. if true, displays the module output"),
|
("display_if", "snippet that gets evaluated. if true, displays the module output"),
|
||||||
|
("gpu_number", "set the gpu number when you have several GPU"),
|
||||||
"color",
|
"color",
|
||||||
"alert_temp",
|
"alert_temp",
|
||||||
"alert_color",
|
"alert_color",
|
||||||
@ -25,9 +26,10 @@ class GPUTemperature(IntervalModule):
|
|||||||
alert_temp = 90
|
alert_temp = 90
|
||||||
alert_color = "#FF0000"
|
alert_color = "#FF0000"
|
||||||
display_if = 'True'
|
display_if = 'True'
|
||||||
|
gpu_number = 0
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
temp = gpu.query_nvidia_smi().temp
|
temp = gpu.query_nvidia_smi(self.gpu_number).temp
|
||||||
temp_alert = temp is None or temp >= self.alert_temp
|
temp_alert = temp is None or temp >= self.alert_temp
|
||||||
|
|
||||||
if eval(self.display_if):
|
if eval(self.display_if):
|
||||||
|
@ -6,7 +6,7 @@ GPUUsageInfo = namedtuple('GPUUsageInfo', ['total_mem', 'avail_mem', 'used_mem',
|
|||||||
'usage_gpu', 'usage_mem'])
|
'usage_gpu', 'usage_mem'])
|
||||||
|
|
||||||
|
|
||||||
def query_nvidia_smi() -> GPUUsageInfo:
|
def query_nvidia_smi(gpu_number) -> GPUUsageInfo:
|
||||||
"""
|
"""
|
||||||
:return:
|
:return:
|
||||||
all memory fields are in megabytes,
|
all memory fields are in megabytes,
|
||||||
@ -34,7 +34,7 @@ def query_nvidia_smi() -> GPUUsageInfo:
|
|||||||
except subprocess.CalledProcessError:
|
except subprocess.CalledProcessError:
|
||||||
raise Exception("nvidia-smi call failed")
|
raise Exception("nvidia-smi call failed")
|
||||||
|
|
||||||
output = output.decode('utf-8').strip()
|
output = output.decode('utf-8').split("\n")[gpu_number].strip()
|
||||||
values = output.split(", ")
|
values = output.split(", ")
|
||||||
|
|
||||||
# If value contains 'not' - it is not supported for this GPU (in fact, for now nvidia-smi returns '[Not Supported]')
|
# If value contains 'not' - it is not supported for this GPU (in fact, for now nvidia-smi returns '[Not Supported]')
|
||||||
|
Loading…
Reference in New Issue
Block a user