-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathgpu-usage-by-node
executable file
·61 lines (49 loc) · 1.87 KB
/
gpu-usage-by-node
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
# Output-mode defaults; the -h/-H and -p options parsed below override them.
pretty=false
header=false
# Default node selection: whatever sinfo prints for its %N field.
# The -n option replaces this value.
nodes=$(sinfo --format="%N" --noheader)
#######################################
# Print the help text for this script.
# The here-document delimiter is unquoted on purpose so that $0 expands
# to the name the script was invoked with.
# Arguments: none
# Outputs:   usage text on stdout
#######################################
print_usage() {
cat << EOM
Usage: $0 [-h | -H] [-p] [-n nodelist]
Print a summary of GPU usage for each node.
Arguments:
-n (optional) specify nodes, either comma separated list, or summarised.
Mirrors slurm command -n usage in sinfo, and -w usage for squeue.
-h (optional) do not print header (default behaviour)
-H (optional) print header (note that lowercase -h is no-header for gpu-usage)
-p (optional) make output pretty, ignores -H and -h if specified
Output:
Summarises the current GPU usage for each node:
-- The 'in_use' column shows the number of GPUs on the node that are currently allocated to a job.
-- The 'usable' column shows the number of GPUs on the node that are not in an error state or otherwise unable
to be allocated to a job.
-- The 'total' column shows the total number of GPUs on the node, including those that are currently unable to
be allocated to a job.
The number of free GPUs (GPUs not allocated to a job and not in an error state) on a node is the difference between
the 'usable' column and the 'in_use' column.
The output is printed in CSV format by default, with the -p option output is printed as a table.
EOM
}
#######################################
# Parse the command-line options, then print one CSV row of GPU usage per
# node, or a column-aligned table when -p is given.
# Globals:   nodes, header, pretty (read; overwritten by the matching options)
# Arguments: the script's command-line arguments
# Outputs:   CSV rows on stdout, preceded by a header line when enabled
#######################################
main() {
  local flag node node_list dt this_dir
  # Keep getopts state private to this call.
  local OPTIND=1 OPTARG

  while getopts 'n:hHp' flag; do
    case "${flag}" in
      n) nodes="${OPTARG}" ;;
      h) header=false ;;
      H) header=true ;;
      p) pretty=true ;;
      *)
        print_usage
        exit
        ;;
    esac
  done

  if [ "${pretty}" = true ]; then
    # Re-run this script in CSV mode with the header on and let column(1)
    # align the fields. The node selection is quoted so that bracket ranges
    # such as node[0-1] are passed literally instead of being glob-expanded.
    "$0" -H -n "${nodes}" | column -t -s','
    exit 0
  fi

  # One hostname per line (%n), sorted.
  node_list=$(sinfo --format="%n" --noheader -n "${nodes}" | sort)
  # A single timestamp is taken up front and reused for every row.
  dt=$(date '+%d/%m/%Y %H:%M:%S')
  # The per-node helper script is looked up next to this script.
  this_dir=$(dirname "$0")

  if [ "${header}" = true ]; then
    # NOTE(review): presumably `gpu-usage -h` prints in_use,usable,total,free
    # for a node -- confirm the columns match this header.
    echo "datetime,nodename,in_use,usable,total,free"
  fi

  # Word splitting is intentional here: node_list holds whitespace-separated
  # hostnames.
  for node in ${node_list}; do
    echo "${dt},${node},$("${this_dir}/gpu-usage" -h -n "${node}")"
  done
}

main "$@"