* Added variants of node status to display (still work in progress).
This commit is contained in:
@@ -1,49 +1,109 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# 20151028
|
# 20151028
|
||||||
|
#
|
||||||
|
# Note: original extraction command on turing:
|
||||||
function node_slot_stats_per_machine_type()
|
|
||||||
# Original extraction command on turing:
|
|
||||||
#
|
#
|
||||||
# qstat -f | grep -ve '^[-# ]' -e '^queuename' | less
|
# qstat -f | grep -ve '^[-# ]' -e '^queuename' | less
|
||||||
#
|
#
|
||||||
# FIXME: If a machine is covered by more than one queue, this will cause the counts
|
|
||||||
# to be overestimated.
|
# Runtime options. A value already present in the environment is kept;
# otherwise each option defaults to 0.
#   optShowDisabledNodes: nonzero => hosts whose state field contains "d"
#                         are kept instead of being skipped
#   optPrintRaw:          nonzero => each accepted host line is also printed
# The expansions are quoted so the resulting value is never word-split or
# glob-expanded as arguments to the no-op ":" command.
: "${optShowDisabledNodes=0}"
: "${optPrintRaw=0}"
|
||||||
|
|
||||||
|
|
||||||
|
function node_slot_stats_raw()
# Prints the host status lines of `qstat -f' in raw (unmodified) format:
#   - the "queuename ..." column header is kept when it is the first line
#   - hosts whose states column contains "d" (disabled) are dropped unless
#     optShowDisabledNodes is nonzero
#   - every other line (separators, the computational jobs running on the
#     nodes) is dropped
# Globals: optShowDisabledNodes (read). Unset or empty is treated as 0;
#          passing an empty string through to gawk would make the
#          "!= 0" test a string comparison that is always true.
# Outputs: the selected lines on stdout.
{
  qstat -f \
    | gawk -v optShowDisabledNodes="${optShowDisabledNodes:-0}" '
        FNR == 1 && $1 == "queuename" { print; next }

        # Valid host status field: the line starts with a letter and has
        # 5 fields, or 6 when the trailing states column is present.
        ($0 ~ /^[A-Za-z]/) && (NF == 5 || NF == 6) && (optShowDisabledNodes != 0 || ($6 !~ /d/)) {
          print
        }
      '
}
|
||||||
|
|
||||||
|
|
||||||
|
function node_slot_stats_per_machine_type()
# Reports slot availability per machine type, where a machine type is the
# set of hosts sharing the same base hostname (e.g. "c6-" or "c8-").
# The host naming convention comes from the Turing cluster, for which this
# was originally written.
#
# Sample report (actual numbers vary with the cluster load and with which
# nodes are disabled):
#
#     MACHTYPE   NODE   CORES    used    free    resv
#     c6           15     240      77     163       0
#     c8           40     768     569     199       0
#     cr           74    1480     988     492       0
#     crhimem       3      96       0      96       0
#     crphi        10     200      48     152       0
#     d430         49    1568    1292     276       0
#     d730         10     280      10     270       0
#
# FIXME: A machine that belongs to more than one queue is counted once per
# queue, which overestimates the totals. Each machine must be registered
# when first encountered and not accounted again.
{
  # Live `qstat -f' output is the standard input of the processing stage.
  _Process_node_slot_stats_per_machine_type < <(qstat -f)
}
|
||||||
|
|
||||||
|
function _Process_node_slot_stats_per_machine_type()
|
||||||
|
# Processing part of the routine above.
|
||||||
|
{
|
||||||
|
gawk \
|
||||||
|
-v optShowDisabledNodes="$optShowDisabledNodes" \
|
||||||
|
-v optPrintRaw="$optPrintRaw" \
|
||||||
|
'####
|
||||||
|
BEGIN {
|
||||||
|
STDERR = "/dev/stderr"
|
||||||
|
hostnames_seen[-1234] = 0
|
||||||
|
}
|
||||||
|
|
||||||
FNR == 1 && $1 == "queuename" { next; }
|
FNR == 1 && $1 == "queuename" { next; }
|
||||||
|
|
||||||
# Valid host status field
|
# Valid host status field
|
||||||
($0 ~ /^[A-Za-z]/) && (NF == 5 || NF == 6) {
|
($0 ~ /^[A-Za-z]/) && (NF == 5 || NF == 6) {
|
||||||
#print($0)
|
|
||||||
queue_node = $1
|
queue_node = $1
|
||||||
core_usage_combo = $3
|
core_usage_combo = $3
|
||||||
states = $6 # if any
|
states = $6 # if any
|
||||||
|
|
||||||
# skip disabled hosts
|
# skip disabled hosts
|
||||||
if (states ~ /d/) next;
|
if (states ~ /d/ && (optShowDisabledNodes==0)) next;
|
||||||
|
|
||||||
|
if (optPrintRaw != 0) print($0)
|
||||||
|
|
||||||
# gawk extension of match:
|
# gawk extension of match:
|
||||||
if (! match(queue_node, /^([^@]+)@([^-]+)-(.*)$/, Strs))
|
if (match(queue_node, /^([^@]+)@([^-]+)-(.*)$/, Strs))
|
||||||
{
|
|
||||||
print("Invalid queue/host combo: " queue_node) > STDERR
|
|
||||||
next
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
queue = Strs[1]
|
queue = Strs[1]
|
||||||
hostkind = Strs[2]
|
hostkind = Strs[2]
|
||||||
hostnum = Strs[3]
|
hostnum = Strs[3]
|
||||||
|
hostname = hostkind "-" hostnum
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
print("Invalid queue/host combo: " queue_node) > STDERR
|
||||||
|
next
|
||||||
}
|
}
|
||||||
split(core_usage_combo, Strs, "/")
|
split(core_usage_combo, Strs, "/")
|
||||||
slots_resv = Strs[1]
|
slots_resv = Strs[1]
|
||||||
slots_used = Strs[2]
|
slots_used = Strs[2]
|
||||||
slots_tot = Strs[3]
|
slots_tot = Strs[3]
|
||||||
|
|
||||||
|
# Avoiding double counting:
|
||||||
|
if (hostname in hostname_seen)
|
||||||
|
{
|
||||||
|
print("Host already seen: " hostname) > STDERR
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
mach_node_count[hostkind] = mach_node_count[hostkind] + 1
|
mach_node_count[hostkind] = mach_node_count[hostkind] + 1
|
||||||
mach_node_slot_count[hostkind] = slots_tot # assume homogenous! This DOES NOT work with c8-type nodes!
|
mach_node_slot_count[hostkind] = slots_tot # assume homogenous! This DOES NOT work with c8-type nodes!
|
||||||
mach_slots_tot[hostkind] = mach_slots_tot[hostkind] + slots_tot
|
mach_slots_tot[hostkind] = mach_slots_tot[hostkind] + slots_tot
|
||||||
@@ -76,7 +136,36 @@ function report_node_stats()
|
|||||||
END {
|
END {
|
||||||
report_node_stats()
|
report_node_stats()
|
||||||
}
|
}
|
||||||
'
|
' \
|
||||||
|
"$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
node_slot_stats_per_machine_type
|
function node_slot_stats_per_machine_type_f()
# File-based variant of node_slot_stats_per_machine_type: the processing
# stage is given $1 as its input argument instead of being fed from a
# live `qstat -f' call.
# Arguments: $1 - input file (presumably saved `qstat -f' output; confirm)
{
  local stats_input=$1
  _Process_node_slot_stats_per_machine_type "$stats_input"
}
|
||||||
|
|
||||||
|
|
||||||
|
# Command-line dispatch.
#   $1 - action: raw, stats (also the default when $1 is empty) or
#        stats-with-disabled; each is accepted with or without a
#        leading "--"
#   $2 - optional input file for the two stats actions; when empty the
#        stats are taken from a live qstat call instead
# Any other action is reported on stderr and the script exits with 2.
case "$1" in
  --raw | raw)
    node_slot_stats_raw
    ;;
  --stats | stats | "")
    if [ -z "$2" ]; then
      node_slot_stats_per_machine_type
    else
      node_slot_stats_per_machine_type_f "$2"
    fi
    ;;
  --stats-with-disabled | stats-with-disabled)
    # The assignment prefix applies the override to this single call only.
    if [ -z "$2" ]; then
      optShowDisabledNodes=1 node_slot_stats_per_machine_type
    else
      optShowDisabledNodes=1 node_slot_stats_per_machine_type_f "$2"
    fi
    ;;
  *)
    echo "Unknown action: $1" >&2
    exit 2
    ;;
esac
|
||||||
|
|||||||
Reference in New Issue
Block a user