* Added tools to dump compute node info in batch.
This commit is contained in:
156
sge/dump-cluster-info.py
Executable file
156
sge/dump-cluster-info.py
Executable file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# 20160826
|
||||
# Wirawan Purwanto
|
||||
#
|
||||
# A tool that dumps every conceivable piece of information I want to get
# from an SGE-managed cluster.
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def pipe_out(args, split=False, shell=False):
    """Run an external command and hand its standard output back to Python.

    This plays the role of the shell's backtick operator.

    args  -- the command to run, passed to subprocess.Popen unchanged.
    split -- when false (the default) the captured output is returned as a
             single string; when true it is returned as a list of lines
             with the line terminators removed.
    shell -- forwarded to subprocess.Popen.

    The exit status of the command is not examined.
    """
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, shell=shell)
    # stderr is not redirected, so the second element of the pair is unused.
    captured, _ = proc.communicate()
    if split:
        return captured.splitlines()
    return captured
|
||||
|
||||
|
||||
class pipe_in(object):
    """Run an external command whose standard input is fed from Python.

    This is the counterpart of pipe_out: instead of reading what the
    command prints, the caller drives it through the file-like write()
    and writelines() methods.  The subprocess.Popen object is kept in the
    `px` attribute and the command in the `args` attribute.
    """

    def __init__(self, args, shell=False):
        """Start the command `args` with a pipe attached to its stdin."""
        self.args = args
        self.px = subprocess.Popen(args, shell=shell, stdin=subprocess.PIPE)

    def write(self, line):
        """Send `line` to the command's standard input."""
        self.px.stdin.write(line)

    def writelines(self, lines):
        """Send every item of `lines`, in order, through write()."""
        for item in lines:
            self.write(item)

    def flush(self):
        """Flush the pipe connected to the command's standard input."""
        self.px.stdin.flush()

    def close(self):
        """Close the pipe connected to the command's standard input."""
        self.px.stdin.close()
|
||||
|
||||
|
||||
def errchk(cmd, args, retcode):
    """Check the return code of an external command and raise on failure.

    cmd     -- name of the program that was run; used only in the messages.
    args    -- sequence of argument strings given to the program; used only
               in the diagnostic line written to stderr.
    retcode -- return code of the command.  Zero is treated as success; a
               negative value is reported as termination by signal
               number -retcode; any other value is reported as-is.

    Returns None when retcode is zero.  Otherwise writes a diagnostic line
    to sys.stderr and raises RuntimeError describing the failure.
    """
    if retcode == 0:
        return

    # sys.stderr.write() and the call form of `raise` are valid on both
    # Python 2 and Python 3, unlike `print >>stream` and `raise E, msg`.
    # Joining with single spaces reproduces exactly what the print statement
    # emitted (including the doubled space after "executing").
    sys.stderr.write(
        " ".join(("Error executing ", str(cmd), " ".join(args))) + "\n"
    )
    if retcode < 0:
        err = "Command %s was terminated by signal %d" % (cmd, -retcode)
    else:
        err = "Command %s returned %d" % (cmd, retcode)
    raise RuntimeError(err)
|
||||
|
||||
|
||||
class sh(object):
    """Namespace for helpers that run external programs."""

    @staticmethod
    def run(prg, args):
        """Run program `prg` with the argument sequence `args` and wait for it.

        The return code is handed to errchk(), which raises on a nonzero
        value.  Returns 0 when errchk() does not raise.
        """
        argv = [prg]
        argv.extend(args)
        status = subprocess.call(argv)
        errchk(prg, args, status)
        return 0
|
||||
|
||||
|
||||
|
||||
# Module-level state.  Each name is created only when the module namespace
# does not already define it, so a value that is already present is kept.
if "NODE_LIST" not in globals():
    NODE_LIST = []
if "NODE_BAD_LIST" not in globals():
    NODE_BAD_LIST = set()
|
||||
|
||||
|
||||
def get_node_list():
    """Read the node list from the SGE configuration.

    Runs `qconf -sel` through pipe_out() and returns its output as a list
    of lines, one entry per line printed by the command.
    """
    return pipe_out(("qconf", "-sel"), split=True)
|
||||
|
||||
|
||||
def node_list():
    """Return the node list, querying SGE only when none is stored yet.

    The result of get_node_list() is kept in the module-level NODE_LIST;
    as long as that list is non-empty it is returned without running the
    query again.
    """
    global NODE_LIST
    if NODE_LIST:
        return NODE_LIST
    NODE_LIST = get_node_list()
    return NODE_LIST
|
||||
|
||||
|
||||
def rhost_pipe_out(host, cmdline, split=False):
    """Run `cmdline` on `host` through ssh and return what it printed.

    host    -- name of the remote host given to ssh.
    cmdline -- remote command: either a string, which is split on
               whitespace, or a sequence of argument strings.
    split   -- forwarded to pipe_out(); selects between a single string
               and a list of lines.

    ssh is restricted to public-key authentication.
    """
    if isinstance(cmdline, basestring):
        remote_argv = cmdline.split()
    else:
        remote_argv = list(cmdline)
    ssh_argv = ["ssh", "-o", "PreferredAuthentications=publickey", host]
    ssh_argv.extend(remote_argv)
    return pipe_out(ssh_argv, split=split)
|
||||
|
||||
|
||||
def rhost_run(host, cmdline):
    """Run `cmdline` on `host` through ssh without capturing its output.

    host    -- name of the remote host given to ssh.
    cmdline -- remote command: either a string, which is split on
               whitespace, or a sequence of argument strings.

    ssh is restricted to public-key authentication.  The command is
    executed with sh.run(), whose result is returned.
    """
    if isinstance(cmdline, basestring):
        remote_argv = cmdline.split()
    else:
        remote_argv = list(cmdline)
    ssh_options = ["-o", "PreferredAuthentications=publickey", host]
    return sh.run("ssh", ssh_options + remote_argv)
|
||||
|
||||
|
||||
def rhosts_pipe_out(cmdline, filename, hosts=None, rootdir="cluster-info"):
    """Run `cmdline` on every host and save each host's output to a file.

    cmdline  -- remote command, handed to rhost_pipe_out() unchanged.
    filename -- name of the output file inside each per-host directory.
    hosts    -- iterable of host names; when None, node_list() is used.
    rootdir  -- directory under which the per-host directories live.

    The output for a host goes to <rootdir>/<short name>/<filename>, where
    the short name is the part of the host name before the first dot.  The
    directory of the output file is created when it does not exist yet.
    """
    verbosity = 100  # fixed level; any value >= 1 prints the progress line
    if hosts is None:
        hosts = node_list()
    for host in hosts:
        short_name = host.partition(".")[0]
        target = os.path.join(rootdir, short_name, filename)
        target_dir = os.path.dirname(target)
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        if verbosity >= 1:
            print(" exec: %s %s" % (host, cmdline))
        captured = rhost_pipe_out(host, cmdline, split=False)
        with open(target, "w") as out_file:
            out_file.write(captured)
|
||||
|
||||
|
||||
def test_accessible_hosts(hosts=None):
    """Test ssh connectivity for the given hosts.

    hosts -- iterable of host names; when None, node_list() is used.

    For every host a marker message is echoed on the remote side through
    rhost_pipe_out().  A host counts as good when the text that comes back,
    with trailing whitespace removed, equals the message that was sent.

    Returns a two-tuple (good_hosts, bad_hosts) of lists holding the
    accessible and the inaccessible hosts, respectively, each in the
    order in which the hosts were given.
    """
    if hosts is None:
        hosts = node_list()
    good_hosts = []
    bad_hosts = []
    for H in hosts:
        # Only the part of the name before the first dot goes in the message.
        host_base = H.split(".")[0]
        msg_send = "Success login from host " + host_base
        msg_recv = rhost_pipe_out(H, ("echo", msg_send))
        if msg_send == msg_recv.rstrip():
            good_hosts.append(H)
        else:
            bad_hosts.append(H)
    return good_hosts, bad_hosts
|
||||
|
||||
|
||||
# Below are the main gather tools
|
||||
|
||||
def gather_cpuinfo(hosts=None):
    """Gather tool: save each host's /proc/cpuinfo as cpuinfo.txt.

    hosts -- passed on to rhosts_pipe_out() unchanged.
    """
    remote_cmd = ("cat", "/proc/cpuinfo")
    rhosts_pipe_out(remote_cmd, "cpuinfo.txt", hosts=hosts)
|
||||
|
||||
|
||||
def gather_lspci(hosts=None):
    """Gather tool: save each host's `lspci` output as lspci.txt.

    hosts -- passed on to rhosts_pipe_out() unchanged.
    """
    # The trailing comma matters: ("lspci") is just the string "lspci",
    # whereas the command is meant to be a tuple like in gather_cpuinfo().
    rhosts_pipe_out(("lspci",), "lspci.txt", hosts=hosts)
|
||||
|
||||
def gather_free(hosts=None):
    """Gather tool: save each host's `free` output as free.txt.

    hosts -- passed on to rhosts_pipe_out() unchanged.
    """
    # The trailing comma matters: ("free") is just the string "free",
    # whereas the command is meant to be a tuple like in gather_cpuinfo().
    rhosts_pipe_out(("free",), "free.txt", hosts=hosts)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user