zeek/aux/broctl/BroControl/execute.py
Robin Sommer 2b6ad76bd5 Creating a branch release/1.5 with the current 1.5.3 release code.
This is so that people working from the current stable version can
still start using git.
2011-03-09 15:26:01 -08:00

552 lines
16 KiB
Python

# $Id: execute.py 6956 2009-12-14 22:01:17Z robin $
#
# This module provides a set of functions to execute actions on a host.
# If the host is local, it's done directly; if it's remote we log in via SSH.
import os
import sys
import socket
import shutil
import re
import util
import time
import subprocess
import config
# The Broccoli Python bindings are optional. Remember whether the import
# worked so that code needing them can check first.
try:
    import broccoli
except ImportError:
    haveBroccoli = False
else:
    haveBroccoli = True

# List of this host's IP addresses; filled in on first use by isLocal().
LocalAddrs = None
# Wrapper around subprocess.Popen().
#
# Runs 'cmdline' through the shell with the child's stdin and stdout
# connected to pipes. If 'stderr_to_stdout' is true, the child's stderr is
# merged into its stdout pipe; otherwise stderr is not redirected.
#
# Returns the Popen object, which additionally carries the attributes
# 'tochild' (its stdin) and 'fromchild' (its stdout).
def popen(cmdline, stderr_to_stdout=False):
    if stderr_to_stdout:
        err_target = subprocess.STDOUT
    else:
        err_target = None

    # os.setsid makes sure that the child process doesn't receive our CTRL-Cs.
    child = subprocess.Popen([cmdline],
                             shell=True,
                             close_fds=True,
                             preexec_fn=os.setsid,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=err_target)

    # Compatibility with the attribute names of the older popen4.
    child.tochild = child.stdin
    child.fromchild = child.stdout

    return child
# Returns true if the given node corresponds to the host we're running on.
#
# A false node (e.g., None), a node whose host is "localhost", and a node
# whose addr is one of our local IP addresses all count as local. The list
# of local addresses is determined on first use by running the
# "local-interfaces" script from config.Config.scriptsdir and is then kept
# in the global LocalAddrs.
def isLocal(node):
    global LocalAddrs

    if not LocalAddrs:
        (success, output) = runLocalCmd(os.path.join(config.Config.scriptsdir, "local-interfaces"))

        if success:
            LocalAddrs = [line.strip() for line in output]
        else:
            if not config.Config.installing():
                util.warn("cannot get list of local IP addresses")

            # Fall back to resolving our own hostname.
            try:
                # This does not work for multi-homed hosts.
                LocalAddrs = [socket.gethostbyname(socket.gethostname()), "127.0.0.1"]
            except (socket.error, UnicodeError):
                # Only a failed name lookup is expected here. Catching
                # everything would also swallow KeyboardInterrupt and
                # SystemExit.
                LocalAddrs = ["127.0.0.1"]

        util.debug(1, "Local IPs: %s" % ",".join(LocalAddrs))

    return not node or node.host == "localhost" or node.addr in LocalAddrs
# Ensures that directories exist on their nodes' hosts.
#
# dirs: list of (node, dir) pairs.
#
# Returns a list of (node, success) pairs. Results for local nodes come
# first, followed by those for remote nodes.
def mkdirs(dirs):
    results = []
    remote_cmds = []
    remote_fullcmds = []

    for (node, dir) in dirs:
        if not isLocal(node):
            # Need to be careful here as our helper scripts may not be
            # installed yet; so we send a plain shell command line that
            # speaks the helper protocol itself (exit code, then "~~~").
            remote_cmds.append((node, [], []))
            remote_fullcmds.append("test -d %s || mkdir -p %s 2>/dev/null; echo $?; echo ~~~" % (dir, dir))
            continue

        # We make local directories directly.
        if not exists(node, dir):
            util.debug(1, "%-10s %s" % ("[local]", "mkdir %s" % dir))
            os.mkdir(dir)

        results.append((node, True))

    for (node, success, output) in runHelperParallel(remote_cmds, fullcmds=remote_fullcmds):
        results.append((node, success))

    return results
# Ensures that a single directory exists on the node's host.
# Returns the success value that mkdirs() reports for it.
def mkdir(node, dir):
    outcome = mkdirs([(node, dir)])
    (_, success) = outcome[0]
    return success
# Removes directories on their nodes' hosts.
#
# dirs: list of (node, dir) pairs.
#
# Returns a list of (node, success) pairs. Results for local nodes come
# first, followed by those for remote nodes.
def rmdirs(dirs):
    results = []
    remote_cmds = []

    for (node, dir) in dirs:
        if not isLocal(node):
            # Remote directories are removed by the "rmdir" helper.
            remote_cmds.append((node, "rmdir", [dir]))
            continue

        # We remove local directories directly.
        (success, output) = captureCmd("rm -rf %s" % dir)
        results.append((node, success))

    for (node, success, output) in runHelperParallel(remote_cmds):
        results.append((node, success))

    return results
# Removes a single directory on the node's host if it's there.
# Returns the success value that rmdirs() reports for it.
def rmdir(node, dir):
    outcome = rmdirs([(node, dir)])
    (_, success) = outcome[0]
    return success
# Returns true if the path exists on the host.
#
# Locally this uses os.path.lexists(), so a broken symbolic link counts as
# existing. Remotely the "exists" helper decides.
def exists(host, path):
    if not isLocal(host):
        (success, output) = runHelper(host, "exists", [path])
        return success

    return os.path.lexists(path)
# Returns true if the path exists and refers to a file on the host.
#
# Only local hosts are supported. For a remote host an error is reported
# via util.error() and, should that call return, the result is None.
def isfile(host, path):
    if not isLocal(host):
        util.error("isfile() not yet supported for remote hosts")
        return None

    return os.path.isfile(path)
# Returns true if the path exists and refers to a directory on the host.
#
# Locally this is checked with os.path.isdir(); remotely the "is-dir"
# helper decides.
def isdir(host, path):
    if not isLocal(host):
        (success, output) = runHelper(host, "is-dir", [path])
        return success

    return os.path.isdir(path)
# Copies src to dst with shutil.copy2(). Only local hosts are supported.
#
# Returns False if the host is remote or src does not exist, and True
# otherwise. An existing regular file at dst is removed before copying.
# An OSError raised by the copy itself is ignored (see below), so True
# does not guarantee that the copy took place.
def install(host, src, dst):
    if not isLocal(host):
        util.error("install() not yet supported for remote hosts")
        return False

    if not exists(host, src):
        util.output("file does not exist: %s" % src)
        return False

    if os.path.isfile(dst):
        os.remove(dst)

    util.debug(1, "cp %s %s" % (src, dst))

    try:
        shutil.copy2(src, dst)
    except OSError:
        # Python 2.6 has a bug where this may fail on NFS. So we just
        # ignore errors.
        pass

    return True
# Rsyncs paths from localhost to the nodes' hosts.
#
# nodes: the destination nodes; each gets the paths copied to
#        "<host>:<config.Config.brobase>/".
# paths: list of local paths to copy.
#
# The rsync commands run in parallel. Failures are reported as warnings;
# nothing is returned.
def sync(nodes, paths):
    rsync_opts = ["-a", "--delete", "--rsh=\"ssh -o ConnectTimeout=30\""]

    cmds = []
    for node in nodes:
        target = "%s:%s/" % (node.host, config.Config.brobase)
        cmdline = "rsync %s" % " ".join(rsync_opts + paths + [target])
        cmds.append((node, cmdline, "", None))

    for (node, success, output) in runLocalCmdsParallel(cmds):
        if success:
            continue
        util.warn("error rsyncing to %s: %s" % (node.host, output))
# Hosts already found to be dead. isAlive() reports these as dead without
# probing them again.
_deadHosts = {}

# Checks whether the given host is alive by running the "is-alive" script
# from config.Config.scriptsdir on it.
#
# A failed check is remembered in _deadHosts and reported as a warning,
# unless config.Config.cron is "1".
def isAlive(host):
    if host in _deadHosts:
        return False

    probe = os.path.join(config.Config.scriptsdir, "is-alive") + " " + host
    (success, output) = runLocalCmd(probe)

    if not (success or config.Config.cron == "1"):
        _deadHosts[host] = True
        util.warn("host %s is not alive" % host)

    return success
# Runs a command locally and returns a tuple (success, output), with
# success being true if the command terminated with exit code 0, and
# output being the combined stdout/stderr output of the command as a list
# of lines with surrounding whitespace stripped.
#
# env:   string put in front of the command line, separated by a space.
# input: if given (and not empty), printed to the command's stdin.
def captureCmd(cmd, env = "", input = None):
    cmdline = env + " " + cmd
    util.debug(1, "%-10s %s" % ("[local]", cmdline))

    proc = popen(cmdline, stderr_to_stdout=True)

    if input:
        print >>proc.tochild, input

    proc.tochild.close()

    # Drain the pipe before waiting for the child. Waiting first deadlocks
    # once the child has written more than the pipe buffer can hold, as it
    # then blocks on its write and never exits.
    output = [line.strip() for line in proc.fromchild]

    # Popen.wait() returns the exit code itself (negative if the child was
    # killed by a signal), not a wait()-style status word, so it must not
    # be decoded with os.WIFEXITED()/os.WEXITSTATUS().
    rc = proc.wait()

    util.debug(1, "%-10s exit code %d" % ("[local]", rc))

    for line in output:
        util.debug(2, " > %s" % line)

    return (rc == 0, output)
## FIXME: Replace "captureCmd" with "runLocalCmd".

# Runs a command locally and returns a tuple (success, output), with
# success being true if the command terminated with exit code 0, and
# output being the combined stdout/stderr output of the command as a list
# of lines.
#
# env and input are passed on to _runLocalCmdInit(). If that does not give
# us a process, the result is (False, []).
def runLocalCmd(cmd, env = "", input=None):
    proc = _runLocalCmdInit("single", cmd, env, input)

    if proc:
        return _runLocalCmdWait(proc)

    return (False, [])
# Runs a set of local commands in parallel.
#
# cmds: a list of (id, cmd, envs, input) tuples, where id is an arbitrary
#       cookie identifying each command.
#
# Returns a list of (id, success, output) tuples. 'output' is None
# (vs. []) for commands for which starting or waiting did not yield a
# result; those are reported with success False.
def runLocalCmdsParallel(cmds):
    results = []
    started = []

    # Start all commands first so that they run concurrently.
    for (cookie, cmd, env, stdin_data) in cmds:
        proc = _runLocalCmdInit(cookie, cmd, env, stdin_data)
        if not proc:
            results.append((cookie, False, None))
            continue
        started.append((cookie, proc))

    # Then collect their results in the order in which they were started.
    for (cookie, proc) in started:
        status = _runLocalCmdWait(proc)
        if not status:
            results.append((cookie, False, None))
            continue
        (success, output) = status
        results.append((cookie, success, output))

    return results
# Starts a local command without waiting for it to finish.
#
# id:    cookie identifying the command; not used here.
# cmd:   the command line to run.
# env:   string put in front of the command line; may be empty or None.
# input: if given (and not empty), printed to the command's stdin.
#
# Returns the process object from popen(), with its stdin already closed.
def _runLocalCmdInit(id, cmd, env, input):
    cmdline = (env or "") + " " + cmd
    util.debug(1, "%-10s %s" % ("[local]", cmdline))

    child = popen(cmdline, stderr_to_stdout=True)

    if input:
        print >>child.tochild, input

    child.tochild.close()
    return child
# Returns str without its final newline, if it ends in one. At most one
# newline is removed; all other characters are left alone.
def stripNL(str):
    if str and str[-1] == "\n":
        return str[:-1]

    return str
# Collects the result of a command started with _runLocalCmdInit().
#
# Returns a tuple (success, output), with success being true if the
# command terminated with exit code 0, and output being the lines the
# command wrote, each without its trailing newline.
def _runLocalCmdWait(proc):
    # Drain the pipe before waiting for the child. Waiting first deadlocks
    # once the child has written more than the pipe buffer can hold, as it
    # then blocks on its write and never exits.
    output = [stripNL(line) for line in proc.fromchild]

    # Popen.wait() returns the exit code itself (negative if the child was
    # killed by a signal), not a wait()-style status word, so it must not
    # be decoded with os.WIFEXITED()/os.WEXITSTATUS().
    rc = proc.wait()

    util.debug(1, "%-10s exit code %d" % ("[local]", rc))

    for line in output:
        util.debug(2, " > %s" % line)

    return (rc == 0, output)
# Runs a helper script from bin/helpers, according to the helper
# protocol.
#
# If fullcmd is given, this is the exact & complete command line (incl.
# paths). Otherwise, cmd is just the helper's name (w/o path) and args are
# the arguments. Env is an optional environment variable of the form
# "key=val".
#
# Returns a tuple (success, output) as captureCmd() does. 'output' is None
# (vs. []) if we couldn't connect to the host. Signals are disabled while
# the helper runs and re-enabled afterwards in any case.
def runHelper(host, cmd=None, args=None, fullcmd=None, env = ""):
    util.disableSignals()

    try:
        pending = _runHelperInit(host, cmd, args, fullcmd, env)
        if pending:
            result = _runHelperWait(pending)
            if result:
                return result

        return (False, None)

    finally:
        util.enableSignals()
# Runs helpers on a set of hosts in parallel.
#
# cmds:     a list of (node, cmd, args) tuples.
# fullcmds: if given, a parallel list of full command lines.
# envs:     if given, a parallel list of env variables.
#
# If fullcmds or envs is shorter than cmds, the missing entries are
# treated as "".
#
# Returns a list of (node, success, output) tuples. 'output' is None
# (vs. []) if we couldn't connect to the host. Signals are disabled while
# the helpers run and re-enabled afterwards in any case.
def runHelperParallel(cmds, fullcmds = None, envs = None):
    util.disableSignals()

    try:
        results = []
        pending = []

        # Send out all commands first ...
        for (idx, (node, cmd, args)) in enumerate(cmds):
            fullcmd = ""
            if fullcmds and idx < len(fullcmds):
                fullcmd = fullcmds[idx]

            env = ""
            if envs and idx < len(envs):
                env = envs[idx]

            if _runHelperInit(node, cmd, args, fullcmd, env):
                pending.append(node)
            else:
                results.append((node, False, None))

        # ... and then collect the replies.
        for node in pending:
            status = _runHelperWait(node)
            if not status:
                results.append((node, False, None))
                continue

            (success, output) = status
            results.append((node, success, output))

        return results

    finally:
        util.enableSignals()
# Helpers for running helpers.
#
# We keep the SSH sessions open across calls to runHelper. Connections
# maps a host's tag to the process running its shell.
Connections = dict()

# Name of the local user, looked up on first use.
WhoAmI = None

# FIXME: This is an ugly hack. The __del__ method produces
# strange unhandled exceptions in the child at termination
# of the main process. Not sure if disabling the cleanup
# altogether is a good thing but right now that's the
# only fix I can come up with.
def _emptyDel(self):
    return None

subprocess.Popen.__del__ = _emptyDel
# Returns the (stdin, stdout) pair of the shell running on the given host,
# starting that shell first if there isn't one yet.
#
# A false host stands for the manager. For a local host the shell is a
# plain "sh"; for a remote one it is started via ssh, logging in as the
# local user. Shells are kept in Connections, indexed by the host's tag.
#
# Returns None if an existing connection has terminated (the host is then
# also recorded in _deadHosts), if a remote host is not alive, or if the
# shell cannot be started.
def _getConnection(host):
    global WhoAmI

    if not WhoAmI:
        (success, output) = captureCmd("whoami")
        if not success:
            util.error("can't get 'whoami'")
        WhoAmI = output[0]

    if not host:
        host = config.Config.manager()

    if host.tag in Connections:
        proc = Connections[host.tag]

        if proc.poll() is None:
            # Still running, so we can reuse it.
            return (proc.stdin, proc.stdout)

        # Terminated.
        _deadHosts[host.host] = True
        util.warn("connection to %s broke" % host.host)
        return None

    if isLocal(host):
        cmdline = "sh"
    elif isAlive(host.host):
        cmdline = "ssh -o ConnectTimeout=30 -l %s %s sh" % (WhoAmI, host.host)
    else:
        # Remote host that is not alive.
        return None

    util.debug(1, "%-10s %s" % ("[local]", cmdline))

    try:
        proc = popen(cmdline)
    except OSError:
        err = sys.exc_info()[1]
        util.warn("cannot login into %s [IOError: %s]" % (host.host, err))
        return None

    Connections[host.tag] = proc
    return (proc.stdin, proc.stdout)
# Sends a helper command line to the shell running on the host, without
# waiting for the reply.
#
# If fullcmd is given, it is sent as is. Otherwise the command line is
# built from env, the helper 'cmd' inside config.Config.helperdir, and the
# list of args.
#
# Returns the host on success, or None if there's no connection to it.
def _runHelperInit(host, cmd, args, fullcmd, env):
    conn = _getConnection(host)
    if not conn:
        return None

    (stdin, stdout) = conn

    if fullcmd:
        cmdline = fullcmd
    else:
        helper = os.path.join(config.Config.helperdir, cmd)
        cmdline = "%s %s %s" % (env, helper, " ".join(args))

    util.debug(1, "%-10s %s" % (("[%s]" % host.host), cmdline))

    print >>stdin, cmdline
    stdin.flush()

    return host
# Reads the reply to a command sent with _runHelperInit().
#
# Per the helper protocol, the reply consists of a line with the exit
# code, followed by any output lines, and is terminated by a line "~~~".
#
# Returns a tuple (success, output), with success being true if the exit
# code is 0 and output being the list of output lines (whitespace
# stripped). Returns None if the connection to the host is not available
# while reading. If the reply has no exit code line, or one that isn't a
# number, a warning is issued and success is false.
def _runHelperWait(host):
    output = []

    while True:
        # Look the connection up anew for each line so that a connection
        # which broke in the meantime is noticed.
        c = _getConnection(host)
        if not c:
            return None

        (stdin, stdout) = c

        line = stdout.readline().strip()
        if line == "~~~":
            break

        output += [line]

    if not output:
        # The end marker came without an exit code line; indexing output[0]
        # below would raise an IndexError.
        util.warn("no exit code received from helper on %s" % host.host)
        return (False, [])

    try:
        rc = int(output[0])
    except ValueError:
        util.warn("cannot parse exit code from helper on %s: %s" % (host.host, output[0]))
        rc = 1

    util.debug(1, "%-10s exit code %d" % (("[%s]" % host.host), rc))

    for line in output:
        util.debug(2, " > %s" % line)

    return (rc == 0, output[1:])
# Broccoli communication with running nodes.

# Sends events to a set of nodes in parallel.
#
# events is a list of tuples of the form (node, event, args, result_event).
#   node:   the destination node.
#   event:  the name of the event to send (note that the receiver must
#           subscribe to it as well).
#   args:   a list of event args; each arg must be a data type understood
#           by the Broccoli module.
#   result_event: name of an event the node sends back. None if no event
#           is sent back.
#
# Returns a list of tuples (node, success, result_args).
# If success is True and a result_event was specified, result_args is the
# list of arguments as shipped with the result event.
# If success is False, result_args is a string with an error message.
#
# NOTE(review): if no result_event was specified, the third element is
# whatever _sendEventInit() returned (on success the connection object,
# not []), and _sendEventWait() is not called for that event.
def sendEventsParallel(events):
    results = []
    awaiting = []

    for (node, event, args, result_event) in events:
        if not haveBroccoli:
            results.append((node, False, "no Python bindings for Broccoli installed"))
            continue

        (success, bc) = _sendEventInit(node, event, args, result_event)

        if not (success and result_event):
            results.append((node, success, bc))
            continue

        awaiting.append((node, result_event, bc))

    for (node, result_event, bc) in awaiting:
        (success, result_args) = _sendEventWait(node, result_event, bc)
        results.append((node, success, result_args))

    return results
# Connects to a node via Broccoli and sends it an event.
#
# node:   the destination node; its addr and getPort() give the endpoint.
# event:  name of the event to send.
# args:   list of event args.
# result_event: name of the event to subscribe to for the reply.
#
# Returns (True, bc), with bc being the Broccoli connection, if the event
# was handed to Broccoli; or (False, msg), with msg being an error
# message, if setting up the connection raised an IOError.
def _sendEventInit(node, event, args, result_event):
    try:
        bc = broccoli.Connection("%s:%d" % (node.addr, node.getPort()), broclass="update",
                                 flags=broccoli.BRO_CFLAG_ALWAYS_QUEUE, connect=False)
        bc.subscribe(result_event, _event_callback(bc))
        bc.got_result = False
        bc.connect()
    except IOError:
        e = sys.exc_info()[1]
        util.debug(1, "%-10s broccoli: cannot connect" % (("[%s]" % node.tag)))
        return (False, str(e))

    # Event args need not be strings, so format each one before joining;
    # joining them directly raises a TypeError for non-string values.
    shown_args = ", ".join(["%s" % (a,) for a in args])
    util.debug(1, "%-10s broccoli: %s(%s)" % (("[%s]" % node.tag), event, shown_args))

    bc.send(event, *args)
    return (True, bc)
# Waits for an event sent with _sendEventInit() to go out and, if
# result_event is given, for the reply event to come in.
#
# Both phases poll bc.processInput() once per second and give up after
# more than ten rounds.
#
# Returns (True, result_args) on success, with result_args being the
# arguments of the reply event, or [] if no result_event was given.
# Returns (False, "time-out") if either phase timed out.
def _sendEventWait(node, result_event, bc):
    # Wait until we have sent the event out.
    cnt = 0
    while bc.processInput():
        time.sleep(1)

        cnt += 1
        if cnt > 10:
            util.debug(1, "%-10s broccoli: timeout during send" % (("[%s]" % node.tag)))
            return (False, "time-out")

    if not result_event:
        return (True, [])

    # Wait for reply event.
    cnt = 0
    bc.processInput()
    while not bc.got_result:
        time.sleep(1)
        bc.processInput()

        cnt += 1
        if cnt > 10:
            util.debug(1, "%-10s broccoli: timeout during receive" % (("[%s]" % node.tag)))
            return (False, "time-out")

    # Result args need not be strings, so format each one before joining;
    # joining them directly raises a TypeError for non-string values.
    shown_args = ", ".join(["%s" % (a,) for a in bc.result_args])
    util.debug(1, "%-10s broccoli: %s(%s)" % (("[%s]" % node.tag), result_event, shown_args))

    return (True, bc.result_args)
# Returns a callback for the result event of connection bc. When called,
# the callback flags on bc that the result has arrived (bc.got_result) and
# stores the event's arguments there as a tuple (bc.result_args).
def _event_callback(bc):
    def save_results(*event_args):
        bc.got_result = True
        bc.result_args = event_args

    return save_results