zeek/aux/broctl/BroControl/execute.py

# $Id: execute.py 6956 2009-12-14 22:01:17Z robin $
#
# These modules provides a set of functions to execute actions on a host.
# If the host is local, it's done direcly; if it's remote we log in via SSH.

import os
import sys
import socket
import shutil
import re
import util
import time
import subprocess

import config

haveBroccoli = True

try:
    import broccoli
except ImportError:
    haveBroccoli = False

LocalAddrs = None

# Wrapper around subprocess.POpen()
def popen(cmdline, stderr_to_stdout=False):
    stderr = None
    if stderr_to_stdout:
        stderr = subprocess.STDOUT

    # os.setid makes sure that the child process doesn't receive our CTRL-Cs.
    proc = subprocess.Popen([cmdline], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=stderr,
                            close_fds=True, shell=True, preexec_fn=os.setsid)
    # Compatibility with older popen4.
    proc.tochild = proc.stdin
    proc.fromchild = proc.stdout

    return proc

# Returns true if given node corresponds to the host we're running on.
def isLocal(node):
    global LocalAddrs
    if not LocalAddrs:
        (success, output) = runLocalCmd(os.path.join(config.Config.scriptsdir, "local-interfaces"))
        if not success:
            if not config.Config.installing():
                util.warn("cannot get list of local IP addresses")

            try:
                # This does not work for multi-homed hosts.
                LocalAddrs = [socket.gethostbyname(socket.gethostname()), "127.0.0.1"]
            except:
                LocalAddrs = ["127.0.0.1"]
        else:
            LocalAddrs = [line.strip() for line in output]

        util.debug(1, "Local IPs: %s" % ",".join(LocalAddrs))

    return not node or node.host == "localhost" or node.addr in LocalAddrs

# Takes list of (node, dir) pairs and ensures the directories exist on the nodes' host.
# Returns list of (node, sucess) pairs.
def mkdirs(dirs):

    results = []
    cmds = []
    fullcmds = []

    for (node, dir) in dirs:
        # We make local directories directly.
        if isLocal(node):
            if not exists(node, dir):
                util.debug(1, "%-10s %s" % ("[local]", "mkdir %s" % dir))
                os.mkdir(dir)

            results += [(node, True)]

        else:
            cmds += [(node, [], [])]
                # Need to be careful here as our helper scripts may not be installed yet.
            fullcmds += [("test -d %s || mkdir -p %s 2>/dev/null; echo $?; echo ~~~" % (dir, dir))]

    for (node, success, output) in runHelperParallel(cmds, fullcmds=fullcmds):
        results += [(node, success)]

    return results

# Takes list of (node, dir) pairs and ensures the directories exist on the nodes' host.
# Returns list of (node, sucess) pairs.
def mkdir(node, dir):
    return mkdirs([(node, dir)])[0][1]

def rmdirs(dirs):
    results = []
    cmds = []

    for (node, dir) in dirs:
        # We remove local directories directly.
        if isLocal(node):
            (success, output) = captureCmd("rm -rf %s" % dir)
            results += [(node, success)]
        else:
            cmds += [(node, "rmdir", [dir])]

    for (node, success, output) in runHelperParallel(cmds):
        results += [(node, success)]

    return results

# Removes the directory on the host if it's there.
def rmdir(node, dir):
    return rmdirs([(node, dir)])[0][1]

# Returns true if the path exists on the host.
def exists(host, path):
    if isLocal(host):
        return os.path.lexists(path)
    else:
        (success, output) = runHelper(host, "exists", [path])
        return success

# Returns true if the path exists and refers to a file on the host.
def isfile(host, path):
    if isLocal(host):
        return os.path.isfile(path)
    else:
        util.error("isfile() not yet supported for remote hosts")

# Returns true if the path exists and refers to a directory on the host.
def isdir(host, path):
    if isLocal(host):
        return os.path.isdir(path)
    else:
        (success, output) = runHelper(host, "is-dir", [path])
        return success

# Copies src to dst, preserving permission bits.
# Works for files and directories (non-recursive).
def install(host, src, dst):
    if isLocal(host):
        if not exists(host, src):
            util.output("file does not exist: %s" % src)
            return False

        if os.path.isfile(dst):
            os.remove(dst)

        util.debug(1, "cp %s %s" % (src, dst))

        try:
            shutil.copy2(src, dst)
        except OSError:
            # Python 2.6 has a bug where this may fail on NFS. So we just
            # ignore errors.
            pass

        return True
    else:
        util.error("install() not yet supported for remote hosts")
        return False

# rsyns paths from localhost to destination hosts.
def sync(nodes, paths):

    cmds = []
    for n in nodes:
        args = ["-a", "--delete", "--rsh=\"ssh -o ConnectTimeout=30\""]
        dst = ["%s:%s/" % (n.host, config.Config.brobase)]
        args += paths + dst
        cmdline = "rsync %s" % " ".join(args)
        cmds += [(n, cmdline, "", None)]

    for (id, success, output) in runLocalCmdsParallel(cmds):
        if not success:
            util.warn("error rsyncing to %s: %s" % (id.host, output))

# Checks whether the given host is alive.
_deadHosts = {}

def isAlive(host):

    if host in _deadHosts:
        return False

    (success, output) = runLocalCmd(os.path.join(config.Config.scriptsdir, "is-alive") + " " + host)

    if not success and not config.Config.cron == "1":
        _deadHosts[host] = True
        util.warn("host %s is not alive" % host)

    return success

# Runs command locally and returns tuple (success, output)
# with success being true if the command terminated with exit code 0,
# and output being the combinded stdout/stderr output of the command.
def captureCmd(cmd, env = "", input = None):

    cmdline = env + " " + cmd
    util.debug(1, "%-10s %s" % ("[local]", cmdline))

    proc = popen(cmdline, stderr_to_stdout=True)

    if input:
        print >>proc.tochild, input
        proc.tochild.close()

    rc = proc.wait()
    output = [line.strip() for line in proc.fromchild]

    util.debug(1, "%-10s exit code %d" % ("[local]", os.WEXITSTATUS(rc)))
    for line in output:
        util.debug(2, "           > %s" % line)

    return (os.WIFEXITED(rc) and os.WEXITSTATUS(rc) == 0, output)

## FIXME: Replace "captureCmd" with "runLocalCmd".

# Runs command locally and returns tuple (success, output)
# with success being true if the command terminated with exit code 0,
# and output being the combinded stdout/stderr output of the command.
def runLocalCmd(cmd, env = "", input=None):
    proc = _runLocalCmdInit("single", cmd, env, input)
    if not proc:
        return (False, [])

    return _runLocalCmdWait(proc)

# Same as above but runs a set of local commands in parallel.
# Cmds is a list of (id, cmd, envs, input) tuples, where id is
# an arbitrary cookie identifying each command.
# Returns a list of (id, success, output) tuples.
# 'output' is None (vs. []) if we couldn't connect to host.
def runLocalCmdsParallel(cmds):

    results = []
    running = []

    for (id, cmd, envs, input) in cmds:
        proc = _runLocalCmdInit(id, cmd, envs, input)
        if proc:
            running += [(id, proc)]
        else:
            results += [(id, False, None)]

    for (id, proc) in running:
        status  = _runLocalCmdWait(proc)
        if status:
            (success, output) = status
            results += [(id, success, output)]
        else:
            results += [(id, False, None)]

    return results

def _runLocalCmdInit(id, cmd, env, input):

    if not env:
        env = ""

    cmdline = env + " " + cmd
    util.debug(1, "%-10s %s" % ("[local]", cmdline))

    proc = popen(cmdline, stderr_to_stdout=True)

    if input:
        print >>proc.tochild, input

    proc.tochild.close()
    return proc

def stripNL(str):
    if len(str) == 0 or str[-1] != "\n":
        return str

    return str[0:-1]

def _runLocalCmdWait(proc):

    rc = proc.wait()
    output = [stripNL(line) for line in proc.fromchild]

    util.debug(1, "%-10s exit code %d" % ("[local]", os.WEXITSTATUS(rc)))
    for line in output:
        util.debug(2, "           > %s" % line)

    return (os.WIFEXITED(rc) and os.WEXITSTATUS(rc) == 0, output)

# Runs a helper script from bin/helpers, according to the helper
# protocol.
# If fullcmd is given, this is the exact & complete command line (incl. paths).
# Otherwise, cmd is just the helper's name (wo/ path) and args are the
# arguments. Env is an optional enviroment variable of the form
# "key=val". Return value as for captureCmd().
# 'output' is None (vs. []) if we couldn't connect to host.
def runHelper(host, cmd=None, args=None, fullcmd=None, env = ""):
    util.disableSignals()
    try:
        status = _runHelperInit(host, cmd, args, fullcmd, env)
        if not status:
            return (False, None)

        status = _runHelperWait(status)
        if not status:
            return (False, None)

        return status

    finally:
        util.enableSignals()

# Same as above but runs commands on a set of hosts in parallel.
# Cmds is a list of (node, cmd, args) tuples.
# Fullcmds, if given, is a parallel list of full command lines.
# Envs, if given, is a parallel list of env variables.
# Returns a list of (node, success, output) tuples.
# 'output' is None (vs. []) if we couldn't connect to host.
def runHelperParallel(cmds, fullcmds = None, envs = None):

    util.disableSignals()

    try:
        results = []
        running = []

        for (node, cmd, args) in cmds:

            if fullcmds:
                fullcmd = fullcmds[0]
                fullcmds = fullcmds[1:]
            else:
                fullcmd = ""

            if envs:
                env = envs[0]
                envs = envs[1:]
            else:
                env = ""

            status = _runHelperInit(node, cmd, args, fullcmd, env)
            if status:
                running += [node]
            else:
                results += [(node, False, None)]

        for node in running:
            status =  _runHelperWait(node)
            if status:
                (success, output) = status
                results += [(node, success, output)]
            else:
                results += [(node, False, None)]

        return results

    finally:
        util.enableSignals()

# Helpers for running helpers.
#
# We keep the SSH sessions open across calls to runHelper.
Connections = {}
WhoAmI = None

# FIXME: This is an ugly hack. The __del__ method produces
# strange unhandled exceptions in the child at termination
# of the main process. Not sure if disabling the cleanup
# altogether is a good thing but right now that's the
# only fix I can come up with.
def _emptyDel(self):
    pass
subprocess.Popen.__del__ = _emptyDel

def _getConnection(host):

    global WhoAmI
    if not WhoAmI:
        (success, output) = captureCmd("whoami")
        if not success:
            util.error("can't get 'whoami'")
        WhoAmI = output[0]

    if not host:
        host = config.Config.manager()

    if host.tag in Connections:
        p = Connections[host.tag]
        if p.poll() != None:
            # Terminated.
            global _deadHosts
            _deadHosts[host.host] = True
            util.warn("connection to %s broke" % host.host)
            return None

        return (p.stdin, p.stdout)

    if isLocal(host):
        cmdline = "sh"
    else:
        # Check whether host is alive.
        if not isAlive(host.host):
            return None

        cmdline = "ssh -o ConnectTimeout=30 -l %s %s sh" % (WhoAmI, host.host)

    util.debug(1, "%-10s %s" % ("[local]", cmdline))

    try:
        p = popen(cmdline)
    except OSError, e:
        util.warn("cannot login into %s [IOError: %s]" % (host.host, e))
        return None

    Connections[host.tag] = p
    return (p.stdin, p.stdout)

def _runHelperInit(host, cmd, args, fullcmd, env):

    c = _getConnection(host)
    if not c:
        return None

    (stdin, stdout) = c

    if not fullcmd:
        cmdline = "%s %s %s" % (env, os.path.join(config.Config.helperdir, cmd), " ".join(args))
    else:
        cmdline = fullcmd

    util.debug(1, "%-10s %s" % (("[%s]" % host.host), cmdline))
    print >>stdin, cmdline
    stdin.flush()

    return host

def _runHelperWait(host):
    output = []
    while True:

        c = _getConnection(host)
        if not c:
            return None

        (stdin, stdout) = c

        line = stdout.readline().strip()
        if line == "~~~":
            break
        output += [line]

    try:
        rc = int(output[0])
    except ValueError:
        util.warn("cannot parse exit code from helper on %s: %s" % (host.host, output[0]))
        rc = 1

    util.debug(1, "%-10s exit code %d" % (("[%s]" % host.host), rc))

    for line in output:
        util.debug(2, "           > %s" % line)

    return (rc == 0, output[1:])

# Broccoli communication with running nodes.

# Sends event  to a set of nodes in parallel.
#
# events is a list of tuples of the form (node, event, args, result_event).
#   node:    the destination node.
#   event:   the name of the event to send (node that receiver must subscribe to it as well).
#   args:    a list of event args; each arg must be a data type understood by the Broccoli module.
#   result_event: name of a event the node sends back. None if no event is sent back.
#
# Returns a list of tuples (node, success, results_args).
#   If success is True, result_args is a list of arguments as shipped with the result event,
#   or [] if no result_event was specified.
#   If success is False, results_args is a string with an error message.

def sendEventsParallel(events):

    results = []
    sent = []

    for (node, event, args, result_event) in events:

        if not haveBroccoli:
            results += [(node, False, "no Python bindings for Broccoli installed")]
            continue

        (success, bc) = _sendEventInit(node, event, args, result_event)
        if success and result_event:
            sent += [(node, result_event, bc)]
        else:
            results += [(node, success, bc)]

    for (node, result_event, bc) in sent:
        (success, result_args) = _sendEventWait(node, result_event, bc)
        results += [(node, success, result_args)]

    return results

def _sendEventInit(node, event, args, result_event):

    try:
        bc = broccoli.Connection("%s:%d" % (node.addr, node.getPort()), broclass="update",
                                 flags=broccoli.BRO_CFLAG_ALWAYS_QUEUE, connect=False)
        bc.subscribe(result_event, _event_callback(bc))
        bc.got_result = False
        bc.connect()
    except IOError, e:
        util.debug(1, "%-10s broccoli: cannot connect" % (("[%s]" % node.tag)))
        return (False, str(e))

    util.debug(1, "%-10s broccoli: %s(%s)" % (("[%s]" % node.tag), event, ", ".join(args)))
    bc.send(event, *args)
    return (True, bc)

def _sendEventWait(node, result_event, bc):
    # Wait until we have sent the event out.
    cnt = 0
    while bc.processInput():
        time.sleep(1)

        cnt += 1
        if cnt > 10:
            util.debug(1, "%-10s broccoli: timeout during send" % (("[%s]" % node.tag)))
            return (False, "time-out")

    if not result_event:
        return (True, [])

    # Wait for reply event.
    cnt = 0
    bc.processInput();
    while not bc.got_result:
        time.sleep(1)
        bc.processInput();

        cnt += 1
        if cnt > 10:
            util.debug(1, "%-10s broccoli: timeout during receive" % (("[%s]" % node.tag)))
            return (False, "time-out")

    util.debug(1, "%-10s broccoli: %s(%s)" % (("[%s]" % node.tag), result_event, ", ".join(bc.result_args)))
    return (True, bc.result_args)

def _event_callback(bc):
    def save_results(*args):
        bc.got_result = True
        bc.result_args = args
    return save_results