Management framework: Supervisor extensions for stdout/stderr handling

This improves the framework's handling of Zeek node stdout and stderr by
extending the (script-layer) Supervisor functionality.

- The Supervisor _either_ directs Zeek nodes' stdout/stderr to files _or_ lets
you hook into it at the script level. We'd like both: files make sense to allow
inspection outside of the framework, and the framework would benefit from
tapping into the streams e.g. for error context. We now provide the file
redirection functionality in the Supervisor, in addition to the hook
mechanism. The hook mechanism also builds up rolling windows of up to
100 lines (configurable) into stdout/stderr.

- The new Mangement::Supervisor::API::notify_node_exit event notifies
subscribers (agents, really) that a particular node has exited (and is possibly
being restarted by the Supervisor). The event includes the name of the node,
plus its recent stdout/stderr context.
This commit is contained in:
Christian Kreibich 2022-05-30 12:54:58 -07:00
parent f74f21767a
commit 24a495da42
7 changed files with 172 additions and 2 deletions

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,18 @@
@load policy/frameworks/management/types
module Management::Supervisor::API;
export {
## The Supervisor generates this event whenever it has received a status
## update from the stem, indicating that a node exited.
##
## node: the name of a node previously created via
## :zeek:see:`Supervisor::create`.
##
## outputs: stdout/stderr context for the node. The contained strings
## span up to the 100 most recent lines in the corresponding
## stream. See :zeek:see:`Management::Supervisor::output_max_lines`
## to adjust the line limit.
##
global notify_node_exit: event(node: string, outputs: Management::NodeOutputs);
}

View file

@ -0,0 +1,13 @@
##! Configuration settings for the Management framework's supervisor extension.
module Management::Supervisor;
export {
## The Broker topic for Management framework communication with the
## Supervisor. The agent subscribes to this.
const topic_prefix = "zeek/management/supervisor" &redef;
## The maximum number of stdout/stderr output lines to convey in
## :zeek:see:`Management::Supervisor::API::notify_node_exit` events.
const output_max_lines: count = 100 &redef;
}

View file

@ -0,0 +1,122 @@
##! This module provides functionality the Management framework places directly
##! in the Supervisor.
@load base/utils/paths
@load base/utils/queue
@load policy/frameworks/management/types
@load policy/frameworks/management/node/config
@load ./api
@load ./config
module Management::Supervisor;
# stdout/stderr state for a given node.
type NodeOutputStreams: record {
# Line buffers for stdout and stderr. Their length is capped
# to the most recent Management::Supervisor::output_max_lines.
stdout: Queue::Queue;
stderr: Queue::Queue;
#
stdout_file: file &optional;
stderr_file: file &optional;
};
# This tracks output state for the current nodes.
global g_outputs: table[string] of NodeOutputStreams;
function make_node_output_streams(node: string): NodeOutputStreams
{
local stdout = Queue::init([$max_len = Management::Supervisor::output_max_lines]);
local stderr = Queue::init([$max_len = Management::Supervisor::output_max_lines]);
local res = NodeOutputStreams($stdout=stdout, $stderr=stderr);
local status = Supervisor::status(node);
if ( node !in status$nodes )
return res;
local ns = status$nodes[node];
local directory = ".";
if ( ns$node?$directory )
directory = ns$node$directory;
if ( Management::Node::stdout_file != "" )
res$stdout_file = open(build_path(directory, Management::Node::stdout_file));
if ( Management::Node::stderr_file != "" )
res$stderr_file = open(build_path(directory, Management::Node::stderr_file));
return res;
}
hook Supervisor::stdout_hook(node: string, msg: string)
{
if ( node !in g_outputs )
g_outputs[node] = make_node_output_streams(node);
# Write to the stdout file if we have one. The flush is clunky, but
# seems worth it: it's too confusing for errors to have happened and not
# yet shown up in the file. (The Supervisor's built-in file redirection
# does this too.)
if ( g_outputs[node]?$stdout_file )
{
print g_outputs[node]$stdout_file, msg;
flush_all();
}
# Update the sliding window of recent output lines.
Queue::put(g_outputs[node]$stdout, msg);
# Don't print this message in the Supervisor's own stdout
break;
}
hook Supervisor::stderr_hook(node: string, msg: string)
{
if ( node !in g_outputs )
g_outputs[node] = make_node_output_streams(node);
if ( g_outputs[node]?$stderr_file )
{
print g_outputs[node]$stderr_file, msg;
flush_all();
}
Queue::put(g_outputs[node]$stderr, msg);
# Don't print this message in the Supervisor's own stdout
break;
}
event Supervisor::node_status(node: string, pid: count)
{
# The node just started or restarted. If we have collected any output
# for its past life, send it via a notify_node_exit event.
if ( node in g_outputs )
{
local stdout_lines: vector of string;
local stderr_lines: vector of string;
Queue::get_vector(g_outputs[node]$stdout, stdout_lines);
Queue::get_vector(g_outputs[node]$stderr, stderr_lines);
if ( |stdout_lines| > 0 || |stderr_lines| > 0 )
{
local outputs = Management::NodeOutputs(
$stdout = join_string_vec(stdout_lines, "\n"),
$stderr = join_string_vec(stderr_lines, "\n"));
Broker::publish(topic_prefix, Management::Supervisor::API::notify_node_exit, node, outputs);
}
if ( g_outputs[node]?$stdout_file )
close(g_outputs[node]$stdout_file);
if ( g_outputs[node]?$stderr_file )
close(g_outputs[node]$stderr_file);
}
g_outputs[node] = make_node_output_streams(node);
}

View file

@ -104,6 +104,18 @@ export {
type ResultVec: vector of Result;
## In :zeek:see:`Management::Controller::API::set_configuration_response`,
## events, each :zeek:see:`Management::Result` indicates the outcome of a
## requested cluster node. If a node does not launch properly (meaning
## it doesn't check in with the agent on thee machine it's running on),
## the result will indicate failure, and its data field will be an
## instance of this record, capturing the stdout and stderr output of
## the failing node.
type NodeOutputs: record {
stdout: string; ##< The stdout stream of a Zeek process
stderr: string; ##< The stderr stream of a Zeek process
};
## Given a :zeek:see:`Management::Result` record,
## this function returns a string summarizing it.
global result_to_string: function(res: Result): string;

View file

@ -29,6 +29,10 @@
@load frameworks/management/node/api.zeek
@load frameworks/management/node/config.zeek
# @load frameworks/management/node/main.zeek
@load frameworks/management/supervisor/__load__.zeek
@load frameworks/management/supervisor/api.zeek
@load frameworks/management/supervisor/config.zeek
@load frameworks/management/supervisor/main.zeek
@load frameworks/management/request.zeek
@load frameworks/management/types.zeek
@load frameworks/management/util.zeek

View file

@ -2,8 +2,8 @@
### NOTE: This file has been sorted with diff-sort.
warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:15 "Remove in v5.1. Use log-certs-base64.zeek instead."
warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from command line arguments "Remove in v5.1. Use log-certs-base64.zeek instead."
warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:66 ("Remove in v5.1. OCSP logging is now enabled by default")
warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:66 ("Remove in v5.1. OCSP logging is now enabled by default")
warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:70 ("Remove in v5.1. OCSP logging is now enabled by default")
warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:70 ("Remove in v5.1. OCSP logging is now enabled by default")
warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. OCSP logging is now enabled by default")
warning in <...>/notary.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:5 ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).")
warning in <...>/notary.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).")