Management framework: allow agents to communicate with cluster nodes

This provides Broker-level plumbing that allows agents to reach out to their
managed Zeek nodes and collect responses.

As a first event, it establishes Management::Node::API::notify_node_hello,
to notify the agent when the cluster node is ready to communicate.

This commit also rewords some comments to replace "data cluster" with simply
"cluster", to avoid ambiguity with data nodes in SumStats, and expands
test-all-policy.zeek and related/dependent tests, since we're introducing new
scripts.
This commit is contained in:
Christian Kreibich 2022-03-23 16:27:28 -07:00
parent d29160e9de
commit 337c7267e0
11 changed files with 100 additions and 15 deletions

View file

@ -5,6 +5,8 @@
@load base/frameworks/broker @load base/frameworks/broker
@load policy/frameworks/management @load policy/frameworks/management
@load policy/frameworks/management/node/api
@load policy/frameworks/management/node/config
@load ./api @load ./api
@load ./config @load ./config
@ -120,7 +122,7 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M
g_nodes = table(); g_nodes = table();
# Refresh the data cluster and nodes tables # Refresh the cluster and nodes tables
g_data_cluster = table(); g_data_cluster = table();
for ( node in config$nodes ) for ( node in config$nodes )
@ -166,6 +168,11 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M
if ( node?$env ) if ( node?$env )
nc$env = node$env; nc$env = node$env;
# Always add the policy/frameworks/management/node scripts to any
# cluster node, since we require them to be able to communicate with
# the node.
nc$scripts[|nc$scripts|] = "policy/frameworks/management/node";
# XXX could use options to enable per-node overrides for # XXX could use options to enable per-node overrides for
# directory, stdout, stderr, others? # directory, stdout, stderr, others?
@ -209,7 +216,7 @@ event SupervisorControl::status_response(reqid: string, result: Supervisor::Stat
local cns = Management::NodeStatus( local cns = Management::NodeStatus(
$node=node, $state=Management::PENDING); $node=node, $state=Management::PENDING);
# Identify the role of the node. For data cluster roles (worker, # Identify the role of the node. For cluster roles (worker,
# manager, etc) we derive this from the cluster node table. For # manager, etc) we derive this from the cluster node table. For
# agent and controller, we identify via environment variables # agent and controller, we identify via environment variables
# that the controller framework establishes upon creation (see # that the controller framework establishes upon creation (see
@ -342,10 +349,11 @@ event zeek_init()
Broker::peer(supervisor_addr, Broker::default_port, Broker::default_listen_retry); Broker::peer(supervisor_addr, Broker::default_port, Broker::default_listen_retry);
# Agents need receive communication targeted at it, and any responses # Agents need receive communication targeted at it, any responses
# from the supervisor. # from the supervisor, and any responses from cluster nodes.
Broker::subscribe(agent_topic); Broker::subscribe(agent_topic);
Broker::subscribe(SupervisorControl::topic_prefix); Broker::subscribe(SupervisorControl::topic_prefix);
Broker::subscribe(Management::Node::node_topic);
# Auto-publish a bunch of events. Glob patterns or module-level # Auto-publish a bunch of events. Glob patterns or module-level
# auto-publish would be helpful here. # auto-publish would be helpful here.
@ -373,11 +381,10 @@ event zeek_init()
Management::Agent::controller$bound_port, Management::Agent::controller$bound_port,
Management::connect_retry); Management::connect_retry);
} }
else
{ # The agent always listens, to allow cluster nodes to peer with it.
# Controller connects to us; listen for it. # If the controller connects to us, it also uses this port.
Broker::listen(cat(epi$network$address), epi$network$bound_port); Broker::listen(cat(epi$network$address), epi$network$bound_port);
}
Management::Log::info("agent is live"); Management::Log::info("agent is live");
} }

View file

@ -386,10 +386,10 @@ event Management::Controller::API::set_configuration_request(reqid: string, conf
g_config_reqid_pending = req$id; g_config_reqid_pending = req$id;
# Compare the instance configuration to our current one. If it matches, # Compare the instance configuration to our current one. If it matches,
# we can proceed to deploying the new data cluster topology. If it does # we can proceed to deploying the new cluster topology. If it does
# not, we need to establish connectivity with agents we connect to, or # not, we need to establish connectivity with agents we connect to, or
# wait until all instances that connect to us have done so. Either triggers # wait until all instances that connect to us have done so. Either triggers
# a notify_agents_ready event, upon which we then deploy the data cluster. # a notify_agents_ready event, upon which we then deploy the topology.
# The current & new set of instance names. # The current & new set of instance names.
local insts_current: set[string]; local insts_current: set[string];

View file

@ -82,6 +82,7 @@ global l2s: table[Level] of string = {
global r2s: table[Management::Role] of string = { global r2s: table[Management::Role] of string = {
[Management::AGENT] = "AGENT", [Management::AGENT] = "AGENT",
[Management::CONTROLLER] = "CONTROLLER", [Management::CONTROLLER] = "CONTROLLER",
[Management::NODE] = "NODE",
}; };
function debug(message: string) function debug(message: string)

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,21 @@
##! The Management event API of cluster nodes. The API consists of request/
##! response event pairs, like elsewhere in the Management, Supervisor, and
##! Control frameworks.
@load policy/frameworks/management/types
module Management::Node::API;
export {
	# Notification events, sent from cluster nodes to their agent.

	## A "check-in" that a cluster node sends to the agent once the two
	## have peered, signaling that the node can now be communicated with.
	## It plays the role of :zeek:see:`Broker::peer_added` at the level of
	## the agent, and resembles what
	## :zeek:see:`Management::Agent::API::notify_agent_hello` does for
	## agents.
	##
	## node: the name of the node checking in, as given in
	##     :zeek:see:`Cluster::node`.
	##
	global notify_node_hello: event(node: string);
}

View file

@ -0,0 +1,9 @@
##! Configuration settings for nodes controlled by the Management framework.
module Management::Node;
export {
	## Broker topic used by managed cluster nodes. Every cluster node
	## subscribes to this topic automatically so that request events
	## from the Management framework reach it. The value can be
	## redefined.
	const node_topic: string = "zeek/management/node" &redef;
}

View file

@ -0,0 +1,39 @@
##! This module provides Management framework functionality that needs to be
##! present in every cluster node to allow Management agents to interact with
##! the cluster nodes they manage.
@load policy/frameworks/management/agent/config
@load policy/frameworks/management/log
@load ./api
@load ./config
module Management::Node;
# Tag our logs correctly: messages written via Management::Log by this
# script carry the NODE role, i.e. that of a managed cluster node.
redef Management::Log::role = Management::NODE;
# Checks in with the Management agent once it shows up as a Broker peer.
#
# Any new peering triggers this handler. The peer is considered to be the
# agent when its address and port match the agent's endpoint information,
# in which case the node raises notify_node_hello with its own name.
#
# peer: endpoint information of the newly added peer.
#
# msg: the message accompanying the peering (unused here).
event Broker::peer_added(peer: Broker::EndpointInfo, msg: string)
	{
	local epi = Management::Agent::endpoint_info();

	# The network field of Broker::EndpointInfo is optional. Without it
	# we cannot tell whether this peer is the agent, and accessing the
	# missing field would abort the handler with a runtime error, so
	# there is nothing to do.
	if ( ! peer?$network || ! epi?$network )
		return;

	# If this is the agent peering, notify it that we're ready
	if ( peer$network$address == epi$network$address &&
	     peer$network$bound_port == epi$network$bound_port )
		event Management::Node::API::notify_node_hello(Cluster::node);
	}
# Startup wiring for a managed cluster node: peer with the Management agent,
# subscribe to the node topic, and set up auto-publication of the node's
# API events on that topic.
event zeek_init()
	{
	# Where the agent can be reached, per the agent's configuration.
	local agent_net = Management::Agent::endpoint_info()$network;

	Broker::peer(agent_net$address, agent_net$bound_port, Management::connect_retry);
	Broker::subscribe(node_topic);

	# Events that get published on the node topic automatically whenever
	# this node raises them.
	local auto_events: vector of any = [
		Management::Node::API::notify_node_hello
	];

	for ( idx in auto_events )
		Broker::auto_publish(node_topic, auto_events[idx]);
	}

View file

@ -6,12 +6,13 @@ module Management;
export { export {
## Management infrastructure node type. This intentionally does not ## Management infrastructure node type. This intentionally does not
## include the data cluster node types (worker, logger, etc) -- those ## include the managed cluster node types (worker, logger, etc) -- those
## continue to be managed by the cluster framework. ## continue to be managed by the cluster framework.
type Role: enum { type Role: enum {
NONE, ##< No active role in cluster management NONE, ##< No active role in cluster management
AGENT, ##< A cluster management agent. AGENT, ##< A cluster management agent.
CONTROLLER, ##< The cluster's controller. CONTROLLER, ##< The cluster's controller.
NODE, ##< A managed cluster node (worker, manager, etc).
}; };
## A Zeek-side option with value. ## A Zeek-side option with value.

View file

@ -24,6 +24,10 @@
@load frameworks/management/__load__.zeek @load frameworks/management/__load__.zeek
@load frameworks/management/config.zeek @load frameworks/management/config.zeek
@load frameworks/management/log.zeek @load frameworks/management/log.zeek
# @load frameworks/management/node/__load__.zeek
@load frameworks/management/node/api.zeek
@load frameworks/management/node/config.zeek
# @load frameworks/management/node/main.zeek
@load frameworks/management/request.zeek @load frameworks/management/request.zeek
@load frameworks/management/types.zeek @load frameworks/management/types.zeek
@load frameworks/management/util.zeek @load frameworks/management/util.zeek

View file

@ -7,6 +7,8 @@
@load frameworks/control/controller.zeek @load frameworks/control/controller.zeek
@load frameworks/management/agent/main.zeek @load frameworks/management/agent/main.zeek
@load frameworks/management/controller/main.zeek @load frameworks/management/controller/main.zeek
@load frameworks/management/node/__load__.zeek
@load frameworks/management/node/main.zeek
@load frameworks/files/extract-all-files.zeek @load frameworks/files/extract-all-files.zeek
@load policy/misc/dump-events.zeek @load policy/misc/dump-events.zeek
@load policy/protocols/conn/speculative-service.zeek @load policy/protocols/conn/speculative-service.zeek

View file

@ -1,9 +1,9 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
### NOTE: This file has been sorted with diff-sort. ### NOTE: This file has been sorted with diff-sort.
warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:13 "Remove in v5.1. Use log-certs-base64.zeek instead." warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:15 "Remove in v5.1. Use log-certs-base64.zeek instead."
warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from command line arguments "Remove in v5.1. Use log-certs-base64.zeek instead." warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from command line arguments "Remove in v5.1. Use log-certs-base64.zeek instead."
warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:61 ("Remove in v5.1. OCSP logging is now enabled by default") warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:65 ("Remove in v5.1. OCSP logging is now enabled by default")
warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:61 ("Remove in v5.1. OCSP logging is now enabled by default") warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:65 ("Remove in v5.1. OCSP logging is now enabled by default")
warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. OCSP logging is now enabled by default") warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. OCSP logging is now enabled by default")
warning in <...>/notary.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:5 ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).") warning in <...>/notary.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:5 ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).")
warning in <...>/notary.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).") warning in <...>/notary.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).")