Introduce cluster controller and cluster agent scripting
This is a preliminary implementation of a subset of the functionality set out in our cluster controller architecture. The controller is the central management node, existing once in any Zeek cluster. The agent is a node that runs once per instance, where an instance will commonly be a physical machine. The agent in turn manages the "data cluster", i.e. the traditional notion of a Zeek cluster with manager, worker nodes, etc.

Agent and controller live in the policy folder, and are activated when loading policy/frameworks/cluster/agent and policy/frameworks/cluster/controller, respectively. Both run in nodes forked by the supervisor. When Zeek doesn't use the supervisor, they do nothing. Otherwise, boot.zeek instructs the supervisor to create the respective node, running main.zeek. Both controller and agent have their own config.zeek with relevant knobs.

For both, controller/types.zeek provides common data types, and controller/log.zeek provides basic logging (without logger communication -- no such node might exist). A primitive request-tracking abstraction can be found in controller/request.zeek to track outstanding request events and their subsequent responses.
This commit is contained in: parent a3623bfb2d, commit c744702f94
13 changed files with 1041 additions and 0 deletions
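For orientation, here is a minimal sketch of activating these scripts; the @load paths are the ones added by this commit, while everything else is illustrative. Note that both scripts are inert unless Zeek runs under the Supervisor (e.g. started via `zeek -j`):

# On the controller machine (hypothetical local.zeek):
@load policy/frameworks/cluster/controller

# On each monitored machine (hypothetical local.zeek):
@load policy/frameworks/cluster/agent
# Optionally name the instance; otherwise "agent-<hostname>" is used:
redef ClusterAgent::name = "instance-1";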
5
scripts/policy/frameworks/cluster/agent/__load__.zeek
Normal file
@@ -0,0 +1,5 @@
# The entry point for the cluster agent. It only runs bootstrap logic for
# launching via the Supervisor. If we're not running the Supervisor, this does
# nothing.

@load ./boot
33
scripts/policy/frameworks/cluster/agent/api.zeek
Normal file
@@ -0,0 +1,33 @@
@load base/frameworks/supervisor/control
@load policy/frameworks/cluster/controller/types

module ClusterAgent::API;

export {
	const version = 1;

	# Agent API events

	global set_configuration_request: event(reqid: string,
	    config: ClusterController::Types::Configuration);
	global set_configuration_response: event(reqid: string,
	    result: ClusterController::Types::Result);

	# Notification events, agent -> controller

	# Report agent being available.
	global notify_agent_hello: event(instance: string, host: addr,
	    api_version: count);

	# Report node state changes.
	global notify_change: event(instance: string,
	    n: ClusterController::Types::Node,
	    old: ClusterController::Types::State,
	    new: ClusterController::Types::State);

	# Report operational error.
	global notify_error: event(instance: string, msg: string, node: string &default="");

	# Report informational message.
	global notify_log: event(instance: string, msg: string, node: string &default="");
}
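To illustrate the request/response flow, a hedged controller-side sketch of driving this API follows. Per the topic convention in the agent's config.zeek below, requests go to "zeek/cluster-control/agent/<name>"; the agent name "instance-1" is hypothetical:

function send_config_to_agent(config: ClusterController::Types::Configuration)
	{
	# Assumes we already peered with the agent and subscribed to
	# ClusterAgent::topic_prefix, so its auto-published response reaches us.
	local topic = ClusterAgent::topic_prefix + "/instance-1";
	Broker::publish(topic, ClusterAgent::API::set_configuration_request,
	    unique_id(""), config);
	}

event ClusterAgent::API::set_configuration_response(reqid: string,
    result: ClusterController::Types::Result)
	{
	print fmt("agent answered request %s: success=%s", reqid, result$success);
	}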
35
scripts/policy/frameworks/cluster/agent/boot.zeek
Normal file
@@ -0,0 +1,35 @@
@load ./config

# The agent needs the supervisor to listen for node management requests. We
# need to tell it to do so, and we need to do so here, in the agent
# bootstrapping code, so the redef applies prior to the fork of the agent
# process itself.
redef SupervisorControl::enable_listen = T;

event zeek_init()
	{
	if ( ! Supervisor::is_supervisor() )
		return;

	local epi = ClusterAgent::endpoint_info();
	local sn = Supervisor::NodeConfig($name=epi$id, $bare_mode=T,
	    $scripts=vector("policy/frameworks/cluster/agent/main.zeek"));

	if ( ClusterAgent::directory != "" )
		sn$directory = ClusterAgent::directory;
	if ( ClusterAgent::stdout_file_suffix != "" )
		sn$stdout_file = epi$id + "." + ClusterAgent::stdout_file_suffix;
	if ( ClusterAgent::stderr_file_suffix != "" )
		sn$stderr_file = epi$id + "." + ClusterAgent::stderr_file_suffix;

	# This helps Zeek run controller and agent with a minimal set of scripts.
	sn$env["ZEEK_CLUSTER_MGMT_NODE"] = "AGENT";

	local res = Supervisor::create(sn);

	if ( res != "" )
		{
		print(fmt("error: supervisor could not create agent node: %s", res));
		exit(1);
		}
	}
85
scripts/policy/frameworks/cluster/agent/config.zeek
Normal file
@@ -0,0 +1,85 @@
@load policy/frameworks/cluster/controller/types

module ClusterAgent;

export {
	# The name this agent uses to represent the cluster instance
	# it manages. When the environment variable isn't set and there's
	# no redef, this falls back to "agent-<hostname>".
	const name = getenv("ZEEK_AGENT_NAME") &redef;

	# Agent stdout/stderr log files to produce in Zeek's working
	# directory. If empty, no such logs will result. The actual
	# log files have the agent's name (as per above) dot-prefixed.
	const stdout_file_suffix = "agent.stdout" &redef;
	const stderr_file_suffix = "agent.stderr" &redef;

	# The address and port the agent listens on. When
	# undefined, falls back to configurable default values.
	const listen_address = getenv("ZEEK_AGENT_ADDR") &redef;
	const default_address = Broker::default_listen_address &redef;

	const listen_port = getenv("ZEEK_AGENT_PORT") &redef;
	const default_port = 2151/tcp &redef;

	# The agent communicates under the following topic prefix,
	# suffixed with "/<name>" (see above):
	const topic_prefix = "zeek/cluster-control/agent" &redef;

	# The coordinates of the controller. When defined, it means
	# agents peer with (connect to) the controller; otherwise the
	# controller knows all agents and peers with them.
	const controller: Broker::NetworkInfo = [
	    $address="0.0.0.0", $bound_port=0/unknown] &redef;

	# Agent and controller currently log only, not via the data cluster's
	# logger. (This might get added later.) For now, this means that
	# if both write to the same log file, it gets garbled. The following
	# lets you specify the working directory specifically for the agent.
	const directory = "" &redef;

	# Working directory for data cluster nodes. When relative, note
	# that this will apply from the working directory of the agent,
	# since it creates data cluster nodes.
	const cluster_directory = "" &redef;

	# The following functions return the effective network endpoint
	# information for this agent, in two related forms.
	global instance: function(): ClusterController::Types::Instance;
	global endpoint_info: function(): Broker::EndpointInfo;
}

function instance(): ClusterController::Types::Instance
	{
	local epi = endpoint_info();
	return ClusterController::Types::Instance($name=epi$id,
	    $host=to_addr(epi$network$address),
	    $listen_port=epi$network$bound_port);
	}

function endpoint_info(): Broker::EndpointInfo
	{
	local epi: Broker::EndpointInfo;
	local network: Broker::NetworkInfo;

	if ( ClusterAgent::name != "" )
		epi$id = ClusterAgent::name;
	else
		epi$id = fmt("agent-%s", gethostname());

	if ( ClusterAgent::listen_address != "" )
		network$address = ClusterAgent::listen_address;
	else if ( ClusterAgent::default_address != "" )
		network$address = ClusterAgent::default_address;
	else
		network$address = "127.0.0.1";

	if ( ClusterAgent::listen_port != "" )
		network$bound_port = to_port(ClusterAgent::listen_port);
	else
		network$bound_port = ClusterAgent::default_port;

	epi$network = network;

	return epi;
	}
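A hedged example of adjusting these knobs from a site script (all values illustrative); the same effect is possible via the ZEEK_AGENT_* environment variables:

redef ClusterAgent::name = "agent-sensor-01";
redef ClusterAgent::default_port = 3151/tcp;
redef ClusterAgent::directory = "/var/spool/zeek/agent";
# Have the agent connect out to a controller instead of listening:
redef ClusterAgent::controller = [$address="192.0.2.10", $bound_port=2150/tcp];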
223
scripts/policy/frameworks/cluster/agent/main.zeek
Normal file
@@ -0,0 +1,223 @@
@load base/frameworks/broker

@load policy/frameworks/cluster/controller/config
@load policy/frameworks/cluster/controller/log
@load policy/frameworks/cluster/controller/request

@load ./api

redef ClusterController::role = ClusterController::Types::AGENT;

# The global configuration as passed to us by the controller
global global_config: ClusterController::Types::Configuration;

# A map to make other instance info accessible
global instances: table[string] of ClusterController::Types::Instance;

# A map for the nodes we run on this instance, via this agent.
global nodes: table[string] of ClusterController::Types::Node;

# The node map employed by the supervisor to describe the cluster
# topology to newly forked nodes. We refresh it when we receive
# new configurations.
global data_cluster: table[string] of Supervisor::ClusterEndpoint;

event SupervisorControl::create_response(reqid: string, result: string)
	{
	local req = ClusterController::Request::lookup(reqid);
	if ( ClusterController::Request::is_null(req) )
		return;

	local name = req$supervisor_state$node;

	if ( |result| > 0 )
		{
		local msg = fmt("failed to create node %s: %s", name, result);
		ClusterController::Log::error(msg);
		event ClusterAgent::API::notify_error(ClusterAgent::name, msg, name);
		}

	ClusterController::Request::finish(reqid);
	}

event SupervisorControl::destroy_response(reqid: string, result: bool)
	{
	local req = ClusterController::Request::lookup(reqid);
	if ( ClusterController::Request::is_null(req) )
		return;

	local name = req$supervisor_state$node;

	if ( ! result )
		{
		local msg = fmt("failed to destroy node %s, %s", name, reqid);
		ClusterController::Log::error(msg);
		event ClusterAgent::API::notify_error(ClusterAgent::name, msg, name);
		}

	ClusterController::Request::finish(reqid);
	}

function supervisor_create(nc: Supervisor::NodeConfig)
	{
	local req = ClusterController::Request::create();
	req$supervisor_state = ClusterController::Request::SupervisorState($node = nc$name);
	event SupervisorControl::create_request(req$id, nc);
	ClusterController::Log::info(fmt("issued supervisor create for %s, %s", nc$name, req$id));
	}

function supervisor_destroy(node: string)
	{
	local req = ClusterController::Request::create();
	req$supervisor_state = ClusterController::Request::SupervisorState($node = node);
	event SupervisorControl::destroy_request(req$id, node);
	ClusterController::Log::info(fmt("issued supervisor destroy for %s, %s", node, req$id));
	}

event ClusterAgent::API::set_configuration_request(reqid: string, config: ClusterController::Types::Configuration)
	{
	ClusterController::Log::info(fmt("rx ClusterAgent::API::set_configuration_request %s", reqid));

	local nodename: string;
	local node: ClusterController::Types::Node;
	local nc: Supervisor::NodeConfig;
	local msg: string;

	# Adopt the global configuration provided.
	# XXX this can later handle validation and persistence
	# XXX should do this transactionally, only set when all else worked
	global_config = config;

	# Refresh the instances table:
	instances = table();
	for ( inst in config$instances )
		instances[inst$name] = inst;

	# Terminate existing nodes
	for ( nodename in nodes )
		supervisor_destroy(nodename);

	nodes = table();

	# Refresh the data cluster and nodes tables

	data_cluster = table();
	for ( node in config$nodes )
		{
		if ( node$instance == ClusterAgent::name )
			nodes[node$name] = node;

		local cep = Supervisor::ClusterEndpoint(
		    $role = node$role,
		    $host = instances[node$instance]$host,
		    $p = node$p);

		if ( node?$interface )
			cep$interface = node$interface;

		data_cluster[node$name] = cep;
		}

	# Apply the new configuration via the supervisor

	for ( nodename in nodes )
		{
		node = nodes[nodename];
		nc = Supervisor::NodeConfig($name=nodename);

		if ( ClusterAgent::cluster_directory != "" )
			nc$directory = ClusterAgent::cluster_directory;

		if ( node?$interface )
			nc$interface = node$interface;
		if ( node?$cpu_affinity )
			nc$cpu_affinity = node$cpu_affinity;
		if ( node?$scripts )
			nc$scripts = node$scripts;
		if ( node?$env )
			nc$env = node$env;

		# XXX could use options to enable per-node overrides for
		# directory, stdout, stderr, others?

		nc$cluster = data_cluster;
		supervisor_create(nc);
		}

	# XXX this currently does not fail if any of the above problems occurred,
	# mainly due to the tediousness of handling the supervisor's response
	# events asynchronously. The only indication of error will be
	# notification events to the controller.

	local res = ClusterController::Types::Result(
	    $reqid = reqid,
	    $instance = ClusterAgent::name);

	ClusterController::Log::info(fmt("tx ClusterAgent::API::set_configuration_response %s", reqid));
	event ClusterAgent::API::set_configuration_response(reqid, res);
	}

event Broker::peer_added(peer: Broker::EndpointInfo, msg: string)
	{
	# This does not (cannot?) immediately verify that the new peer
	# is in fact a controller, so we might send this redundantly.
	# Controllers handle the hello event accordingly.

	local epi = ClusterAgent::endpoint_info();
	# XXX deal with unexpected peers, unless we're okay with it
	event ClusterAgent::API::notify_agent_hello(epi$id,
	    to_addr(epi$network$address), ClusterAgent::API::version);
	}

event zeek_init()
	{
	local epi = ClusterAgent::endpoint_info();
	local agent_topic = ClusterAgent::topic_prefix + "/" + epi$id;

	# The agent needs to peer with the supervisor -- this doesn't currently
	# happen automatically. The address defaults to Broker's default, which
	# relies on ZEEK_DEFAULT_LISTEN_ADDR and so might just be "". Broker
	# internally falls back to listening on any; we pick 127.0.0.1.
	local supervisor_addr = Broker::default_listen_address;
	if ( supervisor_addr == "" )
		supervisor_addr = "127.0.0.1";

	Broker::peer(supervisor_addr, Broker::default_port, Broker::default_listen_retry);

	# Agents need to receive communication targeted at them, and any
	# responses from the supervisor.
	Broker::subscribe(agent_topic);
	Broker::subscribe(SupervisorControl::topic_prefix);

	# Auto-publish a bunch of events. Glob patterns or module-level
	# auto-publish would be helpful here.
	Broker::auto_publish(agent_topic, ClusterAgent::API::set_configuration_response);
	Broker::auto_publish(agent_topic, ClusterAgent::API::notify_agent_hello);
	Broker::auto_publish(agent_topic, ClusterAgent::API::notify_change);
	Broker::auto_publish(agent_topic, ClusterAgent::API::notify_error);
	Broker::auto_publish(agent_topic, ClusterAgent::API::notify_log);

	Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::create_request);
	Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::create_response);
	Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::destroy_request);
	Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::destroy_response);
	Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::restart_request);
	Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::restart_response);
	Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::stop_request);

	# Establish connectivity with the controller.
	if ( ClusterAgent::controller$address != "0.0.0.0" )
		{
		# We connect to the controller.
		Broker::peer(ClusterAgent::controller$address,
		    ClusterAgent::controller$bound_port,
		    ClusterController::connect_retry);
		}
	else
		{
		# Controller connects to us; listen for it.
		Broker::listen(cat(epi$network$address), epi$network$bound_port);
		}

	ClusterController::Log::info("agent is live");
	}
5
scripts/policy/frameworks/cluster/controller/__load__.zeek
Normal file
@@ -0,0 +1,5 @@
# The entry point for the cluster controller. It only runs bootstrap logic for
# launching via the Supervisor. If we're not running the Supervisor, this does
# nothing.

@load ./boot
16
scripts/policy/frameworks/cluster/controller/api.zeek
Normal file
@@ -0,0 +1,16 @@
@load ./types

module ClusterController::API;

export {
	const version = 1;

	global get_instances_request: event(reqid: string);
	global get_instances_response: event(reqid: string,
	    instances: vector of ClusterController::Types::Instance);

	global set_configuration_request: event(reqid: string,
	    config: ClusterController::Types::Configuration);
	global set_configuration_response: event(reqid: string,
	    result: ClusterController::Types::ResultVec);
}
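As a hedged sketch of the client side of this API (the controller address is illustrative), a management client subscribes to ClusterController::topic -- where the controller auto-publishes its responses -- and fires requests at it:

event zeek_init()
	{
	Broker::subscribe(ClusterController::topic);
	Broker::peer("192.0.2.10", 2150/tcp);
	}

event Broker::peer_added(ep: Broker::EndpointInfo, msg: string)
	{
	Broker::publish(ClusterController::topic,
	    ClusterController::API::get_instances_request, unique_id(""));
	}

event ClusterController::API::get_instances_response(reqid: string,
    instances: vector of ClusterController::Types::Instance)
	{
	for ( i in instances )
		print fmt("instance: %s @ %s", instances[i]$name, instances[i]$host);
	}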
29
scripts/policy/frameworks/cluster/controller/boot.zeek
Normal file
@@ -0,0 +1,29 @@
@load ./config

event zeek_init()
	{
	if ( ! Supervisor::is_supervisor() )
		return;

	local epi = ClusterController::endpoint_info();
	local sn = Supervisor::NodeConfig($name=epi$id, $bare_mode=T,
	    $scripts=vector("policy/frameworks/cluster/controller/main.zeek"));

	if ( ClusterController::directory != "" )
		sn$directory = ClusterController::directory;
	if ( ClusterController::stdout_file != "" )
		sn$stdout_file = ClusterController::stdout_file;
	if ( ClusterController::stderr_file != "" )
		sn$stderr_file = ClusterController::stderr_file;

	# This helps Zeek run controller and agent with a minimal set of scripts.
	sn$env["ZEEK_CLUSTER_MGMT_NODE"] = "CONTROLLER";

	local res = Supervisor::create(sn);

	if ( res != "" )
		{
		print(fmt("error: supervisor could not create controller node: %s", res));
		exit(1);
		}
	}
85
scripts/policy/frameworks/cluster/controller/config.zeek
Normal file
@@ -0,0 +1,85 @@
@load policy/frameworks/cluster/agent/config

module ClusterController;

export {
	# The name of this controller in the cluster.
	# Without the environment variable and no redef, this
	# falls back to "controller-<hostname>".
	const name = getenv("ZEEK_CONTROLLER_NAME") &redef;

	# Controller stdout/stderr log files to produce in Zeek's
	# working directory. If empty, no such logs will result.
	const stdout_file = "controller.stdout" &redef;
	const stderr_file = "controller.stderr" &redef;

	# The address and port the controller listens on. When
	# undefined, falls back to the default_address, which you can
	# likewise customize.
	const listen_address = getenv("ZEEK_CONTROLLER_ADDR") &redef;
	const default_address = Broker::default_listen_address &redef;

	const listen_port = getenv("ZEEK_CONTROLLER_PORT") &redef;
	const default_port = 2150/tcp &redef;

	# A more aggressive default retry interval (vs default 30s)
	const connect_retry = 1sec &redef;

	# The controller listens for messages on this topic:
	const topic = "zeek/cluster-control/controller" &redef;

	# The set of agents to interact with. When this is non-empty
	# at startup, the controller contacts the agents; when it is
	# empty, it waits for agents to connect. The key is the name of
	# each instance. This should match the $name member of the
	# instance records.
	const instances: table[string] of ClusterController::Types::Instance = { } &redef;

	# The role of this node in cluster management. Agent and
	# controller both redef this. Used during logging.
	const role = ClusterController::Types::NONE &redef;

	# Agent and controller currently log only, not via the data cluster's
	# logger. (This might get added later.) For now, this means that
	# if both write to the same log file, it gets garbled. The following
	# lets you specify the working directory specifically for the controller.
	const directory = "" &redef;

	# The following functions return the effective network endpoint
	# information for this controller, in two related forms.
	global network_info: function(): Broker::NetworkInfo;
	global endpoint_info: function(): Broker::EndpointInfo;
}

function network_info(): Broker::NetworkInfo
	{
	local ni: Broker::NetworkInfo;

	if ( ClusterController::listen_address != "" )
		ni$address = ClusterController::listen_address;
	else if ( ClusterController::default_address != "" )
		ni$address = ClusterController::default_address;
	else
		ni$address = "127.0.0.1";

	if ( ClusterController::listen_port != "" )
		ni$bound_port = to_port(ClusterController::listen_port);
	else
		ni$bound_port = ClusterController::default_port;

	return ni;
	}

function endpoint_info(): Broker::EndpointInfo
	{
	local epi: Broker::EndpointInfo;

	if ( ClusterController::name != "" )
		epi$id = ClusterController::name;
	else
		epi$id = fmt("controller-%s", gethostname());

	epi$network = network_info();

	return epi;
	}
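For example, a controller that should connect out to two known agents could be configured as follows (names and addresses illustrative). Note that $listen_port is required here, since the controller initiates the peering:

redef ClusterController::instances = {
	["instance-1"] = ClusterController::Types::Instance($name="instance-1",
	    $host=192.0.2.11, $listen_port=2151/tcp),
	["instance-2"] = ClusterController::Types::Instance($name="instance-2",
	    $host=192.0.2.12, $listen_port=2151/tcp),
};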
109
scripts/policy/frameworks/cluster/controller/log.zeek
Normal file
@@ -0,0 +1,109 @@
@load ./config

module ClusterController::Log;

export {
	## The cluster logging stream identifier.
	redef enum Log::ID += { LOG };

	## A default logging policy hook for the stream.
	global log_policy: Log::PolicyHook;

	type Level: enum {
		DEBUG,
		INFO,
		WARNING,
		ERROR,
	};

	## The record type which contains the column fields of the cluster log.
	type Info: record {
		## The time at which a cluster message was generated.
		ts: time;
		## The name of the node that is creating the log record.
		node: string;
		## Log level of this message, converted from the above Level enum.
		level: string;
		## The role of the node, translated from ClusterController::Types::Role.
		role: string;
		## A message indicating information about cluster controller operation.
		message: string;
	} &log;

	global log_level = DEBUG &redef;

	global info: function(message: string);
	global warning: function(message: string);
	global error: function(message: string);
}

# Enum translations to strings. This avoids those enums being reported
# with full qualifications in the logs, which is too verbose.

global l2s: table[Level] of string = {
	[DEBUG] = "DEBUG",
	[INFO] = "INFO",
	[WARNING] = "WARNING",
	[ERROR] = "ERROR",
};

global r2s: table[ClusterController::Types::Role] of string = {
	[ClusterController::Types::AGENT] = "AGENT",
	[ClusterController::Types::CONTROLLER] = "CONTROLLER",
};

function debug(message: string)
	{
	if ( enum_to_int(log_level) > enum_to_int(DEBUG) )
		return;

	local node = Supervisor::node();
	Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[DEBUG],
	    $role=r2s[ClusterController::role], $message=message]);
	}

function info(message: string)
	{
	if ( enum_to_int(log_level) > enum_to_int(INFO) )
		return;

	local node = Supervisor::node();
	Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[INFO],
	    $role=r2s[ClusterController::role], $message=message]);
	}

function warning(message: string)
	{
	if ( enum_to_int(log_level) > enum_to_int(WARNING) )
		return;

	local node = Supervisor::node();
	Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[WARNING],
	    $role=r2s[ClusterController::role], $message=message]);
	}

function error(message: string)
	{
	if ( enum_to_int(log_level) > enum_to_int(ERROR) )
		return;

	local node = Supervisor::node();
	Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[ERROR],
	    $role=r2s[ClusterController::role], $message=message]);
	}

event zeek_init()
	{
	if ( ! Supervisor::is_supervised() )
		return;

	local node = Supervisor::node();

	# Defining the stream outside of the stream creation call sidesteps
	# the coverage.find-bro-logs test, which tries to inventory all logs.
	# This log isn't yet ready for that level of scrutiny.
	local stream = Log::Stream($columns=Info, $path=fmt("cluster-%s", node$name),
	    $policy=log_policy);

	Log::create_stream(ClusterController::Log::LOG, stream);
	}
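Usage is a one-liner from anywhere in agent or controller code; since the level check compares enum ordering, a redef of log_level filters lower-priority messages. A small hedged example:

# Hypothetical site tweak: drop DEBUG messages.
redef ClusterController::Log::log_level = ClusterController::Log::INFO;

event zeek_init()
	{
	# Writes an INFO entry to cluster-<nodename>.log in a supervised node:
	ClusterController::Log::info(fmt("checked in at %s", network_time()));
	}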
250
scripts/policy/frameworks/cluster/controller/main.zeek
Normal file
@@ -0,0 +1,250 @@
@load base/frameworks/broker

@load policy/frameworks/cluster/agent/config
@load policy/frameworks/cluster/agent/api

@load ./api
@load ./log
@load ./request

redef ClusterController::role = ClusterController::Types::CONTROLLER;

event ClusterAgent::API::notify_agent_hello(instance: string, host: addr, api_version: count)
	{
	# See if we already know about this agent; if not, register it.
	#
	# XXX protection against rogue agents?

	if ( instance in ClusterController::instances )
		{
		# Do nothing, unless this known agent checks in with a mismatching
		# API version, in which case we kick it out.
		if ( api_version != ClusterController::API::version )
			{
			local inst = ClusterController::instances[instance];
			if ( inst?$listen_port )
				{
				# We peered with this instance, unpeer.
				Broker::unpeer(cat(inst$host), inst$listen_port);
				# XXX what to do if they connected to us?
				}
			delete ClusterController::instances[instance];
			}

		# Update the instance name in the pointed-to record, in case it
		# was previously named otherwise. Not being too picky here allows
		# the user some leeway in spelling out the original config.
		ClusterController::instances[instance]$name = instance;

		return;
		}

	if ( api_version != ClusterController::API::version )
		{
		ClusterController::Log::warning(
		    fmt("agent %s/%s speaks incompatible agent protocol (%s, need %s), unpeering",
		        instance, host, api_version, ClusterController::API::version));
		}

	ClusterController::instances[instance] = ClusterController::Types::Instance($name=instance, $host=host);
	ClusterController::Log::info(fmt("instance %s/%s has checked in", instance, host));
	}

event ClusterAgent::API::notify_change(instance: string, n: ClusterController::Types::Node,
    old: ClusterController::Types::State,
    new: ClusterController::Types::State)
	{
	# XXX TODO
	}

event ClusterAgent::API::notify_error(instance: string, msg: string, node: string)
	{
	# XXX TODO
	}

event ClusterAgent::API::notify_log(instance: string, msg: string, node: string)
	{
	# XXX TODO
	}

event ClusterAgent::API::set_configuration_response(reqid: string, result: ClusterController::Types::Result)
	{
	ClusterController::Log::info(fmt("rx ClusterAgent::API::set_configuration_response %s", reqid));

	# Retrieve state for the request we just got a response to
	local areq = ClusterController::Request::lookup(reqid);
	if ( ClusterController::Request::is_null(areq) )
		return;

	# Record the result and mark the request as done. This also
	# marks the request as done in the parent-level request, since
	# these records are stored by reference.
	areq$results[0] = result; # We only have a single result here atm
	areq$finished = T;

	# Update the original request from the client:
	local req = ClusterController::Request::lookup(areq$parent_id);
	if ( ClusterController::Request::is_null(req) )
		return;

	# If there are any requests to the agents still unfinished,
	# we're not done yet.
	for ( i in req$set_configuration_state$requests )
		if ( ! req$set_configuration_state$requests[i]$finished )
			return;

	# All set_configuration requests to instances are done, so respond
	# back to client. We need to compose the result, aggregating
	# the results we got from the requests to the agents. In the
	# end we have one Result per instance requested in the
	# original set_configuration_request.
	#
	# XXX we can likely generalize result aggregation in the request module.
	for ( i in req$set_configuration_state$requests )
		{
		local r = req$set_configuration_state$requests[i];

		local success = T;
		local errors: string_vec;
		local instance = "";

		for ( j in r$results )
			{
			local res = r$results[j];
			instance = res$instance;

			if ( res$success )
				next;

			success = F;
			errors += fmt("node %s failed: %s", res$node, res$error);
			}

		req$results += ClusterController::Types::Result(
		    $reqid = req$id,
		    $instance = instance,
		    $success = success,
		    $error = join_string_vec(errors, ", ")
		);

		ClusterController::Request::finish(r$id);
		}

	ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s", req$id));
	event ClusterController::API::set_configuration_response(req$id, req$results);
	ClusterController::Request::finish(req$id);
	}

event ClusterController::API::set_configuration_request(reqid: string, config: ClusterController::Types::Configuration)
	{
	ClusterController::Log::info(fmt("rx ClusterController::API::set_configuration_request %s", reqid));

	local req = ClusterController::Request::create(reqid);
	req$set_configuration_state = ClusterController::Request::SetConfigurationState();

	# Compare new configuration to the current one and send updates
	# to the instances as needed.
	if ( config?$instances )
		{
		# XXX properly handle instance update: connect to new instances provided
		# when they are listening, accept connections from new instances that are
		# not
		for ( inst in config$instances )
			{
			if ( inst$name !in ClusterController::instances )
				{
				local res = ClusterController::Types::Result($reqid=reqid, $instance=inst$name);
				res$error = fmt("instance %s is unknown, skipping", inst$name);
				req$results += res;
				}
			}
		}

	# XXX validate the configuration:
	# - Are node instances among defined instances?
	# - Are all names unique?
	# - Are any node options understood?
	# - Do node types with optional fields have required values?
	# ...

	# Transmit the configuration on to the agents. They need to be aware of
	# each other's location and nodes, so the data cluster nodes can connect
	# (for example, so a worker on instance 1 can connect to a logger on
	# instance 2).
	for ( name in ClusterController::instances )
		{
		local agent_topic = ClusterAgent::topic_prefix + "/" + name;
		local areq = ClusterController::Request::create();
		areq$parent_id = reqid;

		# We track the requests sent off to each agent. As the
		# responses come in, we can check them off as completed,
		# and once all are, we respond back to the client.
		req$set_configuration_state$requests += areq;

		# XXX could also broadcast just once on the agent prefix, but
		# explicit request/response pairs for each agent seems cleaner.
		ClusterController::Log::info(fmt("tx ClusterAgent::API::set_configuration_request %s to %s",
		    areq$id, name));
		Broker::publish(agent_topic, ClusterAgent::API::set_configuration_request, areq$id, config);
		}

	# Response event gets sent via the agents' response event.
	}

event ClusterController::API::get_instances_request(reqid: string)
	{
	ClusterController::Log::info(fmt("rx ClusterController::API::get_instances_request %s", reqid));

	local insts: vector of ClusterController::Types::Instance;

	for ( i in ClusterController::instances )
		insts += ClusterController::instances[i];

	ClusterController::Log::info(fmt("tx ClusterController::API::get_instances_response %s", reqid));
	event ClusterController::API::get_instances_response(reqid, insts);
	}

event zeek_init()
	{
	# Controller always listens -- it needs to be able to respond
	# to the Zeek client. This port is also used by the agents
	# if they connect to the controller.
	local cni = ClusterController::network_info();
	Broker::listen(cat(cni$address), cni$bound_port);

	Broker::subscribe(ClusterAgent::topic_prefix);
	Broker::subscribe(ClusterController::topic);

	Broker::auto_publish(ClusterController::topic,
	    ClusterController::API::get_instances_response);
	Broker::auto_publish(ClusterController::topic,
	    ClusterController::API::set_configuration_response);

	if ( |ClusterController::instances| > 0 )
		{
		# We peer with the agents -- otherwise, the agents peer
		# with (i.e., connect to) us.
		for ( i in ClusterController::instances )
			{
			local inst = ClusterController::instances[i];

			if ( ! inst?$listen_port )
				{
				# XXX config error -- this must be there
				next;
				}

			Broker::peer(cat(inst$host), inst$listen_port,
			    ClusterController::connect_retry);
			}
		}

	# If ClusterController::instances is empty, agents peer with
	# us and we do nothing. We'll build up state as the
	# notify_agent_hello() events come in.

	ClusterController::Log::info("controller is live");
	}
86
scripts/policy/frameworks/cluster/controller/request.zeek
Normal file
@@ -0,0 +1,86 @@
@load ./types

module ClusterController::Request;

export {
	type Request: record {
		id: string;
		parent_id: string &optional;
	};

	# API-specific state. XXX we may be able to generalize after this
	# has settled a bit more.

	# State specific to the set_configuration request/response events
	type SetConfigurationState: record {
		requests: vector of Request &default=vector();
	};

	# State specific to the set_nodes request/response events
	type SetNodesState: record {
		requests: vector of Request &default=vector();
	};

	# State specific to supervisor interactions
	type SupervisorState: record {
		node: string;
	};

	# The redef is a workaround so we can use the Request type
	# while it is still being defined
	redef record Request += {
		results: ClusterController::Types::ResultVec &default=vector();
		finished: bool &default=F;

		set_configuration_state: SetConfigurationState &optional;
		set_nodes_state: SetNodesState &optional;
		supervisor_state: SupervisorState &optional;
	};

	global null_req = Request($id="", $finished=T);

	global create: function(reqid: string &default=unique_id("")): Request;
	global lookup: function(reqid: string): Request;
	global finish: function(reqid: string): bool;

	global is_null: function(request: Request): bool;
}

# XXX this needs a mechanism for expiring stale requests
global requests: table[string] of Request;

function create(reqid: string): Request
	{
	local ret = Request($id=reqid);
	requests[reqid] = ret;
	return ret;
	}

function lookup(reqid: string): Request
	{
	if ( reqid in requests )
		return requests[reqid];

	return null_req;
	}

function finish(reqid: string): bool
	{
	if ( reqid !in requests )
		return F;

	local req = requests[reqid];
	delete requests[reqid];

	req$finished = T;

	return T;
	}

function is_null(request: Request): bool
	{
	if ( request$id == "" )
		return T;

	return F;
	}
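The intended call pattern, mirroring its use in agent/main.zeek, pairs create() on the request side with lookup()/finish() in the response handler. A hedged sketch, assuming the SupervisorControl status request/response events from base/frameworks/supervisor/control:

function issue_status_request(node: string)
	{
	local req = ClusterController::Request::create();
	req$supervisor_state = ClusterController::Request::SupervisorState($node=node);
	event SupervisorControl::status_request(req$id, node);
	}

event SupervisorControl::status_response(reqid: string, result: Supervisor::Status)
	{
	local req = ClusterController::Request::lookup(reqid);
	if ( ClusterController::Request::is_null(req) )
		return; # Not a request we track.

	print fmt("got status for node %s", req$supervisor_state$node);
	ClusterController::Request::finish(reqid);
	}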
80
scripts/policy/frameworks/cluster/controller/types.zeek
Normal file
@@ -0,0 +1,80 @@
# Types for the Cluster Controller framework. These are used by both agent and controller.

module ClusterController::Types;

export {
	## Management infrastructure node type. This intentionally does not
	## include the data cluster node types (worker, logger, etc) -- those
	## continue to be managed by the cluster framework.
	type Role: enum {
		NONE,
		AGENT,
		CONTROLLER,
	};

	## A Zeek-side option with value.
	type Option: record {
		name: string;	# Name of option
		value: string;	# Value of option
	};

	## Configuration describing a Zeek instance running a Cluster
	## Agent. Normally, there'll be one instance per cluster
	## system: a single physical system.
	type Instance: record {
		# Unique, human-readable instance name
		name: string;
		# IP address of system
		host: addr;
		# Agent listening port. Not needed if agents connect to controller.
		listen_port: port &optional;
	};

	## State that a Cluster Node can be in. State changes trigger an
	## API notification (see notify_change()).
	type State: enum {
		Running,	# Running and operating normally
		Stopped,	# Explicitly stopped
		Failed,		# Failed to start and is permanently halted
		Crashed,	# Crashed; will be restarted
		Unknown,	# State not known currently (e.g., because of lost connectivity)
	};

	## Configuration describing a Cluster Node process.
	type Node: record {
		name: string;	# Cluster-unique, human-readable node name
		instance: string;	# Name of instance where node is to run
		p: port;	# Port on which this node will listen
		role: Supervisor::ClusterRole;	# Role of the node.
		state: State;	# Desired, or current, run state.
		scripts: vector of string &optional;	# Additional Zeek scripts for node
		options: set[Option] &optional;	# Zeek options for node
		interface: string &optional;	# Interface to sniff
		cpu_affinity: int &optional;	# CPU/core number to pin to
		env: table[string] of string &default=table();	# Custom environment vars
	};

	# Data structure capturing a cluster's complete configuration.
	type Configuration: record {
		id: string &default=unique_id("");	# Unique identifier for a particular configuration

		## The instances in the cluster.
		## XXX we may be able to make this optional
		instances: set[Instance];

		## The set of nodes in the cluster, as distributed over the instances.
		nodes: set[Node];
	};

	# Return value for request-response API event pairs
	type Result: record {
		reqid: string;	# Request ID of operation this result refers to
		instance: string;	# Name of associated instance (for context)
		success: bool &default=T;	# True if successful
		data: any &optional;	# Addl data returned for successful operation
		error: string &default="";	# Descriptive error on failure
		node: string &optional;	# Name of associated node (for context)
	};

	type ResultVec: vector of Result;
}
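To make the record shapes concrete, a minimal single-instance configuration might be assembled like this (all names, addresses, and ports illustrative):

event zeek_init()
	{
	local config = ClusterController::Types::Configuration(
	    $instances=set(
	        ClusterController::Types::Instance($name="instance-1", $host=192.0.2.11)),
	    $nodes=set(
	        ClusterController::Types::Node($name="manager", $instance="instance-1",
	            $p=5000/tcp, $role=Supervisor::MANAGER,
	            $state=ClusterController::Types::Running),
	        ClusterController::Types::Node($name="worker-01", $instance="instance-1",
	            $p=5001/tcp, $role=Supervisor::WORKER,
	            $state=ClusterController::Types::Running, $interface="eth0")));
	# This value could now be sent via ClusterController::API::set_configuration_request.
	}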