From d2903bb6454c2f51b483872f635e32b10a464966 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Thu, 12 May 2022 09:59:51 -0700 Subject: [PATCH 01/15] Management framework: increase robustness of agent/controller naming The fallback mechanism when no explicit agent/controller names are configured didn't work properly, because many places in the code relied on accessing the name via the variables meant for explicit configuration, such as Management::Agent::name. Agent and controller now offer functions for computing the correct effective name, and we use that throughout. --- .../frameworks/management/agent/api.zeek | 7 +++++-- .../frameworks/management/agent/config.zeek | 20 +++++++++++++------ .../frameworks/management/agent/main.zeek | 14 ++++++------- .../frameworks/management/controller/api.zeek | 5 ++++- .../management/controller/config.zeek | 17 +++++++++++----- 5 files changed, 42 insertions(+), 21 deletions(-) diff --git a/scripts/policy/frameworks/management/agent/api.zeek b/scripts/policy/frameworks/management/agent/api.zeek index 8ba47ee67d..f8dc1239a7 100644 --- a/scripts/policy/frameworks/management/agent/api.zeek +++ b/scripts/policy/frameworks/management/agent/api.zeek @@ -79,6 +79,7 @@ export { ## nodes: a set of cluster node names (e.g. "worker-01") to retrieve ## the values from. An empty set, supplied by default, means ## retrieval from all nodes managed by the agent. + ## global node_dispatch_request: event(reqid: string, action: vector of string, nodes: set[string] &default=set()); @@ -93,6 +94,7 @@ export { ## agent. Upon success, each :zeek:see:`Management::Result` record's ## data member contains the dispatches' response in a data type ## appropriate for the respective dispatch. + ## global node_dispatch_response: event(reqid: string, result: Management::ResultVec); @@ -145,7 +147,8 @@ export { ## communicate with. It is a controller-level equivalent of ## `:zeek:see:`Broker::peer_added`. 
## - ## instance: an instance name, really the agent's name as per :zeek:see:`Management::Agent::name`. + ## instance: an instance name, really the agent's name as per + ## :zeek:see:`Management::Agent::get_name`. ## ## host: the IP address of the agent. (This may change in the future.) ## @@ -168,4 +171,4 @@ export { # Report informational message. global notify_log: event(instance: string, msg: string, node: string &default=""); - } +} diff --git a/scripts/policy/frameworks/management/agent/config.zeek b/scripts/policy/frameworks/management/agent/config.zeek index 29567b8f5e..1a94fc8df1 100644 --- a/scripts/policy/frameworks/management/agent/config.zeek +++ b/scripts/policy/frameworks/management/agent/config.zeek @@ -15,7 +15,7 @@ export { ## Agent stdout log configuration. If the string is non-empty, Zeek will ## produce a free-form log (i.e., not one governed by Zeek's logging ## framework) in Zeek's working directory. The final log's name is - ## ".", where the name is taken from :zeek:see:`Management::Agent::name`, + ## ".", where the name is taken from :zeek:see:`Management::Agent::get_name`, ## and the suffix is defined by the following variable. If left empty, ## no such log results. ## @@ -44,7 +44,7 @@ export { const default_port = 2151/tcp &redef; ## The agent's Broker topic prefix. For its own communication, the agent - ## suffixes this with "/", based on :zeek:see:`Management::Agent::name`. + ## suffixes this with "/", based on :zeek:see:`Management::Agent::get_name`. const topic_prefix = "zeek/management/agent" &redef; ## The network coordinates of the controller. When defined, the agent @@ -66,6 +66,9 @@ export { ## cluster nodes. const cluster_directory = "" &redef; + ## Returns the effective name of this agent. + global get_name: function(): string; + ## Returns a :zeek:see:`Management::Instance` describing this ## instance (its agent name plus listening address/port, as applicable). 
global instance: function(): Management::Instance; @@ -76,6 +79,14 @@ export { global endpoint_info: function(): Broker::EndpointInfo; } +function get_name(): string + { + if ( name != "" ) + return name; + + return fmt("agent-%s", gethostname()); + } + function instance(): Management::Instance { local epi = endpoint_info(); @@ -89,10 +100,7 @@ function endpoint_info(): Broker::EndpointInfo local epi: Broker::EndpointInfo; local network: Broker::NetworkInfo; - if ( Management::Agent::name != "" ) - epi$id = Management::Agent::name; - else - epi$id = fmt("agent-%s", gethostname()); + epi$id = get_name(); if ( Management::Agent::listen_address != "" ) network$address = Management::Agent::listen_address; diff --git a/scripts/policy/frameworks/management/agent/main.zeek b/scripts/policy/frameworks/management/agent/main.zeek index 45993b29c4..4d977939a4 100644 --- a/scripts/policy/frameworks/management/agent/main.zeek +++ b/scripts/policy/frameworks/management/agent/main.zeek @@ -77,7 +77,7 @@ event SupervisorControl::create_response(reqid: string, result: string) Management::Log::error(msg); Broker::publish(agent_topic(), Management::Agent::API::notify_error, - Management::Agent::name, msg, name); + Management::Agent::get_name(), msg, name); } Management::Request::finish(reqid); @@ -97,7 +97,7 @@ event SupervisorControl::destroy_response(reqid: string, result: bool) Management::Log::error(msg); Broker::publish(agent_topic(), Management::Agent::API::notify_error, - Management::Agent::name, msg, name); + Management::Agent::get_name(), msg, name); } Management::Request::finish(reqid); @@ -150,7 +150,7 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M for ( node in config$nodes ) { - if ( node$instance == Management::Agent::name ) + if ( node$instance == Management::Agent::get_name() ) g_nodes[node$name] = node; # The cluster and supervisor frameworks require a port for every @@ -214,7 +214,7 @@ event 
Management::Agent::API::set_configuration_request(reqid: string, config: M { local res = Management::Result( $reqid = reqid, - $instance = Management::Agent::name); + $instance = Management::Agent::get_name()); Management::Log::info(fmt("tx Management::Agent::API::set_configuration_response %s", Management::result_to_string(res))); @@ -232,7 +232,7 @@ event SupervisorControl::status_response(reqid: string, result: Supervisor::Stat Management::Request::finish(reqid); local res = Management::Result( - $reqid = req$parent_id, $instance = Management::Agent::name); + $reqid = req$parent_id, $instance = Management::Agent::get_name()); local node_statuses: Management::NodeStatusVec; @@ -494,7 +494,7 @@ event Management::Agent::API::agent_welcome_request(reqid: string) local res = Management::Result( $reqid = reqid, - $instance = Management::Agent::name); + $instance = Management::Agent::get_name()); Management::Log::info(fmt("tx Management::Agent::API::agent_welcome_response %s", Management::result_to_string(res))); @@ -515,7 +515,7 @@ event Management::Agent::API::agent_standby_request(reqid: string) local res = Management::Result( $reqid = reqid, - $instance = Management::Agent::name); + $instance = Management::Agent::get_name()); Management::Log::info(fmt("tx Management::Agent::API::agent_standby_response %s", Management::result_to_string(res))); diff --git a/scripts/policy/frameworks/management/controller/api.zeek b/scripts/policy/frameworks/management/controller/api.zeek index d3822f3d02..b840aecab6 100644 --- a/scripts/policy/frameworks/management/controller/api.zeek +++ b/scripts/policy/frameworks/management/controller/api.zeek @@ -97,6 +97,7 @@ export { ## member is a vector of :zeek:see:`Management::NodeStatus` ## records, covering the nodes at that instance. Results may also indicate ## failure, with error messages indicating what went wrong. 
+ ## global get_nodes_response: event(reqid: string, result: Management::ResultVec); @@ -115,6 +116,7 @@ export { ## nodes: a set of cluster node names (e.g. "worker-01") to retrieve ## the values from. An empty set, supplied by default, means ## retrieval from all current cluster nodes. + ## global get_id_value_request: event(reqid: string, id: string, nodes: set[string] &default=set()); @@ -128,6 +130,7 @@ export { ## data field contains a string with the JSON rendering (as produced ## by :zeek:id:`to_json`, including the error strings it potentially ## returns). + ## global get_id_value_response: event(reqid: string, result: Management::ResultVec); @@ -167,4 +170,4 @@ export { ## instances: the set of instance names now ready. ## global notify_agents_ready: event(instances: set[string]); - } +} diff --git a/scripts/policy/frameworks/management/controller/config.zeek b/scripts/policy/frameworks/management/controller/config.zeek index c97c11bb6a..83c067e76c 100644 --- a/scripts/policy/frameworks/management/controller/config.zeek +++ b/scripts/policy/frameworks/management/controller/config.zeek @@ -49,6 +49,9 @@ export { ## output gets garbled. const directory = "" &redef; + ## Returns the effective name of the controller. + global get_name: function(): string; + ## Returns a :zeek:see:`Broker::NetworkInfo` record describing the controller. 
global network_info: function(): Broker::NetworkInfo; @@ -56,6 +59,14 @@ export { global endpoint_info: function(): Broker::EndpointInfo; } +function get_name(): string + { + if ( name != "" ) + return name; + + return fmt("controller-%s", gethostname()); + } + function network_info(): Broker::NetworkInfo { local ni: Broker::NetworkInfo; @@ -79,11 +90,7 @@ function endpoint_info(): Broker::EndpointInfo { local epi: Broker::EndpointInfo; - if ( Management::Controller::name != "" ) - epi$id = Management::Controller::name; - else - epi$id = fmt("controller-%s", gethostname()); - + epi$id = Management::Controller::get_name(); epi$network = network_info(); return epi; From d4d6f10299fac131114084126359425af98c7896 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Thu, 12 May 2022 13:05:46 -0700 Subject: [PATCH 02/15] Management framework: rename env var that labels agents/controllers Just a consistency tweak to avoid confusion with "cluster". --- scripts/policy/frameworks/management/agent/boot.zeek | 4 ++-- scripts/policy/frameworks/management/agent/main.zeek | 4 ++-- scripts/policy/frameworks/management/controller/boot.zeek | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/policy/frameworks/management/agent/boot.zeek b/scripts/policy/frameworks/management/agent/boot.zeek index 7b8bedd088..663614ecf0 100644 --- a/scripts/policy/frameworks/management/agent/boot.zeek +++ b/scripts/policy/frameworks/management/agent/boot.zeek @@ -28,8 +28,8 @@ event zeek_init() if ( Management::Agent::stderr_file_suffix != "" ) sn$stderr_file = epi$id + "." + Management::Agent::stderr_file_suffix; - # This helps Zeek run controller and agent with a minimal set of scripts. - sn$env["ZEEK_CLUSTER_MGMT_NODE"] = "AGENT"; + # This helps identify Management framework nodes reliably. 
+ sn$env["ZEEK_MANAGEMENT_NODE"] = "AGENT"; local res = Supervisor::create(sn); diff --git a/scripts/policy/frameworks/management/agent/main.zeek b/scripts/policy/frameworks/management/agent/main.zeek index 4d977939a4..2a5b0cab7b 100644 --- a/scripts/policy/frameworks/management/agent/main.zeek +++ b/scripts/policy/frameworks/management/agent/main.zeek @@ -264,9 +264,9 @@ event SupervisorControl::status_response(reqid: string, result: Supervisor::Stat } else { - if ( "ZEEK_CLUSTER_MGMT_NODE" in sns$node$env ) + if ( "ZEEK_MANAGEMENT_NODE" in sns$node$env ) { - local role = sns$node$env["ZEEK_CLUSTER_MGMT_NODE"]; + local role = sns$node$env["ZEEK_MANAGEMENT_NODE"]; if ( role == "CONTROLLER" ) { cns$mgmt_role = Management::CONTROLLER; diff --git a/scripts/policy/frameworks/management/controller/boot.zeek b/scripts/policy/frameworks/management/controller/boot.zeek index a3c679c257..f592e6b7a1 100644 --- a/scripts/policy/frameworks/management/controller/boot.zeek +++ b/scripts/policy/frameworks/management/controller/boot.zeek @@ -23,8 +23,8 @@ event zeek_init() if ( Management::Controller::stderr_file != "" ) sn$stderr_file = Management::Controller::stderr_file; - # This helps Zeek run controller and agent with a minimal set of scripts. - sn$env["ZEEK_CLUSTER_MGMT_NODE"] = "CONTROLLER"; + # This helps identify Management framework nodes reliably. + sn$env["ZEEK_MANAGEMENT_NODE"] = "CONTROLLER"; local res = Supervisor::create(sn); From bd6c1683a2fe0677de43e4ae87f42912b9e969b7 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Thu, 12 May 2022 13:07:03 -0700 Subject: [PATCH 03/15] Management framework: comment and layouting tweaks, no functional change Also remove additional instances of the term "data cluster". 
--- scripts/policy/frameworks/management/agent/config.zeek | 4 ++-- scripts/policy/frameworks/management/agent/main.zeek | 8 +++++++- .../policy/frameworks/management/controller/config.zeek | 2 +- scripts/policy/frameworks/management/controller/main.zeek | 5 ++--- scripts/policy/frameworks/management/types.zeek | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/scripts/policy/frameworks/management/agent/config.zeek b/scripts/policy/frameworks/management/agent/config.zeek index 1a94fc8df1..51a36343b9 100644 --- a/scripts/policy/frameworks/management/agent/config.zeek +++ b/scripts/policy/frameworks/management/agent/config.zeek @@ -55,12 +55,12 @@ export { $address="0.0.0.0", $bound_port=0/unknown] &redef; ## An optional custom output directory for stdout/stderr. Agent and - ## controller currently only log locally, not via the data cluster's + ## controller currently only log locally, not via the Zeek cluster's ## logger node. This means that if both write to the same log file, ## output gets garbled. const directory = "" &redef; - ## The working directory for data cluster nodes created by this + ## The working directory for Zeek cluster nodes created by this ## agent. If you make this a relative path, note that the path is ## relative to the agent's working directory, since it creates data ## cluster nodes. diff --git a/scripts/policy/frameworks/management/agent/main.zeek b/scripts/policy/frameworks/management/agent/main.zeek index 2a5b0cab7b..a767c757fa 100644 --- a/scripts/policy/frameworks/management/agent/main.zeek +++ b/scripts/policy/frameworks/management/agent/main.zeek @@ -172,7 +172,13 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M g_cluster[node$name] = cep; } - # Apply the new configuration via the supervisor + # Apply the new configuration via the supervisor. 
+ # + # XXX this should launch the nodes in controlled order (loggers -> + # manager -> proxies -> workers), ideally checking that one stage is up + # before launching the next. This is tricky because that's not the point + # of the Supervisor's response event. Until we have this, bootstrap + # might be noisy, particularly in the Broker log. for ( nodename in g_nodes ) { diff --git a/scripts/policy/frameworks/management/controller/config.zeek b/scripts/policy/frameworks/management/controller/config.zeek index 83c067e76c..f8ac86f0ec 100644 --- a/scripts/policy/frameworks/management/controller/config.zeek +++ b/scripts/policy/frameworks/management/controller/config.zeek @@ -44,7 +44,7 @@ export { const topic = "zeek/management/controller" &redef; ## An optional custom output directory for stdout/stderr. Agent and - ## controller currently only log locally, not via the data cluster's + ## controller currently only log locally, not via the Zeek cluster's ## logger node. This means that if both write to the same log file, ## output gets garbled. const directory = "" &redef; diff --git a/scripts/policy/frameworks/management/controller/main.zeek b/scripts/policy/frameworks/management/controller/main.zeek index 728ca36518..b2f5c44711 100644 --- a/scripts/policy/frameworks/management/controller/main.zeek +++ b/scripts/policy/frameworks/management/controller/main.zeek @@ -81,11 +81,10 @@ global null_config: function(): Management::Configuration; global is_null_config: function(config: Management::Configuration): bool; # Checks whether the given instance is one that we know with different -# communication settings: a a different peering direction, a different listening +# communication settings: a different peering direction, a different listening # port, etc. Used as a predicate to indicate when we need to drop the existing # one from our internal state. 
-global is_instance_connectivity_change: function - (inst: Management::Instance): bool; +global is_instance_connectivity_change: function(inst: Management::Instance): bool; # The set of agents the controller interacts with to manage to currently # configured cluster. This may be a subset of all the agents known to the diff --git a/scripts/policy/frameworks/management/types.zeek b/scripts/policy/frameworks/management/types.zeek index 6d89fbda1a..db3ac8da55 100644 --- a/scripts/policy/frameworks/management/types.zeek +++ b/scripts/policy/frameworks/management/types.zeek @@ -81,7 +81,7 @@ export { state: State; ## Role the node plays in cluster management. mgmt_role: Role &default=NONE; - ## Role the node plays in the data cluster. + ## Role the node plays in the Zeek cluster. cluster_role: Supervisor::ClusterRole &default=Supervisor::NONE; ## Process ID of the node. This is optional because the Supervisor may not have ## a PID when a node is still bootstrapping. From f8f7fd97e867def74a4869c28bc51442c2e068e7 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Thu, 12 May 2022 13:09:33 -0700 Subject: [PATCH 04/15] Management framework: prefix the management logs with "management-" These were still using "cluster-", a leftover from earlier days of the framework. --- scripts/policy/frameworks/management/log.zeek | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/policy/frameworks/management/log.zeek b/scripts/policy/frameworks/management/log.zeek index e8732df1bc..60c47ffdd2 100644 --- a/scripts/policy/frameworks/management/log.zeek +++ b/scripts/policy/frameworks/management/log.zeek @@ -136,7 +136,7 @@ event zeek_init() # Defining the stream outside of the stream creation call sidesteps # the coverage.find-bro-logs test, which tries to inventory all logs. # This log isn't yet ready for that level of scrutiny. 
- local stream = Log::Stream($columns=Info, $path=fmt("cluster-%s", node$name), + local stream = Log::Stream($columns=Info, $path=fmt("management-%s", node$name), $policy=log_policy); Log::create_stream(Management::Log::LOG, stream); From d40bb6e85f334843f53489afd71e761f6125eeeb Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Thu, 12 May 2022 22:50:18 -0700 Subject: [PATCH 05/15] Management framework: simplify agent and controller stdout/stderr files Moving to a model in which every Zeek process runs out of its own working directory simplifies the handling of those files. --- scripts/policy/frameworks/management/agent/boot.zeek | 8 ++++---- .../policy/frameworks/management/agent/config.zeek | 12 +++++------- .../frameworks/management/controller/config.zeek | 12 ++++++------ 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/scripts/policy/frameworks/management/agent/boot.zeek b/scripts/policy/frameworks/management/agent/boot.zeek index 663614ecf0..5863b0517d 100644 --- a/scripts/policy/frameworks/management/agent/boot.zeek +++ b/scripts/policy/frameworks/management/agent/boot.zeek @@ -23,10 +23,10 @@ event zeek_init() if ( Management::Agent::directory != "" ) sn$directory = Management::Agent::directory; - if ( Management::Agent::stdout_file_suffix != "" ) - sn$stdout_file = epi$id + "." + Management::Agent::stdout_file_suffix; - if ( Management::Agent::stderr_file_suffix != "" ) - sn$stderr_file = epi$id + "." + Management::Agent::stderr_file_suffix; + if ( Management::Agent::stdout_file != "" ) + sn$stdout_file = Management::Agent::stdout_file; + if ( Management::Agent::stderr_file != "" ) + sn$stderr_file = Management::Agent::stderr_file; # This helps identify Management framework nodes reliably. 
sn$env["ZEEK_MANAGEMENT_NODE"] = "AGENT"; diff --git a/scripts/policy/frameworks/management/agent/config.zeek b/scripts/policy/frameworks/management/agent/config.zeek index 51a36343b9..6a6dda7c6c 100644 --- a/scripts/policy/frameworks/management/agent/config.zeek +++ b/scripts/policy/frameworks/management/agent/config.zeek @@ -14,18 +14,16 @@ export { ## Agent stdout log configuration. If the string is non-empty, Zeek will ## produce a free-form log (i.e., not one governed by Zeek's logging - ## framework) in Zeek's working directory. The final log's name is - ## ".", where the name is taken from :zeek:see:`Management::Agent::get_name`, - ## and the suffix is defined by the following variable. If left empty, - ## no such log results. + ## framework) in the agent's working directory. If left empty, no such + ## log results. ## ## Note that the agent also establishes a "proper" Zeek log via the ## :zeek:see:`Management::Log` module. - const stdout_file_suffix = "agent.stdout" &redef; + const stdout_file = "stdout" &redef; - ## Agent stderr log configuration. Like :zeek:see:`Management::Agent::stdout_file_suffix`, + ## Agent stderr log configuration. Like :zeek:see:`Management::Agent::stdout_file`, ## but for the stderr stream. - const stderr_file_suffix = "agent.stderr" &redef; + const stderr_file = "stderr" &redef; ## The network address the agent listens on. This only takes effect if ## the agent isn't configured to connect to the controller (see diff --git a/scripts/policy/frameworks/management/controller/config.zeek b/scripts/policy/frameworks/management/controller/config.zeek index f8ac86f0ec..f39d26ba15 100644 --- a/scripts/policy/frameworks/management/controller/config.zeek +++ b/scripts/policy/frameworks/management/controller/config.zeek @@ -12,18 +12,18 @@ export { ## "controller-". const name = getenv("ZEEK_CONTROLLER_NAME") &redef; - ## The controller's stdout log name. 
If the string is non-empty, Zeek will - ## produce a free-form log (i.e., not one governed by Zeek's logging - ## framework) in Zeek's working directory. If left empty, no such log - ## results. + ## The controller's stdout log name. If the string is non-empty, Zeek + ## will produce a free-form log (i.e., not one governed by Zeek's + ## logging framework) in the controller's working directory. If left + ## empty, no such log results. ## ## Note that the controller also establishes a "proper" Zeek log via the ## :zeek:see:`Management::Log` module. - const stdout_file = "controller.stdout" &redef; + const stdout_file = "stdout" &redef; ## The controller's stderr log name. Like :zeek:see:`Management::Controller::stdout_file`, ## but for the stderr stream. - const stderr_file = "controller.stderr" &redef; + const stderr_file = "stderr" &redef; ## The network address the controller listens on. By default this uses ## the value of the ZEEK_CONTROLLER_ADDR environment variable, but you From e78fdc39e442fe1d5f4b357e7630ba9878a04a0b Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Fri, 13 May 2022 14:21:47 -0700 Subject: [PATCH 06/15] Management framework: distinguish supervisor/supervisee when loading agent/controller Load the agent/controller bootstrapping code only from the Supervisor, and the basic config only from a supervisee. When we're neither (which is likely a mistake), we do nothing. --- .../frameworks/management/agent/__load__.zeek | 14 +++++++++++++- .../frameworks/management/controller/__load__.zeek | 14 +++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/scripts/policy/frameworks/management/agent/__load__.zeek b/scripts/policy/frameworks/management/agent/__load__.zeek index 57cefe0757..590325ed9a 100644 --- a/scripts/policy/frameworks/management/agent/__load__.zeek +++ b/scripts/policy/frameworks/management/agent/__load__.zeek @@ -1,4 +1,16 @@ ##! The entry point for the Management framework's cluster agent. It runs -##! 
bootstrap logic for launching the agent process via Zeek's Supervisor. +##! bootstrap logic for launching an agent process via Zeek's Supervisor. +# When the user sources this from other scripts, the intent may not be just to +# create an agent, but also access Management framework infrastructure, for +# example to reconfigure ports and other settings. So we always load that +# infrastructure, but initiate the agent launch only when this is actually the +# Supervisor process. + +@if ( Supervisor::is_supervised() ) +@load policy/frameworks/management/agent/config +@endif + +@if ( Supervisor::is_supervisor() ) @load ./boot +@endif diff --git a/scripts/policy/frameworks/management/controller/__load__.zeek b/scripts/policy/frameworks/management/controller/__load__.zeek index 47af9762c5..16534d86d4 100644 --- a/scripts/policy/frameworks/management/controller/__load__.zeek +++ b/scripts/policy/frameworks/management/controller/__load__.zeek @@ -1,4 +1,16 @@ ##! The entry point for the Management framework's cluster controller. It runs -##! bootstrap logic for launching the controller process via Zeek's Supervisor. +##! bootstrap logic for launching a controller process via Zeek's Supervisor. +# When the user sources this from other scripts, the intent may not be just to +# create a controller, but also access Management framework infrastructure, for +# example to reconfigure ports and other settings. So we always load that +# infrastructure, but initiate the controller launch only when this is actually +# the Supervisor process. 
+ +@if ( Supervisor::is_supervised() ) +@load policy/frameworks/management/controller/config +@endif + +@if ( Supervisor::is_supervisor() ) @load ./boot +@endif From b96a4276eba9cbd38c8724df8049d4c6bb5645ef Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Wed, 18 May 2022 18:47:59 -0700 Subject: [PATCH 07/15] Management framework: move role variable from logging into framework-wide config The role isn't just about logging, it can also act as a general indicator to key in on in role-specific code elsewhere, such as @if. --- .../policy/frameworks/management/agent/main.zeek | 2 +- scripts/policy/frameworks/management/config.zeek | 7 +++++++ .../frameworks/management/controller/main.zeek | 2 +- scripts/policy/frameworks/management/log.zeek | 14 +++++--------- .../policy/frameworks/management/node/main.zeek | 2 +- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/policy/frameworks/management/agent/main.zeek b/scripts/policy/frameworks/management/agent/main.zeek index a767c757fa..117dc3f1c9 100644 --- a/scripts/policy/frameworks/management/agent/main.zeek +++ b/scripts/policy/frameworks/management/agent/main.zeek @@ -40,7 +40,7 @@ redef record Management::Request::Request += { }; # Tag our logs correctly -redef Management::Log::role = Management::AGENT; +redef Management::role = Management::AGENT; # The global configuration as passed to us by the controller global g_config: Management::Configuration; diff --git a/scripts/policy/frameworks/management/config.zeek b/scripts/policy/frameworks/management/config.zeek index 7b87655ae5..6f59bbc6eb 100644 --- a/scripts/policy/frameworks/management/config.zeek +++ b/scripts/policy/frameworks/management/config.zeek @@ -5,9 +5,16 @@ ##! anyway). For role-specific settings, see management/controller/config.zeek ##! and management/agent/config.zeek. +@load ./types + module Management; export { + ## The role of this process in cluster management. 
Use this to + ## differentiate code based on the type of node in which it ends up + ## running. + const role = Management::NONE &redef; + ## The fallback listen address if more specific adddresses, such as ## the controller's :zeek:see:`Management::Controller::listen_address` ## remains empty. Unless redefined, this uses Broker's own default diff --git a/scripts/policy/frameworks/management/controller/main.zeek b/scripts/policy/frameworks/management/controller/main.zeek index b2f5c44711..f9d3c2d0a3 100644 --- a/scripts/policy/frameworks/management/controller/main.zeek +++ b/scripts/policy/frameworks/management/controller/main.zeek @@ -71,7 +71,7 @@ redef record Management::Request::Request += { }; # Tag our logs correctly -redef Management::Log::role = Management::CONTROLLER; +redef Management::role = Management::CONTROLLER; global check_instances_ready: function(); global add_instance: function(inst: Management::Instance); diff --git a/scripts/policy/frameworks/management/log.zeek b/scripts/policy/frameworks/management/log.zeek index 60c47ffdd2..a6f8d37571 100644 --- a/scripts/policy/frameworks/management/log.zeek +++ b/scripts/policy/frameworks/management/log.zeek @@ -3,7 +3,7 @@ ##! supervisor. In this setting Zeek's logging framework operates locally, i.e., ##! this does not involve logger nodes. -@load ./types +@load ./config module Management::Log; @@ -64,10 +64,6 @@ export { ## message: the message to log. ## global error: function(message: string); - - ## The role of this process in cluster management. Agent and controller - ## both redefine this, and we use it during logging. - const role = Management::NONE &redef; } # Enum translations to strings. 
This avoids those enums being reported @@ -93,7 +89,7 @@ function debug(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[DEBUG], - $role=r2s[role], $message=message]); + $role=r2s[Management::role], $message=message]); } function info(message: string) @@ -103,7 +99,7 @@ function info(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[INFO], - $role=r2s[role], $message=message]); + $role=r2s[Management::role], $message=message]); } function warning(message: string) @@ -113,7 +109,7 @@ function warning(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[WARNING], - $role=r2s[role], $message=message]); + $role=r2s[Management::role], $message=message]); } function error(message: string) @@ -123,7 +119,7 @@ function error(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[ERROR], - $role=r2s[role], $message=message]); + $role=r2s[Management::role], $message=message]); } event zeek_init() diff --git a/scripts/policy/frameworks/management/node/main.zeek b/scripts/policy/frameworks/management/node/main.zeek index 52aa5f1e96..c8649b6054 100644 --- a/scripts/policy/frameworks/management/node/main.zeek +++ b/scripts/policy/frameworks/management/node/main.zeek @@ -12,7 +12,7 @@ module Management::Node; # Tag our logs correctly -redef Management::Log::role = Management::NODE; +redef Management::role = Management::NODE; ## The type of dispatch callbacks. 
These implement a particular dispatch action, ## using the provided string vector as arguments, filling results into the From da016b8a68e4858cf1bda0f77703a07b07b7e43b Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Fri, 13 May 2022 15:51:58 -0700 Subject: [PATCH 08/15] Management framework: default to having agents check in with the (local) controller This allows single-machine settings to work out of the box when agent and controller are loaded in Supervisor mode. --- .../frameworks/management/agent/config.zeek | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/scripts/policy/frameworks/management/agent/config.zeek b/scripts/policy/frameworks/management/agent/config.zeek index 6a6dda7c6c..eb9d7f315e 100644 --- a/scripts/policy/frameworks/management/agent/config.zeek +++ b/scripts/policy/frameworks/management/agent/config.zeek @@ -3,6 +3,10 @@ @load policy/frameworks/management/config @load policy/frameworks/management/types +# We source the controller configuration to obtain its network coordinates, so +# we can default to connecting to it. +@load policy/frameworks/management/controller/config + module Management::Agent; export { @@ -45,12 +49,15 @@ export { ## suffixes this with "/", based on :zeek:see:`Management::Agent::get_name`. const topic_prefix = "zeek/management/agent" &redef; - ## The network coordinates of the controller. When defined, the agent - ## peers with (and connects to) the controller; otherwise the controller - ## will peer (and connect to) the agent, listening as defined by - ## :zeek:see:`Management::Agent::listen_address` and :zeek:see:`Management::Agent::listen_port`. - const controller: Broker::NetworkInfo = [ - $address="0.0.0.0", $bound_port=0/unknown] &redef; + ## The network coordinates of the controller. By default, the agent + ## connects locally to the controller at its default port. 
Assigning + ## a :zeek:see:`Broker::NetworkInfo` record with IP address "0.0.0.0" + ## means the controller should instead connect to the agent. If you'd + ## like to use that mode, make sure to set + ## :zeek:see:`Management::Agent::listen_address` and + ## :zeek:see:`Management::Agent::listen_port` as needed. + const controller = Broker::NetworkInfo($address="127.0.0.1", + $bound_port=Management::Controller::network_info()$bound_port) &redef; ## An optional custom output directory for stdout/stderr. Agent and ## controller currently only log locally, not via the Zeek cluster's From e305d9c613569745f32c92b3bd0f982273323448 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Wed, 18 May 2022 18:21:21 -0700 Subject: [PATCH 09/15] Management framework: establish stdout/stderr files also for cluster nodes --- .../policy/frameworks/management/agent/boot.zeek | 2 ++ .../policy/frameworks/management/agent/main.zeek | 7 +++++++ .../frameworks/management/controller/boot.zeek | 2 ++ .../policy/frameworks/management/node/config.zeek | 13 +++++++++++++ 4 files changed, 24 insertions(+) diff --git a/scripts/policy/frameworks/management/agent/boot.zeek b/scripts/policy/frameworks/management/agent/boot.zeek index 5863b0517d..d906b21558 100644 --- a/scripts/policy/frameworks/management/agent/boot.zeek +++ b/scripts/policy/frameworks/management/agent/boot.zeek @@ -4,6 +4,8 @@ ##! ##! If the current process is not the Zeek supervisor, this does nothing. +@load base/utils/paths + @load ./config # The agent needs the supervisor to listen for node management requests. We diff --git a/scripts/policy/frameworks/management/agent/main.zeek b/scripts/policy/frameworks/management/agent/main.zeek index 117dc3f1c9..a623fa4e5b 100644 --- a/scripts/policy/frameworks/management/agent/main.zeek +++ b/scripts/policy/frameworks/management/agent/main.zeek @@ -4,6 +4,8 @@ ##! supervisor. 
@load base/frameworks/broker +@load base/utils/paths + @load policy/frameworks/management @load policy/frameworks/management/node/api @load policy/frameworks/management/node/config @@ -204,6 +206,11 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M # node. nc$scripts[|nc$scripts|] = "policy/frameworks/management/node"; + if ( Management::Node::stdout_file != "" ) + nc$stdout_file = Management::Node::stdout_file; + if ( Management::Node::stderr_file != "" ) + nc$stderr_file = Management::Node::stderr_file; + # XXX could use options to enable per-node overrides for # directory, stdout, stderr, others? diff --git a/scripts/policy/frameworks/management/controller/boot.zeek b/scripts/policy/frameworks/management/controller/boot.zeek index f592e6b7a1..aaafca8c96 100644 --- a/scripts/policy/frameworks/management/controller/boot.zeek +++ b/scripts/policy/frameworks/management/controller/boot.zeek @@ -5,6 +5,8 @@ ##! ##! If the current process is not the Zeek supervisor, this does nothing. +@load base/utils/paths + @load ./config event zeek_init() diff --git a/scripts/policy/frameworks/management/node/config.zeek b/scripts/policy/frameworks/management/node/config.zeek index d17fd663a1..e15631aa87 100644 --- a/scripts/policy/frameworks/management/node/config.zeek +++ b/scripts/policy/frameworks/management/node/config.zeek @@ -6,4 +6,17 @@ export { ## The nodes' Broker topic. Cluster nodes automatically subscribe ## to it, to receive request events from the Management framework. const node_topic = "zeek/management/node" &redef; + + ## Cluster node stdout log configuration. If the string is non-empty, + ## Zeek will produce a free-form log (i.e., not one governed by Zeek's + ## logging framework) in the node's working directory. If left empty, no + ## such log results. + ## + ## Note that cluster nodes also establish a "proper" management log via + ## the :zeek:see:`Management::Log` module. 
+ const stdout_file = "stdout" &redef; + + ## Cluster node stderr log configuration. Like + ## :zeek:see:`Management::Node::stdout_file`, but for the stderr stream. + const stderr_file = "stderr" &redef; } From 7708cbe500872d76e0ef7ba20a678574f9d93775 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Thu, 12 May 2022 23:02:22 -0700 Subject: [PATCH 10/15] Management framework: add spool and state directory config settings This allows specifying spool and variable-state directories specifically for the management framework. They default to the corresponding installation-level folders. --- .../policy/frameworks/management/config.zeek | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/scripts/policy/frameworks/management/config.zeek b/scripts/policy/frameworks/management/config.zeek index 6f59bbc6eb..9ca80caf85 100644 --- a/scripts/policy/frameworks/management/config.zeek +++ b/scripts/policy/frameworks/management/config.zeek @@ -5,6 +5,8 @@ ##! anyway). For role-specific settings, see management/controller/config.zeek ##! and management/agent/config.zeek. +@load base/misc/installation + @load ./types module Management; @@ -24,4 +26,41 @@ export { ## The retry interval for Broker connnects. Defaults to a more ## aggressive value compared to Broker's 30s. const connect_retry = 1sec &redef; + + ## The toplevel directory in which the Management framework creates + ## spool state for any Zeek nodes, including the Zeek cluster, agents, + ## and the controller. Don't use this directly, use the + ## :zeek:see:`Management::get_spool_dir` function. + const spool_dir = getenv("ZEEK_MANAGEMENT_SPOOL_DIR") &redef; + + ## The toplevel directory for variable state, such as Broker data + ## stores. Don't use this directly, use the + ## :zeek:see:`Management::get_state_dir` function. + const state_dir = getenv("ZEEK_MANAGEMENT_STATE_DIR") &redef; + + ## Returns the effective spool directory for the management framework. 
+ ## That's :zeek:see:`Management::spool_dir` when set, otherwise the + ## installation's spool directory. + global get_spool_dir: function(): string; + + ## Returns the effective state directory for the management framework. + ## That's :zeek:see:`Management::state_dir` when set, otherwise the + ## installation's state directory. + global get_state_dir: function(): string; } + +function get_spool_dir(): string + { + if ( spool_dir != "" ) + return spool_dir; + + return Installation::spool_dir; + } + +function get_state_dir(): string + { + if ( state_dir != "" ) + return state_dir; + + return Installation::state_dir; + } From d1cd409e59491a77a1a0a63ee7ad825fa36986bc Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Fri, 20 May 2022 16:42:04 -0700 Subject: [PATCH 11/15] Management framework: set defaults for log rotation and persistent state This adds management/persistence.zeek to establish common configuration for log rotation and persistent variable state. Log-writing Zeek processes initially write locally in their working directory, and rotate into subdirectory "log-queue" of the spool. Since agent and controller have no logger, persistence.zeek puts in place compatible configurations for them. Storage folders for Broker-backed tables and clusterized stores default to subdirectories of the new Zeek-level state folder. When setting the ZEEK_MANAGEMENT_TESTING environment variable, persistent state is kept in the local directory, and log rotation remains disabled. This also tweaks @loads a bit in favor of simply loading frameworks/management, which is easier to keep track of. 
--- .../frameworks/management/__load__.zeek | 1 + .../frameworks/management/agent/config.zeek | 3 +- .../management/controller/config.zeek | 3 +- .../frameworks/management/node/main.zeek | 13 ++++- .../frameworks/management/persistence.zeek | 47 +++++++++++++++++++ scripts/test-all-policy.zeek | 1 + .../Baseline/coverage.bare-mode-errors/errors | 4 +- 7 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 scripts/policy/frameworks/management/persistence.zeek diff --git a/scripts/policy/frameworks/management/__load__.zeek b/scripts/policy/frameworks/management/__load__.zeek index 96192ea366..8dc27de8c4 100644 --- a/scripts/policy/frameworks/management/__load__.zeek +++ b/scripts/policy/frameworks/management/__load__.zeek @@ -6,6 +6,7 @@ @load ./config @load ./log +@load ./persistence @load ./request @load ./types @load ./util diff --git a/scripts/policy/frameworks/management/agent/config.zeek b/scripts/policy/frameworks/management/agent/config.zeek index eb9d7f315e..53688dcbad 100644 --- a/scripts/policy/frameworks/management/agent/config.zeek +++ b/scripts/policy/frameworks/management/agent/config.zeek @@ -1,7 +1,6 @@ ##! Configuration settings for a cluster agent. -@load policy/frameworks/management/config -@load policy/frameworks/management/types +@load policy/frameworks/management # We source the controller configuration to obtain its network coordinates, so # we can default to connecting to it. diff --git a/scripts/policy/frameworks/management/controller/config.zeek b/scripts/policy/frameworks/management/controller/config.zeek index f39d26ba15..2728d01ec3 100644 --- a/scripts/policy/frameworks/management/controller/config.zeek +++ b/scripts/policy/frameworks/management/controller/config.zeek @@ -1,7 +1,6 @@ ##! Configuration settings for the cluster controller. 
-@load policy/frameworks/management/config -@load policy/frameworks/management/types +@load policy/frameworks/management module Management::Controller; diff --git a/scripts/policy/frameworks/management/node/main.zeek b/scripts/policy/frameworks/management/node/main.zeek index c8649b6054..df65d754b9 100644 --- a/scripts/policy/frameworks/management/node/main.zeek +++ b/scripts/policy/frameworks/management/node/main.zeek @@ -1,10 +1,14 @@ ##! This module provides Management framework functionality present in every ##! cluster node, to allowing Management agents to interact with the nodes. +@load base/frameworks/broker/store @load base/frameworks/cluster +@load base/frameworks/logging/writers/ascii +@load base/misc/installation +@load base/utils/paths +@load policy/frameworks/management @load policy/frameworks/management/agent/config -@load policy/frameworks/management/log @load ./api @load ./config @@ -103,6 +107,13 @@ event Broker::peer_added(peer: Broker::EndpointInfo, msg: string) event zeek_init() { + if ( Broker::table_store_db_directory != "" && ! mkdir(Broker::table_store_db_directory) ) + Management::Log::error(fmt("could not create Broker data store directory '%s'", + Broker::table_store_db_directory)); + if ( Cluster::default_store_dir != "" && ! mkdir(Cluster::default_store_dir) ) + Management::Log::error(fmt("could not create Cluster store directory '%s'", + Cluster::default_store_dir)); + local epi = Management::Agent::endpoint_info(); Broker::peer(epi$network$address, epi$network$bound_port, Management::connect_retry); diff --git a/scripts/policy/frameworks/management/persistence.zeek b/scripts/policy/frameworks/management/persistence.zeek new file mode 100644 index 0000000000..bb5731ced0 --- /dev/null +++ b/scripts/policy/frameworks/management/persistence.zeek @@ -0,0 +1,47 @@ +##! Common adjustments for any kind of Zeek node when we run the Management +##! framework. 
+ +@load base/misc/installation +@load base/utils/paths + +@load ./config + +# For testing, keep persistent state local to the current working directory, +# and disable log rotation. +@if ( getenv("ZEEK_MANAGEMENT_TESTING") != "" ) + +redef Management::spool_dir = "."; +redef Management::state_dir = "."; +redef Log::default_rotation_interval = 0 secs; + +@else + +# For any kind of Zeek process we steer rotated logs awaiting archival into a +# queue directory in the spool. The name "log-queue" matches logger nodes' default +# config with the Supervisor; see base/frameworks/cluster/nodes/logger.zeek. +redef Log::default_rotation_dir = build_path(Management::get_spool_dir(), "log-queue"); + +@if ( getenv("ZEEK_MANAGEMENT_NODE") != "" ) + +# Management agents and controllers don't have loggers, nor their configuration, +# so establish a similar one here: + +function archiver_rotation_format_func(ri: Log::RotationFmtInfo): Log::RotationPath + { + local open_str = strftime(Log::default_rotation_date_format, ri$open); + local close_str = strftime(Log::default_rotation_date_format, ri$close); + local base = fmt("%s__%s__%s__", ri$path, open_str, close_str); + local rval = Log::RotationPath($file_basename=base); + return rval; + } + +redef Log::default_rotation_interval = 1 hrs; +redef Log::enable_local_logging = T; +redef Log::enable_remote_logging = T; +redef Log::rotation_format_func = archiver_rotation_format_func; + +redef LogAscii::enable_leftover_log_rotation = T; + +@endif # ZEEK_MANAGEMENT_NODE + +@endif # ZEEK_MANAGEMENT_TESTING diff --git a/scripts/test-all-policy.zeek b/scripts/test-all-policy.zeek index 0f2de90609..2dc50cd8af 100644 --- a/scripts/test-all-policy.zeek +++ b/scripts/test-all-policy.zeek @@ -24,6 +24,7 @@ @load frameworks/management/__load__.zeek @load frameworks/management/config.zeek @load frameworks/management/log.zeek +@load frameworks/management/persistence.zeek # @load frameworks/management/node/__load__.zeek @load 
frameworks/management/node/api.zeek @load frameworks/management/node/config.zeek diff --git a/testing/btest/Baseline/coverage.bare-mode-errors/errors b/testing/btest/Baseline/coverage.bare-mode-errors/errors index bc9bd28f83..6e8c09e1bd 100644 --- a/testing/btest/Baseline/coverage.bare-mode-errors/errors +++ b/testing/btest/Baseline/coverage.bare-mode-errors/errors @@ -2,8 +2,8 @@ ### NOTE: This file has been sorted with diff-sort. warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:15 "Remove in v5.1. Use log-certs-base64.zeek instead." warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from command line arguments "Remove in v5.1. Use log-certs-base64.zeek instead." -warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:65 ("Remove in v5.1. OCSP logging is now enabled by default") -warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:65 ("Remove in v5.1. OCSP logging is now enabled by default") +warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:66 ("Remove in v5.1. OCSP logging is now enabled by default") +warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:66 ("Remove in v5.1. OCSP logging is now enabled by default") warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. OCSP logging is now enabled by default") warning in <...>/notary.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:5 ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).") warning in <...>/notary.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. 
Please switch to other more modern approaches like SCT validation (validate-sct.zeek).") From 93ea03a081fc09dc24601e993aef3ee39a9e9494 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Fri, 20 May 2022 16:37:26 -0700 Subject: [PATCH 12/15] Management framework: place each Zeek process in its own working dir This establishes a directory "nodes" in Management::state_dir and places each Zeek process into a subdirectory in it, named after the Zeek process. For example, node "worker-01" runs with cwd [state_dir]/nodes/worker-01/. Explicitly configured directories can override the naming logic, and also ignore the state directory if they're absolute paths. One exception remains: the Supervisor itself -- we'd have to use LogAscii::logdir to automatically place it too in its own directory, but that feature currently does not interoperate with log rotation. --- .../policy/frameworks/management/agent/boot.zeek | 16 +++++++++++++++- .../frameworks/management/agent/config.zeek | 15 +++++---------- .../policy/frameworks/management/agent/main.zeek | 13 +++++++++++-- .../frameworks/management/controller/boot.zeek | 16 +++++++++++++++- 4 files changed, 46 insertions(+), 14 deletions(-) diff --git a/scripts/policy/frameworks/management/agent/boot.zeek b/scripts/policy/frameworks/management/agent/boot.zeek index d906b21558..ead12665f2 100644 --- a/scripts/policy/frameworks/management/agent/boot.zeek +++ b/scripts/policy/frameworks/management/agent/boot.zeek @@ -23,8 +23,22 @@ event zeek_init() local sn = Supervisor::NodeConfig($name=epi$id, $bare_mode=T, $scripts=vector("policy/frameworks/management/agent/main.zeek")); + # Establish the agent's working directory. If one is configured + # explicitly, use as-is if absolute. Otherwise, append it to the state + # path. Without an explicit directory, fall back to the agent name. + local statedir = build_path(Management::get_state_dir(), "nodes"); + + if ( !
mkdir(statedir) ) + print(fmt("warning: could not create state dir '%s'", statedir)); + if ( Management::Agent::directory != "" ) - sn$directory = Management::Agent::directory; + sn$directory = build_path(statedir, Management::Agent::directory); + else + sn$directory = build_path(statedir, Management::Agent::get_name()); + + if ( ! mkdir(sn$directory) ) + print(fmt("warning: could not create agent state dir '%s'", sn$directory)); + if ( Management::Agent::stdout_file != "" ) sn$stdout_file = Management::Agent::stdout_file; if ( Management::Agent::stderr_file != "" ) diff --git a/scripts/policy/frameworks/management/agent/config.zeek b/scripts/policy/frameworks/management/agent/config.zeek index 53688dcbad..4e1d23774e 100644 --- a/scripts/policy/frameworks/management/agent/config.zeek +++ b/scripts/policy/frameworks/management/agent/config.zeek @@ -58,18 +58,13 @@ export { const controller = Broker::NetworkInfo($address="127.0.0.1", $bound_port=Management::Controller::network_info()$bound_port) &redef; - ## An optional custom output directory for stdout/stderr. Agent and - ## controller currently only log locally, not via the Zeek cluster's - ## logger node. This means that if both write to the same log file, - ## output gets garbled. + ## An optional working directory for the agent. Agent and controller + ## currently only log locally, not via the Zeek cluster's logger + ## node. This means that if multiple agents and/or controllers work from + ## the same directory, output may get garbled. When not set, defaults to + ## a directory named after the agent (as per its get_name() result). const directory = "" &redef; - ## The working directory for Zeek cluster nodes created by this - ## agent. If you make this a relative path, note that the path is - ## relative to the agent's working directory, since it creates data - ## cluster nodes. - const cluster_directory = "" &redef; - ## Returns the effective name of this agent. 
global get_name: function(): string; diff --git a/scripts/policy/frameworks/management/agent/main.zeek b/scripts/policy/frameworks/management/agent/main.zeek index a623fa4e5b..574d2a7674 100644 --- a/scripts/policy/frameworks/management/agent/main.zeek +++ b/scripts/policy/frameworks/management/agent/main.zeek @@ -189,8 +189,17 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M nc = Supervisor::NodeConfig($name=nodename); - if ( Management::Agent::cluster_directory != "" ) - nc$directory = Management::Agent::cluster_directory; + local statedir = build_path(Management::get_state_dir(), "nodes"); + + if ( ! mkdir(statedir) ) + Management::Log::warning(fmt("could not create state dir '%s'", statedir)); + + statedir = build_path(statedir, nodename); + + if ( ! mkdir(statedir) ) + Management::Log::warning(fmt("could not create node state dir '%s'", statedir)); + + nc$directory = statedir; if ( node?$interface ) nc$interface = node$interface; diff --git a/scripts/policy/frameworks/management/controller/boot.zeek b/scripts/policy/frameworks/management/controller/boot.zeek index aaafca8c96..6470385d94 100644 --- a/scripts/policy/frameworks/management/controller/boot.zeek +++ b/scripts/policy/frameworks/management/controller/boot.zeek @@ -18,8 +18,22 @@ event zeek_init() local sn = Supervisor::NodeConfig($name=epi$id, $bare_mode=T, $scripts=vector("policy/frameworks/management/controller/main.zeek")); + # Establish the controller's working directory. If one is configured + # explicitly, use as-is if absolute. Otherwise, append it to the state + # path. Without an explicit directory, fall back to the controller name. + local statedir = build_path(Management::get_state_dir(), "nodes"); + + if ( !
mkdir(statedir) ) + print(fmt("warning: could not create state dir '%s'", statedir)); + if ( Management::Controller::directory != "" ) - sn$directory = Management::Controller::directory; + sn$directory = build_path(statedir, Management::Controller::directory); + else + sn$directory = build_path(statedir, Management::Controller::get_name()); + + if ( ! mkdir(sn$directory) ) + print(fmt("warning: could not create controller state dir '%s'", sn$directory)); + if ( Management::Controller::stdout_file != "" ) sn$stdout_file = Management::Controller::stdout_file; if ( Management::Controller::stderr_file != "" ) From 328e663060e408828f30a4af2829ccec6e71d824 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Fri, 13 May 2022 15:50:52 -0700 Subject: [PATCH 13/15] Management framework: update agent-checkin test to reflect recent changes This keeps logs produced locally and without rotation, and adopts the simpler stdout/stderr file naming. --- ...ntroller.stdout => zeek.nodes.controller.stdout} | 0 .../management/controller/agent-checkin.zeek | 13 ++++++------- 2 files changed, 6 insertions(+), 7 deletions(-) rename testing/btest/Baseline/scripts.policy.frameworks.management.controller.agent-checkin/{zeek.controller.stdout => zeek.nodes.controller.stdout} (100%) diff --git a/testing/btest/Baseline/scripts.policy.frameworks.management.controller.agent-checkin/zeek.controller.stdout b/testing/btest/Baseline/scripts.policy.frameworks.management.controller.agent-checkin/zeek.nodes.controller.stdout similarity index 100% rename from testing/btest/Baseline/scripts.policy.frameworks.management.controller.agent-checkin/zeek.controller.stdout rename to testing/btest/Baseline/scripts.policy.frameworks.management.controller.agent-checkin/zeek.nodes.controller.stdout diff --git a/testing/btest/scripts/policy/frameworks/management/controller/agent-checkin.zeek b/testing/btest/scripts/policy/frameworks/management/controller/agent-checkin.zeek index ecdc3b17ed..3d248848c7 100644 ---
a/testing/btest/scripts/policy/frameworks/management/controller/agent-checkin.zeek +++ b/testing/btest/scripts/policy/frameworks/management/controller/agent-checkin.zeek @@ -1,16 +1,15 @@ # This test verifies basic agent-controller communication in the Management # framework. We launch agent and controller via the supervisor, add an extra # handler for the notify_agent_hello event that travels agent -> controller, and -# verify its print output in the controller's stdout log. +# verify that it prints receipt of the event to stdout. -# The following env vars is known to the controller framework +# The following environment variables are known to the controller framework: # @TEST-PORT: ZEEK_CONTROLLER_PORT # @TEST-PORT: BROKER_PORT -# A bit of a detour to get the port number into the agent configuration -# @TEST-EXEC: btest-bg-run zeek zeek -j %INPUT +# @TEST-EXEC: ZEEK_MANAGEMENT_TESTING=1 btest-bg-run zeek zeek -j %INPUT # @TEST-EXEC: btest-bg-wait 10 -# @TEST-EXEC: btest-diff zeek/controller.stdout +# @TEST-EXEC: btest-diff zeek/nodes/controller/stdout @load policy/frameworks/management/agent @load policy/frameworks/management/controller @@ -34,7 +33,7 @@ event zeek_init() # We're using the controller to shut everything down once the # notify_agent_hello event has arrived. The controller doesn't normally # talk to the supervisor, so connect to it. - if ( Supervisor::node()$name == "controller" ) + if ( Management::role == Management::CONTROLLER ) { Broker::peer(getenv("ZEEK_DEFAULT_LISTEN_ADDRESS"), Broker::default_port, Broker::default_listen_retry); Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::stop_request); @@ -43,7 +42,7 @@ event zeek_init() event Management::Agent::API::notify_agent_hello(instance: string, host: addr, api_version: count) { - if ( Supervisor::node()$name == "controller" ) + if ( Management::role == Management::CONTROLLER ) { # On rare occasion it can happen that we log this twice, which'll need # investigating. 
For now we ensure we only do so once. From aeb29413a2bdd203295fb8497ce0c8fdd2ca948c Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Sun, 22 May 2022 17:03:14 -0700 Subject: [PATCH 14/15] Management framework: bump external cluster testsuite --- testing/external/commit-hash.zeek-testing-cluster | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/external/commit-hash.zeek-testing-cluster b/testing/external/commit-hash.zeek-testing-cluster index dbf1ad03e2..c0bc40c1c9 100644 --- a/testing/external/commit-hash.zeek-testing-cluster +++ b/testing/external/commit-hash.zeek-testing-cluster @@ -1 +1 @@ -fa9e808baedfeb23b4125f390cb3021c535a7d2b +b5fd49e31c51838f412b736ad6b697eb1f06abb4 From 1165193df852b6ff0ab290cc51e6e5585851f2f8 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Sun, 22 May 2022 17:03:43 -0700 Subject: [PATCH 15/15] Management framework: bump zeek-client to pull in instance serialization fixes --- auxil/zeek-client | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/auxil/zeek-client b/auxil/zeek-client index c43dbef420..6a3d1b5516 160000 --- a/auxil/zeek-client +++ b/auxil/zeek-client @@ -1 +1 @@ -Subproject commit c43dbef4204d5e9f1b682f1dea27dbec01c18d70 +Subproject commit 6a3d1b5516e5c9343072466e3c627aa13324f2d0