diff --git a/CHANGES b/CHANGES index 869658bb60..1347d711f9 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,25 @@ +5.0.0-dev.505 | 2022-05-26 16:08:42 -0700 + + * Management framework updates (Christian Kreibich, Corelight) + + - bump zeek-client to pull in instance serialization fixes + - bump external cluster testsuite + - update agent-checkin test to reflect recent changes + - place each Zeek process in its own working dir + - set defaults for log rotation and persistent state + - add spool and state directory config settings + - establish stdout/stderr files also for cluster nodes + - default to having agents check in with the (local) controller + - move role variable from logging into framework-wide config + - distinguish supervisor/supervisee when loading agent/controller + - simplify agent and controller stdout/stderr files + - prefix the management logs with "management-" + - comment and layouting tweaks, no functional change + - rename env var that labels agents/controllers + - increase robustness of agent/controller naming + + * Add some missing NEWS entries (Tim Wojtulewicz, Corelight) + 5.0.0-dev.488 | 2022-05-26 08:23:42 -0700 * GH-2054: Allow nulls as separators for join_string_vec (Tim Wojtulewicz, Corelight) diff --git a/VERSION b/VERSION index 19f3585480..f9a6f2c8c0 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.0.0-dev.488 +5.0.0-dev.505 diff --git a/auxil/zeek-client b/auxil/zeek-client index c43dbef420..6a3d1b5516 160000 --- a/auxil/zeek-client +++ b/auxil/zeek-client @@ -1 +1 @@ -Subproject commit c43dbef4204d5e9f1b682f1dea27dbec01c18d70 +Subproject commit 6a3d1b5516e5c9343072466e3c627aa13324f2d0 diff --git a/scripts/policy/frameworks/management/__load__.zeek b/scripts/policy/frameworks/management/__load__.zeek index 96192ea366..8dc27de8c4 100644 --- a/scripts/policy/frameworks/management/__load__.zeek +++ b/scripts/policy/frameworks/management/__load__.zeek @@ -6,6 +6,7 @@ @load ./config @load ./log +@load ./persistence @load ./request @load 
./types @load ./util diff --git a/scripts/policy/frameworks/management/agent/__load__.zeek b/scripts/policy/frameworks/management/agent/__load__.zeek index 57cefe0757..590325ed9a 100644 --- a/scripts/policy/frameworks/management/agent/__load__.zeek +++ b/scripts/policy/frameworks/management/agent/__load__.zeek @@ -1,4 +1,16 @@ ##! The entry point for the Management framework's cluster agent. It runs -##! bootstrap logic for launching the agent process via Zeek's Supervisor. +##! bootstrap logic for launching an agent process via Zeek's Supervisor. +# When the user sources this from other scripts, the intent may not be just to +# create an agent, but also access Management framework infrastructure, for +# example to reconfigure ports and other settings. So we always load that +# infrastructure, but initiate the agent launch only when this is actually the +# Supervisor process. + +@if ( Supervisor::is_supervised() ) +@load policy/frameworks/management/agent/config +@endif + +@if ( Supervisor::is_supervisor() ) @load ./boot +@endif diff --git a/scripts/policy/frameworks/management/agent/api.zeek b/scripts/policy/frameworks/management/agent/api.zeek index 8ba47ee67d..f8dc1239a7 100644 --- a/scripts/policy/frameworks/management/agent/api.zeek +++ b/scripts/policy/frameworks/management/agent/api.zeek @@ -79,6 +79,7 @@ export { ## nodes: a set of cluster node names (e.g. "worker-01") to retrieve ## the values from. An empty set, supplied by default, means ## retrieval from all nodes managed by the agent. + ## global node_dispatch_request: event(reqid: string, action: vector of string, nodes: set[string] &default=set()); @@ -93,6 +94,7 @@ export { ## agent. Upon success, each :zeek:see:`Management::Result` record's ## data member contains the dispatches' response in a data type ## appropriate for the respective dispatch. + ## global node_dispatch_response: event(reqid: string, result: Management::ResultVec); @@ -145,7 +147,8 @@ export { ## communicate with. 
It is a controller-level equivalent of ## `:zeek:see:`Broker::peer_added`. ## - ## instance: an instance name, really the agent's name as per :zeek:see:`Management::Agent::name`. + ## instance: an instance name, really the agent's name as per + ## :zeek:see:`Management::Agent::get_name`. ## ## host: the IP address of the agent. (This may change in the future.) ## @@ -168,4 +171,4 @@ export { # Report informational message. global notify_log: event(instance: string, msg: string, node: string &default=""); - } +} diff --git a/scripts/policy/frameworks/management/agent/boot.zeek b/scripts/policy/frameworks/management/agent/boot.zeek index 7b8bedd088..ead12665f2 100644 --- a/scripts/policy/frameworks/management/agent/boot.zeek +++ b/scripts/policy/frameworks/management/agent/boot.zeek @@ -4,6 +4,8 @@ ##! ##! If the current process is not the Zeek supervisor, this does nothing. +@load base/utils/paths + @load ./config # The agent needs the supervisor to listen for node management requests. We @@ -21,15 +23,29 @@ event zeek_init() local sn = Supervisor::NodeConfig($name=epi$id, $bare_mode=T, $scripts=vector("policy/frameworks/management/agent/main.zeek")); - if ( Management::Agent::directory != "" ) - sn$directory = Management::Agent::directory; - if ( Management::Agent::stdout_file_suffix != "" ) - sn$stdout_file = epi$id + "." + Management::Agent::stdout_file_suffix; - if ( Management::Agent::stderr_file_suffix != "" ) - sn$stderr_file = epi$id + "." + Management::Agent::stderr_file_suffix; + # Establish the agent's working directory. If one is configured + # explicitly, use as-is if absolute. Otherwise, append it to the state + # path. Without an explicit directory, fall back to the agent name. + local statedir = build_path(Management::get_state_dir(), "nodes"); - # This helps Zeek run controller and agent with a minimal set of scripts. - sn$env["ZEEK_CLUSTER_MGMT_NODE"] = "AGENT"; + if ( ! 
mkdir(statedir) ) + print(fmt("warning: could not create state dir '%s'", statedir)); + + if ( Management::Agent::directory != "" ) + sn$directory = build_path(statedir, Management::Agent::directory); + else + sn$directory = build_path(statedir, Management::Agent::get_name()); + + if ( ! mkdir(sn$directory) ) + print(fmt("warning: could not create agent state dir '%s'", sn$directory)); + + if ( Management::Agent::stdout_file != "" ) + sn$stdout_file = Management::Agent::stdout_file; + if ( Management::Agent::stderr_file != "" ) + sn$stderr_file = Management::Agent::stderr_file; + + # This helps identify Management framework nodes reliably. + sn$env["ZEEK_MANAGEMENT_NODE"] = "AGENT"; local res = Supervisor::create(sn); diff --git a/scripts/policy/frameworks/management/agent/config.zeek b/scripts/policy/frameworks/management/agent/config.zeek index 29567b8f5e..4e1d23774e 100644 --- a/scripts/policy/frameworks/management/agent/config.zeek +++ b/scripts/policy/frameworks/management/agent/config.zeek @@ -1,7 +1,10 @@ ##! Configuration settings for a cluster agent. -@load policy/frameworks/management/config -@load policy/frameworks/management/types +@load policy/frameworks/management + +# We source the controller configuration to obtain its network coordinates, so +# we can default to connecting to it. +@load policy/frameworks/management/controller/config module Management::Agent; @@ -14,18 +17,16 @@ export { ## Agent stdout log configuration. If the string is non-empty, Zeek will ## produce a free-form log (i.e., not one governed by Zeek's logging - ## framework) in Zeek's working directory. The final log's name is - ## ".", where the name is taken from :zeek:see:`Management::Agent::name`, - ## and the suffix is defined by the following variable. If left empty, - ## no such log results. + ## framework) in the agent's working directory. If left empty, no such + ## log results. 
## ## Note that the agent also establishes a "proper" Zeek log via the ## :zeek:see:`Management::Log` module. - const stdout_file_suffix = "agent.stdout" &redef; + const stdout_file = "stdout" &redef; - ## Agent stderr log configuration. Like :zeek:see:`Management::Agent::stdout_file_suffix`, + ## Agent stderr log configuration. Like :zeek:see:`Management::Agent::stdout_file`, ## but for the stderr stream. - const stderr_file_suffix = "agent.stderr" &redef; + const stderr_file = "stderr" &redef; ## The network address the agent listens on. This only takes effect if ## the agent isn't configured to connect to the controller (see @@ -44,27 +45,28 @@ export { const default_port = 2151/tcp &redef; ## The agent's Broker topic prefix. For its own communication, the agent - ## suffixes this with "/", based on :zeek:see:`Management::Agent::name`. + ## suffixes this with "/", based on :zeek:see:`Management::Agent::get_name`. const topic_prefix = "zeek/management/agent" &redef; - ## The network coordinates of the controller. When defined, the agent - ## peers with (and connects to) the controller; otherwise the controller - ## will peer (and connect to) the agent, listening as defined by - ## :zeek:see:`Management::Agent::listen_address` and :zeek:see:`Management::Agent::listen_port`. - const controller: Broker::NetworkInfo = [ - $address="0.0.0.0", $bound_port=0/unknown] &redef; + ## The network coordinates of the controller. By default, the agent + ## connects locally to the controller at its default port. Assigning + ## a :zeek:see:`Broker::NetworkInfo` record with IP address "0.0.0.0" + ## means the controller should instead connect to the agent. If you'd + ## like to use that mode, make sure to set + ## :zeek:see:`Management::Agent::listen_address` and + ## :zeek:see:`Management::Agent::listen_port` as needed. 
+ const controller = Broker::NetworkInfo($address="127.0.0.1", + $bound_port=Management::Controller::network_info()$bound_port) &redef; - ## An optional custom output directory for stdout/stderr. Agent and - ## controller currently only log locally, not via the data cluster's - ## logger node. This means that if both write to the same log file, - ## output gets garbled. + ## An optional working directory for the agent. Agent and controller + ## currently only log locally, not via the Zeek cluster's logger + ## node. This means that if multiple agents and/or controllers work from + ## the same directory, output may get garbled. When not set, defaults to + ## a directory named after the agent (as per its get_name() result). const directory = "" &redef; - ## The working directory for data cluster nodes created by this - ## agent. If you make this a relative path, note that the path is - ## relative to the agent's working directory, since it creates data - ## cluster nodes. - const cluster_directory = "" &redef; + ## Returns the effective name of this agent. + global get_name: function(): string; ## Returns a :zeek:see:`Management::Instance` describing this ## instance (its agent name plus listening address/port, as applicable). 
@@ -76,6 +78,14 @@ export { global endpoint_info: function(): Broker::EndpointInfo; } +function get_name(): string + { + if ( name != "" ) + return name; + + return fmt("agent-%s", gethostname()); + } + function instance(): Management::Instance { local epi = endpoint_info(); @@ -89,10 +99,7 @@ function endpoint_info(): Broker::EndpointInfo local epi: Broker::EndpointInfo; local network: Broker::NetworkInfo; - if ( Management::Agent::name != "" ) - epi$id = Management::Agent::name; - else - epi$id = fmt("agent-%s", gethostname()); + epi$id = get_name(); if ( Management::Agent::listen_address != "" ) network$address = Management::Agent::listen_address; diff --git a/scripts/policy/frameworks/management/agent/main.zeek b/scripts/policy/frameworks/management/agent/main.zeek index 45993b29c4..574d2a7674 100644 --- a/scripts/policy/frameworks/management/agent/main.zeek +++ b/scripts/policy/frameworks/management/agent/main.zeek @@ -4,6 +4,8 @@ ##! supervisor. @load base/frameworks/broker +@load base/utils/paths + @load policy/frameworks/management @load policy/frameworks/management/node/api @load policy/frameworks/management/node/config @@ -40,7 +42,7 @@ redef record Management::Request::Request += { }; # Tag our logs correctly -redef Management::Log::role = Management::AGENT; +redef Management::role = Management::AGENT; # The global configuration as passed to us by the controller global g_config: Management::Configuration; @@ -77,7 +79,7 @@ event SupervisorControl::create_response(reqid: string, result: string) Management::Log::error(msg); Broker::publish(agent_topic(), Management::Agent::API::notify_error, - Management::Agent::name, msg, name); + Management::Agent::get_name(), msg, name); } Management::Request::finish(reqid); @@ -97,7 +99,7 @@ event SupervisorControl::destroy_response(reqid: string, result: bool) Management::Log::error(msg); Broker::publish(agent_topic(), Management::Agent::API::notify_error, - Management::Agent::name, msg, name); + 
Management::Agent::get_name(), msg, name); } Management::Request::finish(reqid); @@ -150,7 +152,7 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M for ( node in config$nodes ) { - if ( node$instance == Management::Agent::name ) + if ( node$instance == Management::Agent::get_name() ) g_nodes[node$name] = node; # The cluster and supervisor frameworks require a port for every @@ -172,7 +174,13 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M g_cluster[node$name] = cep; } - # Apply the new configuration via the supervisor + # Apply the new configuration via the supervisor. + # + # XXX this should launch the nodes in controlled order (loggers -> + # manager -> proxies -> workers), ideally checking that one stage is up + # before launching the next. This is tricky because that's not the point + # of the Supervisor's response event. Until we have this, bootstrap + # might be noisy, particularly in the Broker log. for ( nodename in g_nodes ) { @@ -181,8 +189,17 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M nc = Supervisor::NodeConfig($name=nodename); - if ( Management::Agent::cluster_directory != "" ) - nc$directory = Management::Agent::cluster_directory; + local statedir = build_path(Management::get_state_dir(), "nodes"); + + if ( ! mkdir(statedir) ) + Management::Log::warning(fmt("could not create state dir '%s'", statedir)); + + statedir = build_path(statedir, nodename); + + if ( ! mkdir(statedir) ) + Management::Log::warning(fmt("could not create node state dir '%s'", statedir)); + + nc$directory = statedir; if ( node?$interface ) nc$interface = node$interface; @@ -198,6 +215,11 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M # node.
nc$scripts[|nc$scripts|] = "policy/frameworks/management/node"; + if ( Management::Node::stdout_file != "" ) + nc$stdout_file = Management::Node::stdout_file; + if ( Management::Node::stderr_file != "" ) + nc$stderr_file = Management::Node::stderr_file; + # XXX could use options to enable per-node overrides for # directory, stdout, stderr, others? @@ -214,7 +236,7 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M { local res = Management::Result( $reqid = reqid, - $instance = Management::Agent::name); + $instance = Management::Agent::get_name()); Management::Log::info(fmt("tx Management::Agent::API::set_configuration_response %s", Management::result_to_string(res))); @@ -232,7 +254,7 @@ event SupervisorControl::status_response(reqid: string, result: Supervisor::Stat Management::Request::finish(reqid); local res = Management::Result( - $reqid = req$parent_id, $instance = Management::Agent::name); + $reqid = req$parent_id, $instance = Management::Agent::get_name()); local node_statuses: Management::NodeStatusVec; @@ -264,9 +286,9 @@ event SupervisorControl::status_response(reqid: string, result: Supervisor::Stat } else { - if ( "ZEEK_CLUSTER_MGMT_NODE" in sns$node$env ) + if ( "ZEEK_MANAGEMENT_NODE" in sns$node$env ) { - local role = sns$node$env["ZEEK_CLUSTER_MGMT_NODE"]; + local role = sns$node$env["ZEEK_MANAGEMENT_NODE"]; if ( role == "CONTROLLER" ) { cns$mgmt_role = Management::CONTROLLER; @@ -494,7 +516,7 @@ event Management::Agent::API::agent_welcome_request(reqid: string) local res = Management::Result( $reqid = reqid, - $instance = Management::Agent::name); + $instance = Management::Agent::get_name()); Management::Log::info(fmt("tx Management::Agent::API::agent_welcome_response %s", Management::result_to_string(res))); @@ -515,7 +537,7 @@ event Management::Agent::API::agent_standby_request(reqid: string) local res = Management::Result( $reqid = reqid, - $instance = Management::Agent::name); + $instance = 
Management::Agent::get_name()); Management::Log::info(fmt("tx Management::Agent::API::agent_standby_response %s", Management::result_to_string(res))); diff --git a/scripts/policy/frameworks/management/config.zeek b/scripts/policy/frameworks/management/config.zeek index 7b87655ae5..9ca80caf85 100644 --- a/scripts/policy/frameworks/management/config.zeek +++ b/scripts/policy/frameworks/management/config.zeek @@ -5,9 +5,18 @@ ##! anyway). For role-specific settings, see management/controller/config.zeek ##! and management/agent/config.zeek. +@load base/misc/installation + +@load ./types + module Management; export { + ## The role of this process in cluster management. Use this to + ## differentiate code based on the type of node in which it ends up + ## running. + const role = Management::NONE &redef; + ## The fallback listen address if more specific adddresses, such as ## the controller's :zeek:see:`Management::Controller::listen_address` ## remains empty. Unless redefined, this uses Broker's own default @@ -17,4 +26,41 @@ export { ## The retry interval for Broker connnects. Defaults to a more ## aggressive value compared to Broker's 30s. const connect_retry = 1sec &redef; + + ## The toplevel directory in which the Management framework creates + ## spool state for any Zeek nodes, including the Zeek cluster, agents, + ## and the controller. Don't use this directly, use the + ## :zeek:see:`Management::get_spool_dir` function. + const spool_dir = getenv("ZEEK_MANAGEMENT_SPOOL_DIR") &redef; + + ## The toplevel directory for variable state, such as Broker data + ## stores. Don't use this directly, use the + ## :zeek:see:`Management::get_state_dir` function. + const state_dir = getenv("ZEEK_MANAGEMENT_STATE_DIR") &redef; + + ## Returns the effective spool directory for the management framework. + ## That's :zeek:see:`Management::spool_dir` when set, otherwise the + ## installation's spool directory. 
+ global get_spool_dir: function(): string; + + ## Returns the effective state directory for the management framework. + ## That's :zeek:see:`Management::state_dir` when set, otherwise the + ## installation's state directory. + global get_state_dir: function(): string; } + +function get_spool_dir(): string + { + if ( spool_dir != "" ) + return spool_dir; + + return Installation::spool_dir; + } + +function get_state_dir(): string + { + if ( state_dir != "" ) + return state_dir; + + return Installation::state_dir; + } diff --git a/scripts/policy/frameworks/management/controller/__load__.zeek b/scripts/policy/frameworks/management/controller/__load__.zeek index 47af9762c5..16534d86d4 100644 --- a/scripts/policy/frameworks/management/controller/__load__.zeek +++ b/scripts/policy/frameworks/management/controller/__load__.zeek @@ -1,4 +1,16 @@ ##! The entry point for the Management framework's cluster controller. It runs -##! bootstrap logic for launching the controller process via Zeek's Supervisor. +##! bootstrap logic for launching a controller process via Zeek's Supervisor. +# When the user sources this from other scripts, the intent may not be just to +# create a controller, but also access Management framework infrastructure, for +# example to reconfigure ports and other settings. So we always load that +# infrastructure, but initiate the controller launch only when this is actually +# the Supervisor process. 
+ +@if ( Supervisor::is_supervised() ) +@load policy/frameworks/management/controller/config +@endif + +@if ( Supervisor::is_supervisor() ) @load ./boot +@endif diff --git a/scripts/policy/frameworks/management/controller/api.zeek b/scripts/policy/frameworks/management/controller/api.zeek index d3822f3d02..b840aecab6 100644 --- a/scripts/policy/frameworks/management/controller/api.zeek +++ b/scripts/policy/frameworks/management/controller/api.zeek @@ -97,6 +97,7 @@ export { ## member is a vector of :zeek:see:`Management::NodeStatus` ## records, covering the nodes at that instance. Results may also indicate ## failure, with error messages indicating what went wrong. + ## global get_nodes_response: event(reqid: string, result: Management::ResultVec); @@ -115,6 +116,7 @@ export { ## nodes: a set of cluster node names (e.g. "worker-01") to retrieve ## the values from. An empty set, supplied by default, means ## retrieval from all current cluster nodes. + ## global get_id_value_request: event(reqid: string, id: string, nodes: set[string] &default=set()); @@ -128,6 +130,7 @@ export { ## data field contains a string with the JSON rendering (as produced ## by :zeek:id:`to_json`, including the error strings it potentially ## returns). + ## global get_id_value_response: event(reqid: string, result: Management::ResultVec); @@ -167,4 +170,4 @@ export { ## instances: the set of instance names now ready. ## global notify_agents_ready: event(instances: set[string]); - } +} diff --git a/scripts/policy/frameworks/management/controller/boot.zeek b/scripts/policy/frameworks/management/controller/boot.zeek index a3c679c257..6470385d94 100644 --- a/scripts/policy/frameworks/management/controller/boot.zeek +++ b/scripts/policy/frameworks/management/controller/boot.zeek @@ -5,6 +5,8 @@ ##! ##! If the current process is not the Zeek supervisor, this does nothing. 
+@load base/utils/paths + @load ./config event zeek_init() @@ -16,15 +18,29 @@ event zeek_init() local sn = Supervisor::NodeConfig($name=epi$id, $bare_mode=T, $scripts=vector("policy/frameworks/management/controller/main.zeek")); + # Establish the controller's working directory. If one is configured + # explicitly, use as-is if absolute. Otherwise, append it to the state + # path. Without an explicit directory, fall back to the controller name. + local statedir = build_path(Management::get_state_dir(), "nodes"); + + if ( ! mkdir(statedir) ) + print(fmt("warning: could not create state dir '%s'", statedir)); + if ( Management::Controller::directory != "" ) - sn$directory = Management::Controller::directory; + sn$directory = build_path(statedir, Management::Controller::directory); + else + sn$directory = build_path(statedir, Management::Controller::get_name()); + + if ( ! mkdir(sn$directory) ) + print(fmt("warning: could not create controller state dir '%s'", sn$directory)); + if ( Management::Controller::stdout_file != "" ) sn$stdout_file = Management::Controller::stdout_file; if ( Management::Controller::stderr_file != "" ) sn$stderr_file = Management::Controller::stderr_file; - # This helps Zeek run controller and agent with a minimal set of scripts. - sn$env["ZEEK_CLUSTER_MGMT_NODE"] = "CONTROLLER"; + # This helps identify Management framework nodes reliably. + sn$env["ZEEK_MANAGEMENT_NODE"] = "CONTROLLER"; local res = Supervisor::create(sn); diff --git a/scripts/policy/frameworks/management/controller/config.zeek b/scripts/policy/frameworks/management/controller/config.zeek index c97c11bb6a..2728d01ec3 100644 --- a/scripts/policy/frameworks/management/controller/config.zeek +++ b/scripts/policy/frameworks/management/controller/config.zeek @@ -1,7 +1,6 @@ ##! Configuration settings for the cluster controller.
-@load policy/frameworks/management/config -@load policy/frameworks/management/types +@load policy/frameworks/management module Management::Controller; @@ -12,18 +11,18 @@ export { ## "controller-". const name = getenv("ZEEK_CONTROLLER_NAME") &redef; - ## The controller's stdout log name. If the string is non-empty, Zeek will - ## produce a free-form log (i.e., not one governed by Zeek's logging - ## framework) in Zeek's working directory. If left empty, no such log - ## results. + ## The controller's stdout log name. If the string is non-empty, Zeek + ## will produce a free-form log (i.e., not one governed by Zeek's + ## logging framework) in the controller's working directory. If left + ## empty, no such log results. ## ## Note that the controller also establishes a "proper" Zeek log via the ## :zeek:see:`Management::Log` module. - const stdout_file = "controller.stdout" &redef; + const stdout_file = "stdout" &redef; ## The controller's stderr log name. Like :zeek:see:`Management::Controller::stdout_file`, ## but for the stderr stream. - const stderr_file = "controller.stderr" &redef; + const stderr_file = "stderr" &redef; ## The network address the controller listens on. By default this uses ## the value of the ZEEK_CONTROLLER_ADDR environment variable, but you @@ -44,11 +43,14 @@ export { const topic = "zeek/management/controller" &redef; ## An optional custom output directory for stdout/stderr. Agent and - ## controller currently only log locally, not via the data cluster's + ## controller currently only log locally, not via the Zeek cluster's ## logger node. This means that if both write to the same log file, ## output gets garbled. const directory = "" &redef; + ## Returns the effective name of the controller. + global get_name: function(): string; + ## Returns a :zeek:see:`Broker::NetworkInfo` record describing the controller. 
global network_info: function(): Broker::NetworkInfo; @@ -56,6 +58,14 @@ export { global endpoint_info: function(): Broker::EndpointInfo; } +function get_name(): string + { + if ( name != "" ) + return name; + + return fmt("controller-%s", gethostname()); + } + function network_info(): Broker::NetworkInfo { local ni: Broker::NetworkInfo; @@ -79,11 +89,7 @@ function endpoint_info(): Broker::EndpointInfo { local epi: Broker::EndpointInfo; - if ( Management::Controller::name != "" ) - epi$id = Management::Controller::name; - else - epi$id = fmt("controller-%s", gethostname()); - + epi$id = Management::Controller::get_name(); epi$network = network_info(); return epi; diff --git a/scripts/policy/frameworks/management/controller/main.zeek b/scripts/policy/frameworks/management/controller/main.zeek index 728ca36518..f9d3c2d0a3 100644 --- a/scripts/policy/frameworks/management/controller/main.zeek +++ b/scripts/policy/frameworks/management/controller/main.zeek @@ -71,7 +71,7 @@ redef record Management::Request::Request += { }; # Tag our logs correctly -redef Management::Log::role = Management::CONTROLLER; +redef Management::role = Management::CONTROLLER; global check_instances_ready: function(); global add_instance: function(inst: Management::Instance); @@ -81,11 +81,10 @@ global null_config: function(): Management::Configuration; global is_null_config: function(config: Management::Configuration): bool; # Checks whether the given instance is one that we know with different -# communication settings: a a different peering direction, a different listening +# communication settings: a different peering direction, a different listening # port, etc. Used as a predicate to indicate when we need to drop the existing # one from our internal state. 
-global is_instance_connectivity_change: function - (inst: Management::Instance): bool; +global is_instance_connectivity_change: function(inst: Management::Instance): bool; # The set of agents the controller interacts with to manage to currently # configured cluster. This may be a subset of all the agents known to the diff --git a/scripts/policy/frameworks/management/log.zeek b/scripts/policy/frameworks/management/log.zeek index e8732df1bc..a6f8d37571 100644 --- a/scripts/policy/frameworks/management/log.zeek +++ b/scripts/policy/frameworks/management/log.zeek @@ -3,7 +3,7 @@ ##! supervisor. In this setting Zeek's logging framework operates locally, i.e., ##! this does not involve logger nodes. -@load ./types +@load ./config module Management::Log; @@ -64,10 +64,6 @@ export { ## message: the message to log. ## global error: function(message: string); - - ## The role of this process in cluster management. Agent and controller - ## both redefine this, and we use it during logging. - const role = Management::NONE &redef; } # Enum translations to strings. 
This avoids those enums being reported @@ -93,7 +89,7 @@ function debug(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[DEBUG], - $role=r2s[role], $message=message]); + $role=r2s[Management::role], $message=message]); } function info(message: string) @@ -103,7 +99,7 @@ function info(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[INFO], - $role=r2s[role], $message=message]); + $role=r2s[Management::role], $message=message]); } function warning(message: string) @@ -113,7 +109,7 @@ function warning(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[WARNING], - $role=r2s[role], $message=message]); + $role=r2s[Management::role], $message=message]); } function error(message: string) @@ -123,7 +119,7 @@ function error(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[ERROR], - $role=r2s[role], $message=message]); + $role=r2s[Management::role], $message=message]); } event zeek_init() @@ -136,7 +132,7 @@ event zeek_init() # Defining the stream outside of the stream creation call sidesteps # the coverage.find-bro-logs test, which tries to inventory all logs. # This log isn't yet ready for that level of scrutiny. - local stream = Log::Stream($columns=Info, $path=fmt("cluster-%s", node$name), + local stream = Log::Stream($columns=Info, $path=fmt("management-%s", node$name), $policy=log_policy); Log::create_stream(Management::Log::LOG, stream); diff --git a/scripts/policy/frameworks/management/node/config.zeek b/scripts/policy/frameworks/management/node/config.zeek index d17fd663a1..e15631aa87 100644 --- a/scripts/policy/frameworks/management/node/config.zeek +++ b/scripts/policy/frameworks/management/node/config.zeek @@ -6,4 +6,17 @@ export { ## The nodes' Broker topic. 
Cluster nodes automatically subscribe ## to it, to receive request events from the Management framework. const node_topic = "zeek/management/node" &redef; + + ## Cluster node stdout log configuration. If the string is non-empty, + ## Zeek will produce a free-form log (i.e., not one governed by Zeek's + ## logging framework) in the node's working directory. If left empty, no + ## such log results. + ## + ## Note that cluster nodes also establish a "proper" management log via + ## the :zeek:see:`Management::Log` module. + const stdout_file = "stdout" &redef; + + ## Cluster node stderr log configuration. Like + ## :zeek:see:`Management::Node::stdout_file`, but for the stderr stream. + const stderr_file = "stderr" &redef; } diff --git a/scripts/policy/frameworks/management/node/main.zeek b/scripts/policy/frameworks/management/node/main.zeek index 52aa5f1e96..df65d754b9 100644 --- a/scripts/policy/frameworks/management/node/main.zeek +++ b/scripts/policy/frameworks/management/node/main.zeek @@ -1,10 +1,14 @@ ##! This module provides Management framework functionality present in every ##! cluster node, to allowing Management agents to interact with the nodes. +@load base/frameworks/broker/store @load base/frameworks/cluster +@load base/frameworks/logging/writers/ascii +@load base/misc/installation +@load base/utils/paths +@load policy/frameworks/management @load policy/frameworks/management/agent/config -@load policy/frameworks/management/log @load ./api @load ./config @@ -12,7 +16,7 @@ module Management::Node; # Tag our logs correctly -redef Management::Log::role = Management::NODE; +redef Management::role = Management::NODE; ## The type of dispatch callbacks. These implement a particular dispatch action, ## using the provided string vector as arguments, filling results into the @@ -103,6 +107,13 @@ event Broker::peer_added(peer: Broker::EndpointInfo, msg: string) event zeek_init() { + if ( Broker::table_store_db_directory != "" && ! 
mkdir(Broker::table_store_db_directory) ) + Management::Log::error(fmt("could not create Broker data store directory '%s'", + Broker::table_store_db_directory)); + if ( Cluster::default_store_dir != "" && ! mkdir(Cluster::default_store_dir) ) + Management::Log::error(fmt("could not create Cluster store directory '%s'", + Cluster::default_store_dir)); + local epi = Management::Agent::endpoint_info(); Broker::peer(epi$network$address, epi$network$bound_port, Management::connect_retry); diff --git a/scripts/policy/frameworks/management/persistence.zeek b/scripts/policy/frameworks/management/persistence.zeek new file mode 100644 index 0000000000..bb5731ced0 --- /dev/null +++ b/scripts/policy/frameworks/management/persistence.zeek @@ -0,0 +1,47 @@ +##! Common adjustments for any kind of Zeek node when we run the Management +##! framework. + +@load base/misc/installation +@load base/utils/paths + +@load ./config + +# For testing, keep persistent state local to the current working directory, +# and disable log rotation. +@if ( getenv("ZEEK_MANAGEMENT_TESTING") != "" ) + +redef Management::spool_dir = "."; +redef Management::state_dir = "."; +redef Log::default_rotation_interval = 0 secs; + +@else + +# For any kind of Zeek process we steer rotated logs awaiting archival into a +# queue directory in the spool. The name "log-queue" matches logger nodes' default +# config with the Supervisor; see base/frameworks/cluster/nodes/logger.zeek. 
+redef Log::default_rotation_dir = build_path(Management::get_spool_dir(), "log-queue"); + +@if ( getenv("ZEEK_MANAGEMENT_NODE") != "" ) + +# Management agents and controllers don't have loggers, nor their configuration, +# so establish a similar one here: + +function archiver_rotation_format_func(ri: Log::RotationFmtInfo): Log::RotationPath + { + local open_str = strftime(Log::default_rotation_date_format, ri$open); + local close_str = strftime(Log::default_rotation_date_format, ri$close); + local base = fmt("%s__%s__%s__", ri$path, open_str, close_str); + local rval = Log::RotationPath($file_basename=base); + return rval; + } + +redef Log::default_rotation_interval = 1 hrs; +redef Log::enable_local_logging = T; +redef Log::enable_remote_logging = T; +redef Log::rotation_format_func = archiver_rotation_format_func; + +redef LogAscii::enable_leftover_log_rotation = T; + +@endif # ZEEK_MANAGEMENT_NODE + +@endif # ZEEK_MANAGEMENT_TESTING diff --git a/scripts/policy/frameworks/management/types.zeek b/scripts/policy/frameworks/management/types.zeek index 6d89fbda1a..db3ac8da55 100644 --- a/scripts/policy/frameworks/management/types.zeek +++ b/scripts/policy/frameworks/management/types.zeek @@ -81,7 +81,7 @@ export { state: State; ## Role the node plays in cluster management. mgmt_role: Role &default=NONE; - ## Role the node plays in the data cluster. + ## Role the node plays in the Zeek cluster. cluster_role: Supervisor::ClusterRole &default=Supervisor::NONE; ## Process ID of the node. This is optional because the Supervisor may not have ## a PID when a node is still bootstrapping. 
diff --git a/scripts/test-all-policy.zeek b/scripts/test-all-policy.zeek index 0f2de90609..2dc50cd8af 100644 --- a/scripts/test-all-policy.zeek +++ b/scripts/test-all-policy.zeek @@ -24,6 +24,7 @@ @load frameworks/management/__load__.zeek @load frameworks/management/config.zeek @load frameworks/management/log.zeek +@load frameworks/management/persistence.zeek # @load frameworks/management/node/__load__.zeek @load frameworks/management/node/api.zeek @load frameworks/management/node/config.zeek diff --git a/testing/btest/Baseline/coverage.bare-mode-errors/errors b/testing/btest/Baseline/coverage.bare-mode-errors/errors index bc9bd28f83..6e8c09e1bd 100644 --- a/testing/btest/Baseline/coverage.bare-mode-errors/errors +++ b/testing/btest/Baseline/coverage.bare-mode-errors/errors @@ -2,8 +2,8 @@ ### NOTE: This file has been sorted with diff-sort. warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:15 "Remove in v5.1. Use log-certs-base64.zeek instead." warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from command line arguments "Remove in v5.1. Use log-certs-base64.zeek instead." -warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:65 ("Remove in v5.1. OCSP logging is now enabled by default") -warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:65 ("Remove in v5.1. OCSP logging is now enabled by default") +warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:66 ("Remove in v5.1. OCSP logging is now enabled by default") +warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:66 ("Remove in v5.1. OCSP logging is now enabled by default") warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. 
OCSP logging is now enabled by default") warning in <...>/notary.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:5 ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).") warning in <...>/notary.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).") diff --git a/testing/btest/Baseline/scripts.policy.frameworks.management.controller.agent-checkin/zeek.controller.stdout b/testing/btest/Baseline/scripts.policy.frameworks.management.controller.agent-checkin/zeek.nodes.controller.stdout similarity index 100% rename from testing/btest/Baseline/scripts.policy.frameworks.management.controller.agent-checkin/zeek.controller.stdout rename to testing/btest/Baseline/scripts.policy.frameworks.management.controller.agent-checkin/zeek.nodes.controller.stdout diff --git a/testing/btest/scripts/policy/frameworks/management/controller/agent-checkin.zeek b/testing/btest/scripts/policy/frameworks/management/controller/agent-checkin.zeek index ecdc3b17ed..3d248848c7 100644 --- a/testing/btest/scripts/policy/frameworks/management/controller/agent-checkin.zeek +++ b/testing/btest/scripts/policy/frameworks/management/controller/agent-checkin.zeek @@ -1,16 +1,15 @@ # This test verifies basic agent-controller communication in the Management # framework. We launch agent and controller via the supervisor, add an extra # handler for the notify_agent_hello event that travels agent -> controller, and -# verify its print output in the controller's stdout log. +# verify that it prints receipt of the event to stdout. 
-# The following env vars is known to the controller framework +# The following environment variables are known to the controller framework: # @TEST-PORT: ZEEK_CONTROLLER_PORT # @TEST-PORT: BROKER_PORT -# A bit of a detour to get the port number into the agent configuration -# @TEST-EXEC: btest-bg-run zeek zeek -j %INPUT +# @TEST-EXEC: ZEEK_MANAGEMENT_TESTING=1 btest-bg-run zeek zeek -j %INPUT # @TEST-EXEC: btest-bg-wait 10 -# @TEST-EXEC: btest-diff zeek/controller.stdout +# @TEST-EXEC: btest-diff zeek/nodes/controller/stdout @load policy/frameworks/management/agent @load policy/frameworks/management/controller @@ -34,7 +33,7 @@ event zeek_init() # We're using the controller to shut everything down once the # notify_agent_hello event has arrived. The controller doesn't normally # talk to the supervisor, so connect to it. - if ( Supervisor::node()$name == "controller" ) + if ( Management::role == Management::CONTROLLER ) { Broker::peer(getenv("ZEEK_DEFAULT_LISTEN_ADDRESS"), Broker::default_port, Broker::default_listen_retry); Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::stop_request); @@ -43,7 +42,7 @@ event zeek_init() event Management::Agent::API::notify_agent_hello(instance: string, host: addr, api_version: count) { - if ( Supervisor::node()$name == "controller" ) + if ( Management::role == Management::CONTROLLER ) { # On rare occasion it can happen that we log this twice, which'll need # investigating. For now we ensure we only do so once. diff --git a/testing/external/commit-hash.zeek-testing-cluster b/testing/external/commit-hash.zeek-testing-cluster index dbf1ad03e2..461b2d5856 100644 --- a/testing/external/commit-hash.zeek-testing-cluster +++ b/testing/external/commit-hash.zeek-testing-cluster @@ -1 +1 @@ -fa9e808baedfeb23b4125f390cb3021c535a7d2b +01e1d1ad94cea81091c74e829d86815fdef0dd62