From fcef7f4925c0c6eb893af3bba7b9e9ac11d7a9f3 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Fri, 15 Apr 2022 18:27:26 -0700 Subject: [PATCH] Management framework: improve handling of node run states When agents receive a configuration, we don't currently honor requested run states (there's no such thing as registering a node but not running it, for example). To reflect this, we now start off nodes in state PENDING as we launch them via the Supervisor, and move them to RUNNING when they check in with us via Management::Node::API::notify_node_hello. --- .../frameworks/management/agent/main.zeek | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/scripts/policy/frameworks/management/agent/main.zeek b/scripts/policy/frameworks/management/agent/main.zeek index 004b874888..7771e7bdf5 100644 --- a/scripts/policy/frameworks/management/agent/main.zeek +++ b/scripts/policy/frameworks/management/agent/main.zeek @@ -132,11 +132,10 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M for ( nodename in g_nodes ) supervisor_destroy(nodename); - g_nodes = table(); - # Refresh the cluster and nodes tables - + g_nodes = table(); g_cluster = table(); + for ( node in config$nodes ) { if ( node$instance == Management::Agent::name ) @@ -166,6 +165,8 @@ event Management::Agent::API::set_configuration_request(reqid: string, config: M for ( nodename in g_nodes ) { node = g_nodes[nodename]; + node$state = Management::PENDING; + nc = Supervisor::NodeConfig($name=nodename); if ( Management::Agent::cluster_directory != "" ) @@ -237,6 +238,11 @@ event SupervisorControl::status_response(reqid: string, result: Supervisor::Stat { cns$cluster_role = sns$node$cluster[node]$role; + # For cluster nodes, copy run state from g_nodes, our + # live node status table. + if ( node in g_nodes ) + cns$state = g_nodes[node]$state; + # The supervisor's responses use 0/tcp (not 0/unknown) # when indicating an unused port because its internal # serialization always assumes TCP. @@ -251,12 +257,22 @@ event SupervisorControl::status_response(reqid: string, result: Supervisor::Stat if ( role == "CONTROLLER" ) { cns$mgmt_role = Management::CONTROLLER; + + # Automatically declare the controller in running state + # here -- we'd not have received a request that brought + # us here otherwise. + cns$state = Management::RUNNING; + # The controller always listens, so the Zeek client can connect. cns$p = Management::Agent::endpoint_info()$network$bound_port; } else if ( role == "AGENT" ) { cns$mgmt_role = Management::AGENT; + + # Similarly to above, always declare agent running. We are. :) + cns$state = Management::RUNNING; + # If we have a controller address, the agent connects to it # and does not listen. See zeek_init() below for similar logic. if ( Management::Agent::controller$address == "0.0.0.0" ) @@ -268,13 +284,9 @@ event SupervisorControl::status_response(reqid: string, result: Supervisor::Stat } } - # A PID is available if a supervised node has fully launched - # and is therefore running. + # A PID is available if a supervised node has fully launched. if ( sns?$pid ) - { cns$pid = sns$pid; - cns$state = Management::RUNNING; - } node_statuses += cns; } @@ -409,6 +421,14 @@ event Management::Agent::API::agent_standby_request(reqid: string) event Management::Agent::API::agent_standby_response(reqid, res); } +event Management::Node::API::notify_node_hello(node: string) + { + Management::Log::info(fmt("rx Management::Node::API::notify_node_hello %s", node)); + + if ( node in g_nodes ) + g_nodes[node]$state = Management::RUNNING; + } + event Broker::peer_added(peer: Broker::EndpointInfo, msg: string) { # This does not (cannot?) immediately verify that the new peer