From 892a3a845273ab93047ad9e57f51deff2936de6e Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Wed, 1 Jun 2022 17:41:20 -0700 Subject: [PATCH 1/3] Add Intel::send_store_on_node_up boolean to control min_data_store delivery This adds a redefinable const to the internals of the Intel framework, to allow suppression of the manager sending its current min_data_store when a worker connects. This feature is desirable for nodes that check in "late" to bring them up to speed, but during testing it introduces nondeterminism. --- scripts/base/frameworks/intel/cluster.zeek | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/base/frameworks/intel/cluster.zeek b/scripts/base/frameworks/intel/cluster.zeek index 53ac323fa5..d890e455f9 100644 --- a/scripts/base/frameworks/intel/cluster.zeek +++ b/scripts/base/frameworks/intel/cluster.zeek @@ -11,6 +11,11 @@ module Intel; global insert_item: event(item: Item) &is_used; global insert_indicator: event(item: Item) &is_used; +# By default the manager sends its current min_data_store to connecting workers. +# During testing it's handy to suppress this, since receipt of the store +# introduces nondeterminism when mixed with explicit data insertions. +const send_store_on_node_up = T &redef; + # If this process is not a manager process, we don't want the full metadata. @if ( Cluster::local_node_type() != Cluster::MANAGER ) redef have_full_data = F; @@ -25,9 +30,10 @@ event zeek_init() # Handling of new worker nodes. event Cluster::node_up(name: string, id: string) { - # When a worker connects, send it the complete minimal data store. - # It will be kept up to date after this by the insert_indicator event. - if ( name in Cluster::nodes && Cluster::nodes[name]$node_type == Cluster::WORKER ) + # When a worker connects, send it the complete minimal data store unless + # we turned off that feature. The store will be kept up to date after + # this by the insert_indicator event. + if ( send_store_on_node_up && name in Cluster::nodes && Cluster::nodes[name]$node_type == Cluster::WORKER ) { Broker::publish_id(Cluster::node_topic(name), "Intel::min_data_store"); } From ed5d60f758c3acedc8989864c3e22287f86e8380 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Wed, 1 Jun 2022 22:18:51 -0700 Subject: [PATCH 2/3] Fix races in scripts.base.frameworks.intel.cluster-transparency-with-proxy test This test was unstable for two reasons: - Nothing verified whether the two workers had checked in with the proxy, meaning that messages between the workers and proxies could get lost. This adds an extra node_up event that the proxy generates synthetically, with values recognizable to the manager, once the proxy sees both workers connected. This is a test-level workaround for what should really be a cluster-is-ready event in the cluster framework proper. - More subtle: the Intel framework makes the manager send its current min_data_store to newly connected workers, which in the case of this tests introduces a race: since the data store, arriving at the worker, replaces the existing value, it could actually remove already established items if timing was right. This would lead to the count in the test reaching 3, assuming that 3 intel items are available, when in reality it was less, causing the Intel::seen() call to do nothing. We now disable the sending of the data store upon connect, via the global added in the previous commit. This also expands the test slightly so that both workers call Intel::seen() for the items inserted by the other worker. This is added validation for the second point above, because in the presence of that race one occasionally sees one log entry make it, and the other fail. --- .../manager-1.intel.log | 4 +- .../worker-1..stdout | 1 + .../worker-2..stdout | 2 +- .../cluster-transparency-with-proxy.zeek | 70 ++++++++++++++----- 4 files changed, 58 insertions(+), 19 deletions(-) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/manager-1.intel.log index 9a4fd38664..adeb0aa95a 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/manager-1.intel.log +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/manager-1.intel.log @@ -1,4 +1,5 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +### NOTE: This file has been sorted with diff-sort. #separator \x09 #set_separator , #empty_field (empty) @@ -7,5 +8,6 @@ #open XXXX-XX-XX-XX-XX-XX #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where seen.node matched sources fuid file_mime_type file_desc #types time string addr port addr port string enum enum string set[enum] set[string] string string string -XXXXXXXXXX.XXXXXX - - - - - 123.123.123.123 Intel::ADDR Intel::IN_ANYWHERE worker-2 Intel::ADDR worker-1 - - - #close XXXX-XX-XX-XX-XX-XX +XXXXXXXXXX.XXXXXX - - - - - 123.123.123.123 Intel::ADDR Intel::IN_ANYWHERE worker-2 Intel::ADDR worker-1 - - - +XXXXXXXXXX.XXXXXX - - - - - 4.3.2.1 Intel::ADDR Intel::IN_ANYWHERE worker-1 Intel::ADDR worker-2 - - - diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/worker-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/worker-1..stdout index c0bb0895b8..ef71aa240d 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/worker-1..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/worker-1..stdout @@ -4,3 +4,4 @@ new_indicator: 1.2.3.4 inserted by manager new_indicator: 123.123.123.123 inserted by worker-1 new_indicator: 4.3.2.1 inserted by worker-2 new_item triggered for 123.123.123.123 by worker-1 on worker-1 +seeing 4.3.2.1 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/worker-2..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/worker-2..stdout index 5611abf8a4..13c25895bb 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/worker-2..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency-with-proxy/worker-2..stdout @@ -1,7 +1,7 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. ### NOTE: This file has been sorted with diff-sort. -Doing a lookup new_indicator: 1.2.3.4 inserted by manager new_indicator: 123.123.123.123 inserted by worker-1 new_indicator: 4.3.2.1 inserted by worker-2 new_item triggered for 4.3.2.1 by worker-2 on worker-2 +seeing 123.123.123.123 diff --git a/testing/btest/scripts/base/frameworks/intel/cluster-transparency-with-proxy.zeek b/testing/btest/scripts/base/frameworks/intel/cluster-transparency-with-proxy.zeek index 0bbe1b5e19..ffaefcbc9b 100644 --- a/testing/btest/scripts/base/frameworks/intel/cluster-transparency-with-proxy.zeek +++ b/testing/btest/scripts/base/frameworks/intel/cluster-transparency-with-proxy.zeek @@ -1,3 +1,7 @@ +# This test verifies intel data propagation via a cluster with a proxy. The +# manager and both workers insert intel items, and both workers do lookups that +# we expect to hit. + # @TEST-PORT: BROKER_PORT1 # @TEST-PORT: BROKER_PORT2 # @TEST-PORT: BROKER_PORT3 @@ -11,7 +15,7 @@ # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff manager-1/.stdout # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff worker-1/.stdout # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff worker-2/.stdout -# @TEST-EXEC: btest-diff manager-1/intel.log +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-timestamps-and-sort btest-diff manager-1/intel.log @TEST-START-FILE cluster-layout.zeek redef Cluster::nodes = { @@ -27,27 +31,51 @@ redef Cluster::nodes = { module Intel; -redef Log::default_rotation_interval=0sec; +redef Log::default_rotation_interval = 0sec; + +# Disable the initial send of min_data_store to the workers. Its arrival at the +# workers introduces nondeterminism that can trip up this test, because even +# though the worker_data counter below reaches 3, less than 3 intel items may be +# in the worker's local store. +redef Intel::send_store_on_node_up = F; + +global log_writes = 0; +global worker_data = 0; +global proxy_ready = F; +global sent_data = F; event Cluster::node_up(name: string, id: string) { - # Insert the data once both workers are connected. - if ( Cluster::local_node_type() == Cluster::MANAGER && Cluster::worker_count == 2 && Cluster::proxy_pool$alive_count == 1 ) + if ( Cluster::local_node_type() == Cluster::PROXY && Cluster::worker_count == 2 ) { - Intel::insert([$indicator="1.2.3.4", $indicator_type=Intel::ADDR, $meta=[$source="manager"]]); + # Make the proxy tell the manager explicitly when both workers + # have checked in. The cluster framework normally generates this + # event with the Broker ID as second argument. We borrow the + # event to signal readiness, using recognizable arguments. + Broker::publish(Cluster::manager_topic, Cluster::node_up, Cluster::node, Cluster::node); + return; + } + + if ( Cluster::local_node_type() == Cluster::MANAGER ) + { + if ( name == "proxy-1" && id == "proxy-1" ) + proxy_ready = T; + + # Insert data once both workers and the proxy are connected, and + # the proxy has indicated that it too has both workers connected. + if ( Cluster::worker_count == 2 && Cluster::proxy_pool$alive_count == 1 && proxy_ready ) + Intel::insert([$indicator="1.2.3.4", $indicator_type=Intel::ADDR, $meta=[$source="manager"]]); } } -global worker2_data = 0; -global sent_data = F; -# Watch for new indicators send to workers. +# Watch for new indicators sent to workers. event Intel::insert_indicator(item: Intel::Item) { print fmt("new_indicator: %s inserted by %s", item$indicator, item$meta$source); if ( ! sent_data ) { - # We wait to insert data here because we can now be sure the + # We wait to insert data here because we can now be sure the # full cluster is constructed. sent_data = T; if ( Cluster::node == "worker-1" ) @@ -56,16 +84,23 @@ event Intel::insert_indicator(item: Intel::Item) Intel::insert([$indicator="4.3.2.1", $indicator_type=Intel::ADDR, $meta=[$source="worker-2"]]); } - # We're forcing worker-2 to do a lookup when it has three intelligence items - # which were distributed over the cluster (data inserted locally is resent). + # Each worker does a lookup when it has 3 intel items which were + # distributed over the cluster (data inserted locally is resent). + # Worker 1 observes the host inserted by worker 2, and vice versa. + if ( Cluster::node == "worker-1" ) + { + if ( ++worker_data == 3 ) + { + print "seeing 4.3.2.1"; + Intel::seen([$host=4.3.2.1, $where=Intel::IN_ANYWHERE]); + } + } + if ( Cluster::node == "worker-2" ) { - ++worker2_data; - if ( worker2_data == 3 ) + if ( ++worker_data == 3 ) { - # Now that everything is inserted, see if we can match on the data inserted - # by worker-1. - print "Doing a lookup"; + print "seeing 123.123.123.123"; Intel::seen([$host=123.123.123.123, $where=Intel::IN_ANYWHERE]); } } @@ -86,7 +121,8 @@ event Intel::new_item(item: Intel::Item) event Intel::log_intel(rec: Intel::Info) { - terminate(); + if ( ++log_writes == 2 ) + terminate(); } event Cluster::node_down(name: string, id: string) From 984d6c0136b9b617a7fe33fe95eda9e59bca2d6c Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Wed, 1 Jun 2022 22:26:25 -0700 Subject: [PATCH 3/3] Expand scripts.base.frameworks.intel.cluster-transparency test This simply expands this test to match the behavior of cluster-transparency-with-proxy, since the two are so similar. This test does not seem to need disabling the worker's initial send of the data store. --- .../manager-1.intel.log | 4 ++- .../worker-1..stdout | 1 + .../worker-2..stdout | 2 +- .../intel/cluster-transparency.zeek | 33 +++++++++++++------ 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log index 9a4fd38664..adeb0aa95a 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log @@ -1,4 +1,5 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +### NOTE: This file has been sorted with diff-sort. #separator \x09 #set_separator , #empty_field (empty) @@ -7,5 +8,6 @@ #open XXXX-XX-XX-XX-XX-XX #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where seen.node matched sources fuid file_mime_type file_desc #types time string addr port addr port string enum enum string set[enum] set[string] string string string -XXXXXXXXXX.XXXXXX - - - - - 123.123.123.123 Intel::ADDR Intel::IN_ANYWHERE worker-2 Intel::ADDR worker-1 - - - #close XXXX-XX-XX-XX-XX-XX +XXXXXXXXXX.XXXXXX - - - - - 123.123.123.123 Intel::ADDR Intel::IN_ANYWHERE worker-2 Intel::ADDR worker-1 - - - +XXXXXXXXXX.XXXXXX - - - - - 4.3.2.1 Intel::ADDR Intel::IN_ANYWHERE worker-1 Intel::ADDR worker-2 - - - diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout index c0bb0895b8..ef71aa240d 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout @@ -4,3 +4,4 @@ new_indicator: 1.2.3.4 inserted by manager new_indicator: 123.123.123.123 inserted by worker-1 new_indicator: 4.3.2.1 inserted by worker-2 new_item triggered for 123.123.123.123 by worker-1 on worker-1 +seeing 4.3.2.1 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout index 5611abf8a4..13c25895bb 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout @@ -1,7 +1,7 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. ### NOTE: This file has been sorted with diff-sort. -Doing a lookup new_indicator: 1.2.3.4 inserted by manager new_indicator: 123.123.123.123 inserted by worker-1 new_indicator: 4.3.2.1 inserted by worker-2 new_item triggered for 4.3.2.1 by worker-2 on worker-2 +seeing 123.123.123.123 diff --git a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.zeek b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.zeek index 8ba417cec3..6d20dd3c64 100644 --- a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.zeek +++ b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.zeek @@ -1,3 +1,6 @@ +# This test verifies intel data propagation via a cluster. The manager and both +# workers insert intel items, and both workers do lookups that we expect to hit. + # @TEST-PORT: BROKER_PORT1 # @TEST-PORT: BROKER_PORT2 # @TEST-PORT: BROKER_PORT3 @@ -9,7 +12,7 @@ # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff manager-1/.stdout # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff worker-1/.stdout # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff worker-2/.stdout -# @TEST-EXEC: btest-diff manager-1/intel.log +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-timestamps-and-sort btest-diff manager-1/intel.log @TEST-START-FILE cluster-layout.zeek redef Cluster::nodes = { @@ -35,8 +38,10 @@ event Cluster::node_up(name: string, id: string) } } -global worker2_data = 0; +global log_writes = 0; +global worker_data = 0; global sent_data = F; + # Watch for new indicators send to workers. event Intel::insert_indicator(item: Intel::Item) { @@ -53,16 +58,23 @@ event Intel::insert_indicator(item: Intel::Item) Intel::insert([$indicator="4.3.2.1", $indicator_type=Intel::ADDR, $meta=[$source="worker-2"]]); } - # We're forcing worker-2 to do a lookup when it has three intelligence items - # which were distributed over the cluster (data inserted locally is resent). + # Each worker does a lookup when it has 3 intel items which were + # distributed over the cluster (data inserted locally is resent). + # Worker 1 observes the host inserted by worker 2, and vice versa. + if ( Cluster::node == "worker-1" ) + { + if ( ++worker_data == 3 ) + { + print "seeing 4.3.2.1"; + Intel::seen([$host=4.3.2.1, $where=Intel::IN_ANYWHERE]); + } + } + if ( Cluster::node == "worker-2" ) { - ++worker2_data; - if ( worker2_data == 3 ) + if ( ++worker_data == 3 ) { - # Now that everything is inserted, see if we can match on the data inserted - # by worker-1. - print "Doing a lookup"; + print "seeing 123.123.123.123"; Intel::seen([$host=123.123.123.123, $where=Intel::IN_ANYWHERE]); } } @@ -83,7 +95,8 @@ event Intel::new_item(item: Intel::Item) event Intel::log_intel(rec: Intel::Info) { - terminate(); + if ( ++log_writes == 2 ) + terminate(); } event Cluster::node_down(name: string, id: string)