Fix races in scripts.base.frameworks.intel.cluster-transparency-with-proxy test

This test was unstable for two reasons:

- Nothing verified whether the two workers had checked in with the proxy,
meaning that messages between the workers and proxies could get lost. This adds
an extra node_up event that the proxy generates synthetically, with values
recognizable to the manager, once the proxy sees both workers connected. This is
a test-level workaround for what should really be a cluster-is-ready event in
the cluster framework proper.

- More subtle: the Intel framework makes the manager send its current
min_data_store to newly connected workers, which in the case of this tests
introduces a race: since the data store, arriving at the worker, replaces the
existing value, it could actually remove already established items if timing was
right. This would lead to the count in the test reaching 3, assuming that 3
intel items are available, when in reality it was less, causing the
Intel::seen() call to do nothing. We now disable the sending of the data store
upon connect, via the global added in the previous commit.

This also expands the test slightly so that both workers call Intel::seen() for
the items inserted by the other worker. This is added validation for the second
point above, because in the presence of that race one occasionally sees one log
entry make it, and the other fail.
This commit is contained in:
Christian Kreibich 2022-06-01 22:18:51 -07:00
parent 892a3a8452
commit ed5d60f758
4 changed files with 58 additions and 19 deletions

View file

@ -1,4 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
### NOTE: This file has been sorted with diff-sort.
#separator \x09 #separator \x09
#set_separator , #set_separator ,
#empty_field (empty) #empty_field (empty)
@ -7,5 +8,6 @@
#open XXXX-XX-XX-XX-XX-XX #open XXXX-XX-XX-XX-XX-XX
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where seen.node matched sources fuid file_mime_type file_desc #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where seen.node matched sources fuid file_mime_type file_desc
#types time string addr port addr port string enum enum string set[enum] set[string] string string string #types time string addr port addr port string enum enum string set[enum] set[string] string string string
XXXXXXXXXX.XXXXXX - - - - - 123.123.123.123 Intel::ADDR Intel::IN_ANYWHERE worker-2 Intel::ADDR worker-1 - - -
#close XXXX-XX-XX-XX-XX-XX #close XXXX-XX-XX-XX-XX-XX
XXXXXXXXXX.XXXXXX - - - - - 123.123.123.123 Intel::ADDR Intel::IN_ANYWHERE worker-2 Intel::ADDR worker-1 - - -
XXXXXXXXXX.XXXXXX - - - - - 4.3.2.1 Intel::ADDR Intel::IN_ANYWHERE worker-1 Intel::ADDR worker-2 - - -

View file

@ -4,3 +4,4 @@ new_indicator: 1.2.3.4 inserted by manager
new_indicator: 123.123.123.123 inserted by worker-1 new_indicator: 123.123.123.123 inserted by worker-1
new_indicator: 4.3.2.1 inserted by worker-2 new_indicator: 4.3.2.1 inserted by worker-2
new_item triggered for 123.123.123.123 by worker-1 on worker-1 new_item triggered for 123.123.123.123 by worker-1 on worker-1
seeing 4.3.2.1

View file

@ -1,7 +1,7 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
### NOTE: This file has been sorted with diff-sort. ### NOTE: This file has been sorted with diff-sort.
Doing a lookup
new_indicator: 1.2.3.4 inserted by manager new_indicator: 1.2.3.4 inserted by manager
new_indicator: 123.123.123.123 inserted by worker-1 new_indicator: 123.123.123.123 inserted by worker-1
new_indicator: 4.3.2.1 inserted by worker-2 new_indicator: 4.3.2.1 inserted by worker-2
new_item triggered for 4.3.2.1 by worker-2 on worker-2 new_item triggered for 4.3.2.1 by worker-2 on worker-2
seeing 123.123.123.123

View file

@ -1,3 +1,7 @@
# This test verifies intel data propagation via a cluster with a proxy. The
# manager and both workers insert intel items, and both workers do lookups that
# we expect to hit.
# @TEST-PORT: BROKER_PORT1 # @TEST-PORT: BROKER_PORT1
# @TEST-PORT: BROKER_PORT2 # @TEST-PORT: BROKER_PORT2
# @TEST-PORT: BROKER_PORT3 # @TEST-PORT: BROKER_PORT3
@ -11,7 +15,7 @@
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff manager-1/.stdout # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff manager-1/.stdout
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff worker-1/.stdout # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff worker-1/.stdout
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff worker-2/.stdout # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff worker-2/.stdout
# @TEST-EXEC: btest-diff manager-1/intel.log # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-timestamps-and-sort btest-diff manager-1/intel.log
@TEST-START-FILE cluster-layout.zeek @TEST-START-FILE cluster-layout.zeek
redef Cluster::nodes = { redef Cluster::nodes = {
@ -29,18 +33,42 @@ module Intel;
redef Log::default_rotation_interval = 0sec; redef Log::default_rotation_interval = 0sec;
# Disable the initial send of min_data_store to the workers. Its arrival at the
# workers introduces nondeterminism that can trip up this test, because even
# though the worker_data counter below reaches 3, less than 3 intel items may be
# in the worker's local store.
redef Intel::send_store_on_node_up = F;
global log_writes = 0;
global worker_data = 0;
global proxy_ready = F;
global sent_data = F;
event Cluster::node_up(name: string, id: string) event Cluster::node_up(name: string, id: string)
{ {
# Insert the data once both workers are connected. if ( Cluster::local_node_type() == Cluster::PROXY && Cluster::worker_count == 2 )
if ( Cluster::local_node_type() == Cluster::MANAGER && Cluster::worker_count == 2 && Cluster::proxy_pool$alive_count == 1 )
{ {
# Make the proxy tell the manager explicitly when both workers
# have checked in. The cluster framework normally generates this
# event with the Broker ID as second argument. We borrow the
# event to signal readiness, using recognizable arguments.
Broker::publish(Cluster::manager_topic, Cluster::node_up, Cluster::node, Cluster::node);
return;
}
if ( Cluster::local_node_type() == Cluster::MANAGER )
{
if ( name == "proxy-1" && id == "proxy-1" )
proxy_ready = T;
# Insert data once both workers and the proxy are connected, and
# the proxy has indicated that it too has both workers connected.
if ( Cluster::worker_count == 2 && Cluster::proxy_pool$alive_count == 1 && proxy_ready )
Intel::insert([$indicator="1.2.3.4", $indicator_type=Intel::ADDR, $meta=[$source="manager"]]); Intel::insert([$indicator="1.2.3.4", $indicator_type=Intel::ADDR, $meta=[$source="manager"]]);
} }
} }
global worker2_data = 0; # Watch for new indicators sent to workers.
global sent_data = F;
# Watch for new indicators send to workers.
event Intel::insert_indicator(item: Intel::Item) event Intel::insert_indicator(item: Intel::Item)
{ {
print fmt("new_indicator: %s inserted by %s", item$indicator, item$meta$source); print fmt("new_indicator: %s inserted by %s", item$indicator, item$meta$source);
@ -56,16 +84,23 @@ event Intel::insert_indicator(item: Intel::Item)
Intel::insert([$indicator="4.3.2.1", $indicator_type=Intel::ADDR, $meta=[$source="worker-2"]]); Intel::insert([$indicator="4.3.2.1", $indicator_type=Intel::ADDR, $meta=[$source="worker-2"]]);
} }
# We're forcing worker-2 to do a lookup when it has three intelligence items # Each worker does a lookup when it has 3 intel items which were
# which were distributed over the cluster (data inserted locally is resent). # distributed over the cluster (data inserted locally is resent).
# Worker 1 observes the host inserted by worker 2, and vice versa.
if ( Cluster::node == "worker-1" )
{
if ( ++worker_data == 3 )
{
print "seeing 4.3.2.1";
Intel::seen([$host=4.3.2.1, $where=Intel::IN_ANYWHERE]);
}
}
if ( Cluster::node == "worker-2" ) if ( Cluster::node == "worker-2" )
{ {
++worker2_data; if ( ++worker_data == 3 )
if ( worker2_data == 3 )
{ {
# Now that everything is inserted, see if we can match on the data inserted print "seeing 123.123.123.123";
# by worker-1.
print "Doing a lookup";
Intel::seen([$host=123.123.123.123, $where=Intel::IN_ANYWHERE]); Intel::seen([$host=123.123.123.123, $where=Intel::IN_ANYWHERE]);
} }
} }
@ -86,6 +121,7 @@ event Intel::new_item(item: Intel::Item)
event Intel::log_intel(rec: Intel::Info) event Intel::log_intel(rec: Intel::Info)
{ {
if ( ++log_writes == 2 )
terminate(); terminate();
} }