zeek/testing/btest/scripts/base/frameworks/intel/cluster-transparency-with-proxy.zeek
Christian Kreibich ed5d60f758 Fix races in scripts.base.frameworks.intel.cluster-transparency-with-proxy test
This test was unstable for two reasons:

- Nothing verified whether the two workers had checked in with the proxy,
meaning that messages between the workers and proxies could get lost. This adds
an extra node_up event that the proxy generates synthetically, with values
recognizable to the manager, once the proxy sees both workers connected. This is
a test-level workaround for what should really be a cluster-is-ready event in
the cluster framework proper.

- More subtle: the Intel framework makes the manager send its current
min_data_store to newly connected workers, which in the case of this test
introduces a race: since the data store, arriving at the worker, replaces the
existing value, it could actually remove already established items if timing was
right. This would lead to the count in the test reaching 3, assuming that 3
intel items are available, when in reality there were fewer, causing the
Intel::seen() call to do nothing. We now disable the sending of the data store
upon connect, via the global added in the previous commit.

This also expands the test slightly so that both workers call Intel::seen() for
the items inserted by the other worker. This is added validation for the second
point above, because in the presence of that race one occasionally sees one log
entry make it, and the other fail.
2022-06-01 22:23:07 -07:00

132 lines
4.7 KiB
Text

# This test verifies intel data propagation via a cluster with a proxy. The
# manager and both workers insert intel items, and both workers do lookups that
# we expect to hit.
# @TEST-PORT: BROKER_PORT1
# @TEST-PORT: BROKER_PORT2
# @TEST-PORT: BROKER_PORT3
# @TEST-PORT: BROKER_PORT4
#
# @TEST-EXEC: btest-bg-run manager-1 ZEEKPATH=$ZEEKPATH:.. CLUSTER_NODE=manager-1 zeek -b %INPUT
# @TEST-EXEC: btest-bg-run proxy-1 ZEEKPATH=$ZEEKPATH:.. CLUSTER_NODE=proxy-1 zeek -b %INPUT
# @TEST-EXEC: btest-bg-run worker-1 ZEEKPATH=$ZEEKPATH:.. CLUSTER_NODE=worker-1 zeek -b %INPUT
# @TEST-EXEC: btest-bg-run worker-2 ZEEKPATH=$ZEEKPATH:.. CLUSTER_NODE=worker-2 zeek -b %INPUT
# @TEST-EXEC: btest-bg-wait 30
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff manager-1/.stdout
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff worker-1/.stdout
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff worker-2/.stdout
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-timestamps-and-sort btest-diff manager-1/intel.log
@TEST-START-FILE cluster-layout.zeek
# Cluster topology used by this test: one manager, two workers and one proxy,
# all on 127.0.0.1. Each node's port is read from the BROKER_PORT1..4
# environment variables named in the @TEST-PORT directives at the top of the
# test; the workers and the proxy name "manager-1" as their manager.
redef Cluster::nodes = {
["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=to_port(getenv("BROKER_PORT1"))],
["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=to_port(getenv("BROKER_PORT2")), $manager="manager-1"],
["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=to_port(getenv("BROKER_PORT3")), $manager="manager-1"],
["proxy-1"] = [$node_type=Cluster::PROXY, $ip=127.0.0.1, $p=to_port(getenv("BROKER_PORT4")), $manager="manager-1"],
};
@TEST-END-FILE
@load base/frameworks/cluster
@load base/frameworks/intel
# The globals and event handlers below are declared inside the Intel module.
module Intel;
# NOTE(review): a zero interval presumably turns log rotation off so that
# intel.log stays in place for the btest-diff step -- confirm.
redef Log::default_rotation_interval = 0sec;
# Disable the initial send of min_data_store to the workers. Its arrival at the
# workers introduces nondeterminism that can trip up this test, because even
# though the worker_data counter below reaches 3, fewer than 3 intel items may
# be in the worker's local store.
redef Intel::send_store_on_node_up = F;
# Number of Intel::log_intel events handled on this node; the node terminates
# when the count reaches 2.
global log_writes = 0;
# Number of Intel::insert_indicator events handled on a worker; the worker
# performs its Intel::seen() lookup when the count reaches 3.
global worker_data = 0;
# Set on the manager once the proxy's synthetic node_up event (name and id both
# "proxy-1") has arrived.
global proxy_ready = F;
# Set when this node handles its first Intel::insert_indicator event, so that
# each worker inserts its own intel item only once.
global sent_data = F;
# Start-up coordination for the test.
#
# On the proxy: once both workers are connected, publish a synthetic node_up
# event to the manager to signal readiness.
#
# On the manager: remember when that synthetic event arrives, and insert the
# manager's intel item once both workers and the proxy are connected and the
# proxy has reported ready.
#
# name: the name of the node that came up.
# id:   normally the node's Broker ID; the synthetic event carries the node
#       name here instead.
event Cluster::node_up(name: string, id: string)
	{
	local my_type = Cluster::local_node_type();

	if ( my_type == Cluster::PROXY && Cluster::worker_count == 2 )
		{
		# The cluster framework normally generates this event with the
		# Broker ID as second argument. The test borrows the event and
		# passes the node name twice, which the manager can recognize.
		Broker::publish(Cluster::manager_topic, Cluster::node_up, Cluster::node, Cluster::node);
		return;
		}

	# Everything below applies to the manager only.
	if ( my_type != Cluster::MANAGER )
		return;

	# Readiness signal from the proxy: name and id are both "proxy-1".
	if ( name == "proxy-1" && id == "proxy-1" )
		proxy_ready = T;

	if ( ! proxy_ready )
		return;

	# The proxy has both workers; also require that the manager itself
	# sees both workers and the proxy before inserting data.
	if ( Cluster::worker_count == 2 && Cluster::proxy_pool$alive_count == 1 )
		Intel::insert([$indicator="1.2.3.4", $indicator_type=Intel::ADDR, $meta=[$source="manager"]]);
	}
# Handles new indicators arriving at a node.
#
# Every invocation prints the indicator and its source. On the first
# invocation each worker inserts its own intel item. Each worker also counts
# invocations and, on the third, looks up the address inserted by the other
# worker.
event Intel::insert_indicator(item: Intel::Item)
	{
	print fmt("new_indicator: %s inserted by %s", item$indicator, item$meta$source);

	local me = Cluster::node;

	if ( ! sent_data )
		{
		# The first indicator to arrive tells us the full cluster is
		# constructed, so this is the point to add this worker's item.
		sent_data = T;

		if ( me == "worker-1" )
			Intel::insert([$indicator="123.123.123.123", $indicator_type=Intel::ADDR, $meta=[$source="worker-1"]]);
		else if ( me == "worker-2" )
			Intel::insert([$indicator="4.3.2.1", $indicator_type=Intel::ADDR, $meta=[$source="worker-2"]]);
		}

	# Only the two workers count indicators and perform lookups.
	if ( me != "worker-1" && me != "worker-2" )
		return;

	# Wait for all 3 intel items distributed over the cluster (data
	# inserted locally is resent).
	++worker_data;

	if ( worker_data != 3 )
		return;

	# Worker 1 observes the host inserted by worker 2, and vice versa.
	if ( me == "worker-1" )
		{
		print "seeing 4.3.2.1";
		Intel::seen([$host=4.3.2.1, $where=Intel::IN_ANYWHERE]);
		}
	else
		{
		print "seeing 123.123.123.123";
		Intel::seen([$host=123.123.123.123, $where=Intel::IN_ANYWHERE]);
		}
	}
# Reports remote inserts sent to the manager by printing the indicator and
# the source recorded in the item's metadata.
event Intel::insert_item(item: Intel::Item)
	{
	local what = item$indicator;
	local who = item$meta$source;

	print fmt("insert_item: %s inserted by %s", what, who);
	}
# Reports each new item by printing its indicator, its source and the name of
# the node handling the event.
event Intel::new_item(item: Intel::Item)
	{
	local what = item$indicator;
	local who = item$meta$source;

	print fmt("new_item triggered for %s by %s on %s", what, who, Cluster::node);
	}
# Counts intel log writes and shuts this node down once the second one has
# been seen.
event Intel::log_intel(rec: Intel::Info)
	{
	++log_writes;

	if ( log_writes == 2 )
		terminate();
	}
# Cascading termination: when any peer node goes down, this node terminates
# as well.
event Cluster::node_down(name: string, id: string)
	{
	terminate();
	}