Reworked cluster intelligence data distribution mechanism and fixed tests.

- On clusters, the manager now pushes the complete set of intel data
  to each worker when that worker connects.  Additions after that point
  are handled by the normal single-item distribution mechanism already
  built into the intelligence framework.

- The manager maintains the complete "minimal" data store that the
  workers use for matching, so sending the full "minimal" data set
  to a worker is straightforward.

- Tests are cleaned up and work.
This commit is contained in:
Seth Hall 2012-10-03 16:25:02 -04:00
parent 38468f9daa
commit bf9651b323
16 changed files with 84 additions and 177 deletions

View file

@ -6,29 +6,23 @@
module Intel;
redef record Item += {
## This field is used internally for cluster transparency to avoid
## re-dispatching intelligence items over and over from workers.
first_dispatch: bool &default=T;
};
# If this process is not a manager process, we don't want the full metadata
@if ( Cluster::local_node_type() != Cluster::MANAGER )
redef have_full_data = F;
@endif
global cluster_new_item: event(item: Item);
global cluster_updated_item: event(item: Item);
redef record Item += {
## This field is solely used internally for cluster transparency with
## the intelligence framework to avoid storms of intelligence data
## swirling forever. It allows data to propagate only a single time.
first_dispatch: bool &default=T;
};
# Primary intelligence distribution comes from manager.
redef Cluster::manager2worker_events += /^Intel::cluster_.*$/;
redef Cluster::manager2worker_events += /^Intel::(cluster_new_item)$/;
# If a worker finds intelligence and adds it, it should share it back to the manager.
redef Cluster::worker2manager_events += /^Intel::(cluster_.*|match_no_items)$/;
@if ( Cluster::local_node_type() != Cluster::MANAGER )
redef Intel::data_store &synchronized;
@endif
redef Cluster::worker2manager_events += /^Intel::(cluster_new_item|match_no_items)$/;
@if ( Cluster::local_node_type() == Cluster::MANAGER )
event Intel::match_no_items(s: Seen) &priority=5
@ -36,19 +30,13 @@ event Intel::match_no_items(s: Seen) &priority=5
event Intel::match(s, Intel::get_items(s));
}
global initial_sync = F;
event remote_connection_handshake_done(p: event_peer)
{
# Insert the data once something is connected.
# This should only push the data to a single host where the
# normal Bro synchronization should take over.
if ( ! initial_sync )
# When a worker connects, send it the complete minimal data store.
# It will be kept up to date after this by the cluster_new_item event.
if ( Cluster::nodes[p$descr]$node_type == Cluster::WORKER )
{
initial_sync = T;
for ( net in data_store$net_data )
event Intel::cluster_new_item([$net=net, $meta=[$source=""]]);
for ( [str, str_type] in data_store$string_data )
event Intel::cluster_new_item([$str=str, $str_type=str_type, $meta=[$source=""]]);
send_id(p, "min_data_store");
}
}
@endif
@ -60,34 +48,14 @@ event Intel::cluster_new_item(item: Intel::Item) &priority=5
Intel::insert(item);
}
event Intel::cluster_updated_item(item: Intel::Item) &priority=5
{
# Ignore locally generated events to avoid event storms.
if ( is_remote_event() )
Intel::insert(item);
}
event Intel::new_item(item: Intel::Item) &priority=5
{
# The cluster manager always rebroadcasts intelligence.
# Workers redistribute it if it was locally generated on
# the worker.
# Workers redistribute it if it was locally generated.
if ( Cluster::local_node_type() == Cluster::MANAGER ||
item$first_dispatch )
{
item$first_dispatch = F;
item$first_dispatch=F;
event Intel::cluster_new_item(item);
}
}
event Intel::updated_item(item: Intel::Item) &priority=5
{
# If this is the first time this item has been dispatched or this
# is a manager, send it over the cluster.
if ( Cluster::local_node_type() == Cluster::MANAGER ||
item$first_dispatch )
{
item$first_dispatch = F;
event Intel::cluster_updated_item(item);
}
}

View file

@ -26,3 +26,4 @@ event bro_init() &priority=5
$ev=Intel::read_entry]);
}
}

View file

@ -129,6 +129,16 @@ type DataStore: record {
};
global data_store: DataStore &redef;
# The in-memory data structure for holding the barest matchable intelligence.
# This is primarily for workers to do the initial quick matches and store
# a minimal amount of data for the full match to happen on the manager.
type MinDataStore: record {
## Hosts and networks that have intelligence; membership only, no metadata.
net_data: set[subnet];
## (string, string type) pairs that have intelligence; membership only,
## no metadata.
string_data: set[string, StrType];
};
# The minimal store instance: insert() adds entries to it and find()
# checks membership in it.
global min_data_store: MinDataStore &redef;
event bro_init() &priority=5
{
Log::create_stream(LOG, [$columns=Info, $ev=log_intel]);
@ -137,12 +147,14 @@ event bro_init() &priority=5
function find(s: Seen): bool
{
if ( s?$host &&
s$host in data_store$net_data )
((have_full_data && s$host in data_store$net_data) ||
(s$host in min_data_store$net_data)))
{
return T;
}
else if ( s?$str && s?$str_type &&
[s$str, s$str_type] in data_store$string_data )
((have_full_data && [s$str, s$str_type] in data_store$string_data) ||
([s$str, s$str_type] in min_data_store$string_data)))
{
return T;
}
@ -232,7 +244,7 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool
return F;
}
event Intel::match(s: Seen, items: set[Item])
event Intel::match(s: Seen, items: set[Item]) &priority=5
{
local empty_set: set[string] = set();
local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set];
@ -264,24 +276,39 @@ function insert(item: Item)
if ( item?$host )
{
local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128);
if ( host !in data_store$net_data )
data_store$net_data[host] = set();
if ( have_full_data )
{
if ( host !in data_store$net_data )
data_store$net_data[host] = set();
metas = data_store$net_data[host];
metas = data_store$net_data[host];
}
add min_data_store$net_data[host];
}
else if ( item?$net )
{
if ( item$net !in data_store$net_data )
data_store$net_data[item$net] = set();
if ( have_full_data )
{
if ( item$net !in data_store$net_data )
data_store$net_data[item$net] = set();
metas = data_store$net_data[item$net];
metas = data_store$net_data[item$net];
}
add min_data_store$net_data[item$net];
}
else if ( item?$str )
{
if ( [item$str, item$str_type] !in data_store$string_data )
data_store$string_data[item$str, item$str_type] = set();
if ( have_full_data )
{
if ( [item$str, item$str_type] !in data_store$string_data )
data_store$string_data[item$str, item$str_type] = set();
metas = data_store$string_data[item$str, item$str_type];
metas = data_store$string_data[item$str, item$str_type];
}
add min_data_store$string_data[item$str, item$str_type];
}
local updated = F;

View file

@ -1,2 +0,0 @@
cluster_new_item: 123.123.123.123 from source worker-1 (from peer: worker-1)
cluster_new_item: 4.3.2.1 from source worker-2 (from peer: worker-2)

View file

@ -1,10 +0,0 @@
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path intel
#open 2012-09-28-18-50-43
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where
#types time string addr port addr port addr string enum enum
1348858243.346443 - - - - - 123.123.123.123 - - Intel::IN_ANYWHERE
#close 2012-09-28-18-50-53

View file

@ -1,3 +0,0 @@
cluster_new_item: 1.2.3.4 from source manager (from peer: manager-1)
cluster_new_item: 123.123.123.123 from source worker-1 (from peer: manager-1)
cluster_new_item: 4.3.2.1 from source worker-2 (from peer: manager-1)

View file

@ -1,4 +0,0 @@
cluster_new_item: 1.2.3.4 from source manager (from peer: manager-1)
cluster_new_item: 123.123.123.123 from source worker-1 (from peer: manager-1)
cluster_new_item: 4.3.2.1 from source worker-2 (from peer: manager-1)
Doing a lookup

View file

@ -1,3 +0,0 @@
It matched!
bad.com
Intel::DNS_ZONE

View file

@ -1,3 +0,0 @@
Number of matching intel items: 2 (should be 2)
Number of matching intel items: 2 (should still be 2)
Number of matching intel items: 3 (should be 3)

View file

@ -1,3 +0,0 @@
VALID
VALID
VALID

View file

@ -39,7 +39,7 @@ event Intel::cluster_new_item(item: Intel::Item)
if ( ! is_remote_event() )
return;
print fmt("cluster_new_item: %s from source %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr);
print fmt("cluster_new_item: %s inserted by %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr);
if ( ! sent_data )
{

View file

@ -1,36 +1,40 @@
# @TEST-EXEC: bro %INPUT >out
# @TEST-EXEC: btest-diff out
# @TEST-SERIALIZE: comm
# @TEST-EXEC: btest-bg-run broproc bro %INPUT
# @TEST-EXEC: btest-bg-wait -k 5
# @TEST-EXEC: btest-diff broproc/intel.log
@TEST-START-FILE intel.dat
#fields ip net str subtype meta.source meta.class meta.desc meta.url meta.tags
1.2.3.4 - - - source1 Intel::MALICIOUS this host is just plain baaad http://some-data-distributor.com/1234 foo,bar
1.2.3.4 - - - source1 Intel::MALICIOUS this host is just plain baaad http://some-data-distributor.com/1234 foo,bar
- - e@mail.com Intel::EMAIL source1 Intel::MALICIOUS Phishing email source http://some-data-distributor.com/100000 -
#fields host net str str_type meta.source meta.desc meta.url
1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234
1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234
- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000
@TEST-END-FILE
@load frameworks/communication/listen
redef Intel::read_files += { "intel.dat" };
redef Intel::read_files += { "../intel.dat" };
redef enum Intel::Where += { SOMEWHERE };
event do_it(allowed_loops: count)
event do_it()
{
if ( Intel::matcher([$str="e@mail.com", $subtype=Intel::EMAIL, $class=Intel::MALICIOUS]) &&
Intel::matcher([$ip=1.2.3.4, $class=Intel::MALICIOUS]) )
{
# Once the match happens a single time we print and shutdown.
print "Matched it!";
terminate_communication();
return;
}
Intel::seen([$str="e@mail.com",
$str_type=Intel::EMAIL,
$where=SOMEWHERE]);
if ( allowed_loops > 0 )
schedule 100msecs { do_it(allowed_loops-1) };
else
terminate_communication();
Intel::seen([$host=1.2.3.4,
$where=SOMEWHERE]);
}
event bro_init()
global log_lines = 0;
event Intel::log_intel(rec: Intel::Info)
{
event do_it(20);
++log_lines;
if ( log_lines == 2 )
terminate();
}
event bro_init() &priority=-10
{
schedule 1sec { do_it() };
}

View file

@ -1,23 +0,0 @@
# @TEST-EXEC: bro %INPUT >out
# @TEST-EXEC: btest-diff out
event bro_init()
{
Intel::insert([$ip=1.2.3.4, $meta=[$source="source1-feed1", $class=Intel::MALICIOUS, $tags=set("foo")]]);
Intel::insert([$ip=1.2.3.4, $meta=[$source="source2-special-sauce", $class=Intel::MALICIOUS, $tags=set("foo","bar")]]);
# Lookup should return the items matching the query.
local items = Intel::lookup([$ip=1.2.3.4]);
print fmt("Number of matching intel items: %d (should be 2)", |items|);
# This can be considered an update of a previous value since the
# data, source, and class are the matching points for determining sameness.
Intel::insert([$ip=1.2.3.4, $meta=[$source="source2-special-sauce", $class=Intel::MALICIOUS, $tags=set("foobar", "testing")]]);
items = Intel::lookup([$ip=1.2.3.4]);
print fmt("Number of matching intel items: %d (should still be 2)", |items|);
# This is a new value.
Intel::insert([$ip=1.2.3.4, $meta=[$source="source3", $class=Intel::MALICIOUS]]);
items = Intel::lookup([$ip=1.2.3.4]);
print fmt("Number of matching intel items: %d (should be 3)", |items|);
}

View file

@ -1,38 +0,0 @@
#
# @TEST-EXEC: bro %INPUT >out
# @TEST-EXEC: btest-diff out
event bro_init()
{
Intel::insert([$ip=1.2.3.4, $meta=[$source="zeus-tracker", $class=Intel::MALICIOUS, $tags=set("example-tag1", "example-tag2")]]);
Intel::insert([$str="http://www.google.com/", $subtype=Intel::URL, $meta=[$source="source2", $class=Intel::MALICIOUS, $tags=set("infrastructure", "google")]]);
}
event bro_done()
{
local orig_h = 1.2.3.4;
if ( Intel::matcher([$ip=orig_h, $and_tags=set("example-tag1", "example-tag2")]) )
print "VALID";
if ( Intel::matcher([$ip=orig_h, $and_tags=set("don't match")]) )
print "INVALID";
if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::Item): bool { return T; } ]) )
print "VALID";
if ( Intel::matcher([$ip=4.3.2.1, $pred=function(meta: Intel::Item): bool { return T; } ]) )
print "INVALID";
if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::Item): bool { return F; } ]) )
print "INVALID";
if ( Intel::matcher([$str="http://www.google.com/", $subtype=Intel::URL, $and_tags=set("google")]) )
print "VALID";
if ( Intel::matcher([$str="http://www.google.com/", $subtype=Intel::URL, $and_tags=set("woah")]) )
print "INVALID";
if ( Intel::matcher([$str="http://www.example.com", $subtype=Intel::URL]) )
print "INVALID";
}