Reworked cluster intelligence data distribution mechanism and fixed tests.

- Intel data distribution on clusters is now pushed in whole
  by the manager when a worker connects.  Additions after that point
  are managed by the normal single-item distribution mechanism already
  built into the intelligence framework.

- The manager maintains the complete "minimal" data store that the
  workers use to do their matching so that full "minimal" data
  distribution is very easy.

- Tests are cleaned up and work.
This commit is contained in:
Seth Hall 2012-10-03 16:25:02 -04:00
parent 38468f9daa
commit bf9651b323
16 changed files with 84 additions and 177 deletions

View file

@ -6,29 +6,23 @@
module Intel; module Intel;
redef record Item += {
## This field is used internally for cluster transparency to avoid
## re-dispatching intelligence items over and over from workers.
first_dispatch: bool &default=T;
};
# If this process is not a manager process, we don't want the full metadata # If this process is not a manager process, we don't want the full metadata
@if ( Cluster::local_node_type() != Cluster::MANAGER ) @if ( Cluster::local_node_type() != Cluster::MANAGER )
redef have_full_data = F; redef have_full_data = F;
@endif @endif
global cluster_new_item: event(item: Item); global cluster_new_item: event(item: Item);
global cluster_updated_item: event(item: Item);
redef record Item += {
## This field is solely used internally for cluster transparency with
## the intelligence framework to avoid storms of intelligence data
## swirling forever. It allows data to propagate only a single time.
first_dispatch: bool &default=T;
};
# Primary intelligence distribution comes from manager. # Primary intelligence distribution comes from manager.
redef Cluster::manager2worker_events += /^Intel::cluster_.*$/; redef Cluster::manager2worker_events += /^Intel::(cluster_new_item)$/;
# If a worker finds intelligence and adds it, it should share it back to the manager. # If a worker finds intelligence and adds it, it should share it back to the manager.
redef Cluster::worker2manager_events += /^Intel::(cluster_.*|match_no_items)$/; redef Cluster::worker2manager_events += /^Intel::(cluster_new_item|match_no_items)$/;
@if ( Cluster::local_node_type() != Cluster::MANAGER )
redef Intel::data_store &synchronized;
@endif
@if ( Cluster::local_node_type() == Cluster::MANAGER ) @if ( Cluster::local_node_type() == Cluster::MANAGER )
event Intel::match_no_items(s: Seen) &priority=5 event Intel::match_no_items(s: Seen) &priority=5
@ -36,19 +30,13 @@ event Intel::match_no_items(s: Seen) &priority=5
event Intel::match(s, Intel::get_items(s)); event Intel::match(s, Intel::get_items(s));
} }
global initial_sync = F;
event remote_connection_handshake_done(p: event_peer) event remote_connection_handshake_done(p: event_peer)
{ {
# Insert the data once something is connected. # When a worker connects, send it the complete minimal data store.
# This should only push the data to a single host where the # It will be kept up to date after this by the cluster_new_item event.
# normal Bro synchronization should take over. if ( Cluster::nodes[p$descr]$node_type == Cluster::WORKER )
if ( ! initial_sync )
{ {
initial_sync = T; send_id(p, "min_data_store");
for ( net in data_store$net_data )
event Intel::cluster_new_item([$net=net, $meta=[$source=""]]);
for ( [str, str_type] in data_store$string_data )
event Intel::cluster_new_item([$str=str, $str_type=str_type, $meta=[$source=""]]);
} }
} }
@endif @endif
@ -60,34 +48,14 @@ event Intel::cluster_new_item(item: Intel::Item) &priority=5
Intel::insert(item); Intel::insert(item);
} }
event Intel::cluster_updated_item(item: Intel::Item) &priority=5
{
# Ignore locally generated events to avoid event storms.
if ( is_remote_event() )
Intel::insert(item);
}
event Intel::new_item(item: Intel::Item) &priority=5 event Intel::new_item(item: Intel::Item) &priority=5
{ {
# The cluster manager always rebroadcasts intelligence. # The cluster manager always rebroadcasts intelligence.
# Workers redistribute it if it was locally generated on # Workers redistribute it if it was locally generated.
# the worker.
if ( Cluster::local_node_type() == Cluster::MANAGER || if ( Cluster::local_node_type() == Cluster::MANAGER ||
item$first_dispatch ) item$first_dispatch )
{ {
item$first_dispatch = F; item$first_dispatch=F;
event Intel::cluster_new_item(item); event Intel::cluster_new_item(item);
} }
} }
event Intel::updated_item(item: Intel::Item) &priority=5
{
# If this is the first time this item has been dispatched or this
# is a manager, send it over the cluster.
if ( Cluster::local_node_type() == Cluster::MANAGER ||
item$first_dispatch )
{
item$first_dispatch = F;
event Intel::cluster_updated_item(item);
}
}

View file

@ -26,3 +26,4 @@ event bro_init() &priority=5
$ev=Intel::read_entry]); $ev=Intel::read_entry]);
} }
} }

View file

@ -129,6 +129,16 @@ type DataStore: record {
}; };
global data_store: DataStore &redef; global data_store: DataStore &redef;
# The inmemory data structure for holding the barest matchable intelligence.
# This is primarily for workers to do the initial quick matches and store
# a minimal amount of data for the full match to happen on the manager.
type MinDataStore: record {
net_data: set[subnet];
string_data: set[string, StrType];
};
global min_data_store: MinDataStore &redef;
event bro_init() &priority=5 event bro_init() &priority=5
{ {
Log::create_stream(LOG, [$columns=Info, $ev=log_intel]); Log::create_stream(LOG, [$columns=Info, $ev=log_intel]);
@ -137,12 +147,14 @@ event bro_init() &priority=5
function find(s: Seen): bool function find(s: Seen): bool
{ {
if ( s?$host && if ( s?$host &&
s$host in data_store$net_data ) ((have_full_data && s$host in data_store$net_data) ||
(s$host in min_data_store$net_data)))
{ {
return T; return T;
} }
else if ( s?$str && s?$str_type && else if ( s?$str && s?$str_type &&
[s$str, s$str_type] in data_store$string_data ) ((have_full_data && [s$str, s$str_type] in data_store$string_data) ||
([s$str, s$str_type] in min_data_store$string_data)))
{ {
return T; return T;
} }
@ -232,7 +244,7 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool
return F; return F;
} }
event Intel::match(s: Seen, items: set[Item]) event Intel::match(s: Seen, items: set[Item]) &priority=5
{ {
local empty_set: set[string] = set(); local empty_set: set[string] = set();
local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set]; local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set];
@ -264,24 +276,39 @@ function insert(item: Item)
if ( item?$host ) if ( item?$host )
{ {
local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128); local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128);
if ( host !in data_store$net_data ) if ( have_full_data )
data_store$net_data[host] = set(); {
if ( host !in data_store$net_data )
metas = data_store$net_data[host]; data_store$net_data[host] = set();
metas = data_store$net_data[host];
}
add min_data_store$net_data[host];
} }
else if ( item?$net ) else if ( item?$net )
{ {
if ( item$net !in data_store$net_data ) if ( have_full_data )
data_store$net_data[item$net] = set(); {
if ( item$net !in data_store$net_data )
data_store$net_data[item$net] = set();
metas = data_store$net_data[item$net]; metas = data_store$net_data[item$net];
}
add min_data_store$net_data[item$net];
} }
else if ( item?$str ) else if ( item?$str )
{ {
if ( [item$str, item$str_type] !in data_store$string_data ) if ( have_full_data )
data_store$string_data[item$str, item$str_type] = set(); {
if ( [item$str, item$str_type] !in data_store$string_data )
data_store$string_data[item$str, item$str_type] = set();
metas = data_store$string_data[item$str, item$str_type]; metas = data_store$string_data[item$str, item$str_type];
}
add min_data_store$string_data[item$str, item$str_type];
} }
local updated = F; local updated = F;

View file

@ -1,2 +0,0 @@
cluster_new_item: 123.123.123.123 from source worker-1 (from peer: worker-1)
cluster_new_item: 4.3.2.1 from source worker-2 (from peer: worker-2)

View file

@ -1,10 +0,0 @@
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path intel
#open 2012-09-28-18-50-43
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where
#types time string addr port addr port addr string enum enum
1348858243.346443 - - - - - 123.123.123.123 - - Intel::IN_ANYWHERE
#close 2012-09-28-18-50-53

View file

@ -1,3 +0,0 @@
cluster_new_item: 1.2.3.4 from source manager (from peer: manager-1)
cluster_new_item: 123.123.123.123 from source worker-1 (from peer: manager-1)
cluster_new_item: 4.3.2.1 from source worker-2 (from peer: manager-1)

View file

@ -1,4 +0,0 @@
cluster_new_item: 1.2.3.4 from source manager (from peer: manager-1)
cluster_new_item: 123.123.123.123 from source worker-1 (from peer: manager-1)
cluster_new_item: 4.3.2.1 from source worker-2 (from peer: manager-1)
Doing a lookup

View file

@ -1,3 +0,0 @@
It matched!
bad.com
Intel::DNS_ZONE

View file

@ -1,3 +0,0 @@
Number of matching intel items: 2 (should be 2)
Number of matching intel items: 2 (should still be 2)
Number of matching intel items: 3 (should be 3)

View file

@ -1,3 +0,0 @@
VALID
VALID
VALID

View file

@ -39,7 +39,7 @@ event Intel::cluster_new_item(item: Intel::Item)
if ( ! is_remote_event() ) if ( ! is_remote_event() )
return; return;
print fmt("cluster_new_item: %s from source %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr); print fmt("cluster_new_item: %s inserted by %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr);
if ( ! sent_data ) if ( ! sent_data )
{ {

View file

@ -1,36 +1,40 @@
# @TEST-EXEC: bro %INPUT >out # @TEST-SERIALIZE: comm
# @TEST-EXEC: btest-diff out
# @TEST-EXEC: btest-bg-run broproc bro %INPUT
# @TEST-EXEC: btest-bg-wait -k 5
# @TEST-EXEC: btest-diff broproc/intel.log
@TEST-START-FILE intel.dat @TEST-START-FILE intel.dat
#fields ip net str subtype meta.source meta.class meta.desc meta.url meta.tags #fields host net str str_type meta.source meta.desc meta.url
1.2.3.4 - - - source1 Intel::MALICIOUS this host is just plain baaad http://some-data-distributor.com/1234 foo,bar 1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234
1.2.3.4 - - - source1 Intel::MALICIOUS this host is just plain baaad http://some-data-distributor.com/1234 foo,bar 1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234
- - e@mail.com Intel::EMAIL source1 Intel::MALICIOUS Phishing email source http://some-data-distributor.com/100000 - - - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000
@TEST-END-FILE @TEST-END-FILE
@load frameworks/communication/listen @load frameworks/communication/listen
redef Intel::read_files += { "intel.dat" }; redef Intel::read_files += { "../intel.dat" };
redef enum Intel::Where += { SOMEWHERE };
event do_it(allowed_loops: count) event do_it()
{ {
if ( Intel::matcher([$str="e@mail.com", $subtype=Intel::EMAIL, $class=Intel::MALICIOUS]) && Intel::seen([$str="e@mail.com",
Intel::matcher([$ip=1.2.3.4, $class=Intel::MALICIOUS]) ) $str_type=Intel::EMAIL,
{ $where=SOMEWHERE]);
# Once the match happens a single time we print and shutdown.
print "Matched it!"; Intel::seen([$host=1.2.3.4,
terminate_communication(); $where=SOMEWHERE]);
return;
}
if ( allowed_loops > 0 )
schedule 100msecs { do_it(allowed_loops-1) };
else
terminate_communication();
} }
global log_lines = 0;
event bro_init() event Intel::log_intel(rec: Intel::Info)
{ {
event do_it(20); ++log_lines;
if ( log_lines == 2 )
terminate();
}
event bro_init() &priority=-10
{
schedule 1sec { do_it() };
} }

View file

@ -1,23 +0,0 @@
# @TEST-EXEC: bro %INPUT >out
# @TEST-EXEC: btest-diff out
event bro_init()
{
Intel::insert([$ip=1.2.3.4, $meta=[$source="source1-feed1", $class=Intel::MALICIOUS, $tags=set("foo")]]);
Intel::insert([$ip=1.2.3.4, $meta=[$source="source2-special-sauce", $class=Intel::MALICIOUS, $tags=set("foo","bar")]]);
# Lookup should return the items matching the query.
local items = Intel::lookup([$ip=1.2.3.4]);
print fmt("Number of matching intel items: %d (should be 2)", |items|);
# This can be considered an update of a previous value since the
# data, source, and class are the matching points for determining sameness.
Intel::insert([$ip=1.2.3.4, $meta=[$source="source2-special-sauce", $class=Intel::MALICIOUS, $tags=set("foobar", "testing")]]);
items = Intel::lookup([$ip=1.2.3.4]);
print fmt("Number of matching intel items: %d (should still be 2)", |items|);
# This is a new value.
Intel::insert([$ip=1.2.3.4, $meta=[$source="source3", $class=Intel::MALICIOUS]]);
items = Intel::lookup([$ip=1.2.3.4]);
print fmt("Number of matching intel items: %d (should be 3)", |items|);
}

View file

@ -1,38 +0,0 @@
#
# @TEST-EXEC: bro %INPUT >out
# @TEST-EXEC: btest-diff out
event bro_init()
{
Intel::insert([$ip=1.2.3.4, $meta=[$source="zeus-tracker", $class=Intel::MALICIOUS, $tags=set("example-tag1", "example-tag2")]]);
Intel::insert([$str="http://www.google.com/", $subtype=Intel::URL, $meta=[$source="source2", $class=Intel::MALICIOUS, $tags=set("infrastructure", "google")]]);
}
event bro_done()
{
local orig_h = 1.2.3.4;
if ( Intel::matcher([$ip=orig_h, $and_tags=set("example-tag1", "example-tag2")]) )
print "VALID";
if ( Intel::matcher([$ip=orig_h, $and_tags=set("don't match")]) )
print "INVALID";
if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::Item): bool { return T; } ]) )
print "VALID";
if ( Intel::matcher([$ip=4.3.2.1, $pred=function(meta: Intel::Item): bool { return T; } ]) )
print "INVALID";
if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::Item): bool { return F; } ]) )
print "INVALID";
if ( Intel::matcher([$str="http://www.google.com/", $subtype=Intel::URL, $and_tags=set("google")]) )
print "VALID";
if ( Intel::matcher([$str="http://www.google.com/", $subtype=Intel::URL, $and_tags=set("woah")]) )
print "INVALID";
if ( Intel::matcher([$str="http://www.example.com", $subtype=Intel::URL]) )
print "INVALID";
}