mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Reworked cluster intelligence data distribution mechanism and fixed tests.
- Intel data distribution on clusters is now pushed in whole by the manager when a worker connects. Additions after that point are managed by the normal single-item distribution mechanism already built into the intelligence framework. - The manager maintains the complete "minimal" data store that the workers use to do their matching so that full "minimal" data distribution is very easy. - Tests are cleaned up and work.
This commit is contained in:
parent
38468f9daa
commit
bf9651b323
16 changed files with 84 additions and 177 deletions
|
@ -6,29 +6,23 @@
|
||||||
|
|
||||||
module Intel;
|
module Intel;
|
||||||
|
|
||||||
|
redef record Item += {
|
||||||
|
## This field is used internally for cluster transparency to avoid
|
||||||
|
## re-dispatching intelligence items over and over from workers.
|
||||||
|
first_dispatch: bool &default=T;
|
||||||
|
};
|
||||||
|
|
||||||
# If this process is not a manager process, we don't want the full metadata
|
# If this process is not a manager process, we don't want the full metadata
|
||||||
@if ( Cluster::local_node_type() != Cluster::MANAGER )
|
@if ( Cluster::local_node_type() != Cluster::MANAGER )
|
||||||
redef have_full_data = F;
|
redef have_full_data = F;
|
||||||
@endif
|
@endif
|
||||||
|
|
||||||
global cluster_new_item: event(item: Item);
|
global cluster_new_item: event(item: Item);
|
||||||
global cluster_updated_item: event(item: Item);
|
|
||||||
|
|
||||||
redef record Item += {
|
|
||||||
## This field is solely used internally for cluster transparency with
|
|
||||||
## the intelligence framework to avoid storms of intelligence data
|
|
||||||
## swirling forever. It allows data to propagate only a single time.
|
|
||||||
first_dispatch: bool &default=T;
|
|
||||||
};
|
|
||||||
|
|
||||||
# Primary intelligence distribution comes from manager.
|
# Primary intelligence distribution comes from manager.
|
||||||
redef Cluster::manager2worker_events += /^Intel::cluster_.*$/;
|
redef Cluster::manager2worker_events += /^Intel::(cluster_new_item)$/;
|
||||||
# If a worker finds intelligence and adds it, it should share it back to the manager.
|
# If a worker finds intelligence and adds it, it should share it back to the manager.
|
||||||
redef Cluster::worker2manager_events += /^Intel::(cluster_.*|match_no_items)$/;
|
redef Cluster::worker2manager_events += /^Intel::(cluster_new_item|match_no_items)$/;
|
||||||
|
|
||||||
@if ( Cluster::local_node_type() != Cluster::MANAGER )
|
|
||||||
redef Intel::data_store &synchronized;
|
|
||||||
@endif
|
|
||||||
|
|
||||||
@if ( Cluster::local_node_type() == Cluster::MANAGER )
|
@if ( Cluster::local_node_type() == Cluster::MANAGER )
|
||||||
event Intel::match_no_items(s: Seen) &priority=5
|
event Intel::match_no_items(s: Seen) &priority=5
|
||||||
|
@ -36,19 +30,13 @@ event Intel::match_no_items(s: Seen) &priority=5
|
||||||
event Intel::match(s, Intel::get_items(s));
|
event Intel::match(s, Intel::get_items(s));
|
||||||
}
|
}
|
||||||
|
|
||||||
global initial_sync = F;
|
|
||||||
event remote_connection_handshake_done(p: event_peer)
|
event remote_connection_handshake_done(p: event_peer)
|
||||||
{
|
{
|
||||||
# Insert the data once something is connected.
|
# When a worker connects, send it the complete minimal data store.
|
||||||
# This should only push the data to a single host where the
|
# It will be kept up to date after this by the cluster_new_item event.
|
||||||
# normal Bro synchronization should take over.
|
if ( Cluster::nodes[p$descr]$node_type == Cluster::WORKER )
|
||||||
if ( ! initial_sync )
|
|
||||||
{
|
{
|
||||||
initial_sync = T;
|
send_id(p, "min_data_store");
|
||||||
for ( net in data_store$net_data )
|
|
||||||
event Intel::cluster_new_item([$net=net, $meta=[$source=""]]);
|
|
||||||
for ( [str, str_type] in data_store$string_data )
|
|
||||||
event Intel::cluster_new_item([$str=str, $str_type=str_type, $meta=[$source=""]]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@endif
|
@endif
|
||||||
|
@ -60,34 +48,14 @@ event Intel::cluster_new_item(item: Intel::Item) &priority=5
|
||||||
Intel::insert(item);
|
Intel::insert(item);
|
||||||
}
|
}
|
||||||
|
|
||||||
event Intel::cluster_updated_item(item: Intel::Item) &priority=5
|
|
||||||
{
|
|
||||||
# Ignore locally generated events to avoid event storms.
|
|
||||||
if ( is_remote_event() )
|
|
||||||
Intel::insert(item);
|
|
||||||
}
|
|
||||||
|
|
||||||
event Intel::new_item(item: Intel::Item) &priority=5
|
event Intel::new_item(item: Intel::Item) &priority=5
|
||||||
{
|
{
|
||||||
# The cluster manager always rebroadcasts intelligence.
|
# The cluster manager always rebroadcasts intelligence.
|
||||||
# Workers redistribute it if it was locally generated on
|
# Workers redistribute it if it was locally generated.
|
||||||
# the worker.
|
|
||||||
if ( Cluster::local_node_type() == Cluster::MANAGER ||
|
if ( Cluster::local_node_type() == Cluster::MANAGER ||
|
||||||
item$first_dispatch )
|
item$first_dispatch )
|
||||||
{
|
{
|
||||||
item$first_dispatch = F;
|
item$first_dispatch=F;
|
||||||
event Intel::cluster_new_item(item);
|
event Intel::cluster_new_item(item);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
event Intel::updated_item(item: Intel::Item) &priority=5
|
|
||||||
{
|
|
||||||
# If this is the first time this item has been dispatched or this
|
|
||||||
# is a manager, send it over the cluster.
|
|
||||||
if ( Cluster::local_node_type() == Cluster::MANAGER ||
|
|
||||||
item$first_dispatch )
|
|
||||||
{
|
|
||||||
item$first_dispatch = F;
|
|
||||||
event Intel::cluster_updated_item(item);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -26,3 +26,4 @@ event bro_init() &priority=5
|
||||||
$ev=Intel::read_entry]);
|
$ev=Intel::read_entry]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -129,6 +129,16 @@ type DataStore: record {
|
||||||
};
|
};
|
||||||
global data_store: DataStore &redef;
|
global data_store: DataStore &redef;
|
||||||
|
|
||||||
|
# The inmemory data structure for holding the barest matchable intelligence.
|
||||||
|
# This is primarily for workers to do the initial quick matches and store
|
||||||
|
# a minimal amount of data for the full match to happen on the manager.
|
||||||
|
type MinDataStore: record {
|
||||||
|
net_data: set[subnet];
|
||||||
|
string_data: set[string, StrType];
|
||||||
|
};
|
||||||
|
global min_data_store: MinDataStore &redef;
|
||||||
|
|
||||||
|
|
||||||
event bro_init() &priority=5
|
event bro_init() &priority=5
|
||||||
{
|
{
|
||||||
Log::create_stream(LOG, [$columns=Info, $ev=log_intel]);
|
Log::create_stream(LOG, [$columns=Info, $ev=log_intel]);
|
||||||
|
@ -137,12 +147,14 @@ event bro_init() &priority=5
|
||||||
function find(s: Seen): bool
|
function find(s: Seen): bool
|
||||||
{
|
{
|
||||||
if ( s?$host &&
|
if ( s?$host &&
|
||||||
s$host in data_store$net_data )
|
((have_full_data && s$host in data_store$net_data) ||
|
||||||
|
(s$host in min_data_store$net_data)))
|
||||||
{
|
{
|
||||||
return T;
|
return T;
|
||||||
}
|
}
|
||||||
else if ( s?$str && s?$str_type &&
|
else if ( s?$str && s?$str_type &&
|
||||||
[s$str, s$str_type] in data_store$string_data )
|
((have_full_data && [s$str, s$str_type] in data_store$string_data) ||
|
||||||
|
([s$str, s$str_type] in min_data_store$string_data)))
|
||||||
{
|
{
|
||||||
return T;
|
return T;
|
||||||
}
|
}
|
||||||
|
@ -232,7 +244,7 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool
|
||||||
return F;
|
return F;
|
||||||
}
|
}
|
||||||
|
|
||||||
event Intel::match(s: Seen, items: set[Item])
|
event Intel::match(s: Seen, items: set[Item]) &priority=5
|
||||||
{
|
{
|
||||||
local empty_set: set[string] = set();
|
local empty_set: set[string] = set();
|
||||||
local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set];
|
local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set];
|
||||||
|
@ -264,24 +276,39 @@ function insert(item: Item)
|
||||||
if ( item?$host )
|
if ( item?$host )
|
||||||
{
|
{
|
||||||
local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128);
|
local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128);
|
||||||
if ( host !in data_store$net_data )
|
if ( have_full_data )
|
||||||
data_store$net_data[host] = set();
|
{
|
||||||
|
if ( host !in data_store$net_data )
|
||||||
metas = data_store$net_data[host];
|
data_store$net_data[host] = set();
|
||||||
|
|
||||||
|
metas = data_store$net_data[host];
|
||||||
|
}
|
||||||
|
|
||||||
|
add min_data_store$net_data[host];
|
||||||
}
|
}
|
||||||
else if ( item?$net )
|
else if ( item?$net )
|
||||||
{
|
{
|
||||||
if ( item$net !in data_store$net_data )
|
if ( have_full_data )
|
||||||
data_store$net_data[item$net] = set();
|
{
|
||||||
|
if ( item$net !in data_store$net_data )
|
||||||
|
data_store$net_data[item$net] = set();
|
||||||
|
|
||||||
metas = data_store$net_data[item$net];
|
metas = data_store$net_data[item$net];
|
||||||
|
}
|
||||||
|
|
||||||
|
add min_data_store$net_data[item$net];
|
||||||
}
|
}
|
||||||
else if ( item?$str )
|
else if ( item?$str )
|
||||||
{
|
{
|
||||||
if ( [item$str, item$str_type] !in data_store$string_data )
|
if ( have_full_data )
|
||||||
data_store$string_data[item$str, item$str_type] = set();
|
{
|
||||||
|
if ( [item$str, item$str_type] !in data_store$string_data )
|
||||||
|
data_store$string_data[item$str, item$str_type] = set();
|
||||||
|
|
||||||
metas = data_store$string_data[item$str, item$str_type];
|
metas = data_store$string_data[item$str, item$str_type];
|
||||||
|
}
|
||||||
|
|
||||||
|
add min_data_store$string_data[item$str, item$str_type];
|
||||||
}
|
}
|
||||||
|
|
||||||
local updated = F;
|
local updated = F;
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
cluster_new_item: 123.123.123.123 from source worker-1 (from peer: worker-1)
|
|
||||||
cluster_new_item: 4.3.2.1 from source worker-2 (from peer: worker-2)
|
|
|
@ -1,10 +0,0 @@
|
||||||
#separator \x09
|
|
||||||
#set_separator ,
|
|
||||||
#empty_field (empty)
|
|
||||||
#unset_field -
|
|
||||||
#path intel
|
|
||||||
#open 2012-09-28-18-50-43
|
|
||||||
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where
|
|
||||||
#types time string addr port addr port addr string enum enum
|
|
||||||
1348858243.346443 - - - - - 123.123.123.123 - - Intel::IN_ANYWHERE
|
|
||||||
#close 2012-09-28-18-50-53
|
|
|
@ -1,3 +0,0 @@
|
||||||
cluster_new_item: 1.2.3.4 from source manager (from peer: manager-1)
|
|
||||||
cluster_new_item: 123.123.123.123 from source worker-1 (from peer: manager-1)
|
|
||||||
cluster_new_item: 4.3.2.1 from source worker-2 (from peer: manager-1)
|
|
|
@ -1,4 +0,0 @@
|
||||||
cluster_new_item: 1.2.3.4 from source manager (from peer: manager-1)
|
|
||||||
cluster_new_item: 123.123.123.123 from source worker-1 (from peer: manager-1)
|
|
||||||
cluster_new_item: 4.3.2.1 from source worker-2 (from peer: manager-1)
|
|
||||||
Doing a lookup
|
|
|
@ -1,3 +0,0 @@
|
||||||
It matched!
|
|
||||||
bad.com
|
|
||||||
Intel::DNS_ZONE
|
|
|
@ -1 +0,0 @@
|
||||||
Matched it!
|
|
|
@ -1,3 +0,0 @@
|
||||||
VALID
|
|
||||||
VALID
|
|
||||||
VALID
|
|
|
@ -1,3 +0,0 @@
|
||||||
Number of matching intel items: 2 (should be 2)
|
|
||||||
Number of matching intel items: 2 (should still be 2)
|
|
||||||
Number of matching intel items: 3 (should be 3)
|
|
|
@ -1,3 +0,0 @@
|
||||||
VALID
|
|
||||||
VALID
|
|
||||||
VALID
|
|
|
@ -39,7 +39,7 @@ event Intel::cluster_new_item(item: Intel::Item)
|
||||||
if ( ! is_remote_event() )
|
if ( ! is_remote_event() )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
print fmt("cluster_new_item: %s from source %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr);
|
print fmt("cluster_new_item: %s inserted by %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr);
|
||||||
|
|
||||||
if ( ! sent_data )
|
if ( ! sent_data )
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,36 +1,40 @@
|
||||||
# @TEST-EXEC: bro %INPUT >out
|
# @TEST-SERIALIZE: comm
|
||||||
# @TEST-EXEC: btest-diff out
|
|
||||||
|
# @TEST-EXEC: btest-bg-run broproc bro %INPUT
|
||||||
|
# @TEST-EXEC: btest-bg-wait -k 5
|
||||||
|
# @TEST-EXEC: btest-diff broproc/intel.log
|
||||||
|
|
||||||
@TEST-START-FILE intel.dat
|
@TEST-START-FILE intel.dat
|
||||||
#fields ip net str subtype meta.source meta.class meta.desc meta.url meta.tags
|
#fields host net str str_type meta.source meta.desc meta.url
|
||||||
1.2.3.4 - - - source1 Intel::MALICIOUS this host is just plain baaad http://some-data-distributor.com/1234 foo,bar
|
1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234
|
||||||
1.2.3.4 - - - source1 Intel::MALICIOUS this host is just plain baaad http://some-data-distributor.com/1234 foo,bar
|
1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234
|
||||||
- - e@mail.com Intel::EMAIL source1 Intel::MALICIOUS Phishing email source http://some-data-distributor.com/100000 -
|
- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000
|
||||||
@TEST-END-FILE
|
@TEST-END-FILE
|
||||||
|
|
||||||
@load frameworks/communication/listen
|
@load frameworks/communication/listen
|
||||||
|
|
||||||
redef Intel::read_files += { "intel.dat" };
|
redef Intel::read_files += { "../intel.dat" };
|
||||||
|
redef enum Intel::Where += { SOMEWHERE };
|
||||||
|
|
||||||
event do_it(allowed_loops: count)
|
event do_it()
|
||||||
{
|
{
|
||||||
if ( Intel::matcher([$str="e@mail.com", $subtype=Intel::EMAIL, $class=Intel::MALICIOUS]) &&
|
Intel::seen([$str="e@mail.com",
|
||||||
Intel::matcher([$ip=1.2.3.4, $class=Intel::MALICIOUS]) )
|
$str_type=Intel::EMAIL,
|
||||||
{
|
$where=SOMEWHERE]);
|
||||||
# Once the match happens a single time we print and shutdown.
|
|
||||||
print "Matched it!";
|
Intel::seen([$host=1.2.3.4,
|
||||||
terminate_communication();
|
$where=SOMEWHERE]);
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( allowed_loops > 0 )
|
|
||||||
schedule 100msecs { do_it(allowed_loops-1) };
|
|
||||||
else
|
|
||||||
terminate_communication();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
global log_lines = 0;
|
||||||
event bro_init()
|
event Intel::log_intel(rec: Intel::Info)
|
||||||
{
|
{
|
||||||
event do_it(20);
|
++log_lines;
|
||||||
|
if ( log_lines == 2 )
|
||||||
|
terminate();
|
||||||
|
}
|
||||||
|
|
||||||
|
event bro_init() &priority=-10
|
||||||
|
{
|
||||||
|
schedule 1sec { do_it() };
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,23 +0,0 @@
|
||||||
# @TEST-EXEC: bro %INPUT >out
|
|
||||||
# @TEST-EXEC: btest-diff out
|
|
||||||
|
|
||||||
event bro_init()
|
|
||||||
{
|
|
||||||
Intel::insert([$ip=1.2.3.4, $meta=[$source="source1-feed1", $class=Intel::MALICIOUS, $tags=set("foo")]]);
|
|
||||||
Intel::insert([$ip=1.2.3.4, $meta=[$source="source2-special-sauce", $class=Intel::MALICIOUS, $tags=set("foo","bar")]]);
|
|
||||||
|
|
||||||
# Lookup should return the items matching the query.
|
|
||||||
local items = Intel::lookup([$ip=1.2.3.4]);
|
|
||||||
print fmt("Number of matching intel items: %d (should be 2)", |items|);
|
|
||||||
|
|
||||||
# This can be considered an update of a previous value since the
|
|
||||||
# data, source, and class are the matching points for determining sameness.
|
|
||||||
Intel::insert([$ip=1.2.3.4, $meta=[$source="source2-special-sauce", $class=Intel::MALICIOUS, $tags=set("foobar", "testing")]]);
|
|
||||||
items = Intel::lookup([$ip=1.2.3.4]);
|
|
||||||
print fmt("Number of matching intel items: %d (should still be 2)", |items|);
|
|
||||||
|
|
||||||
# This is a new value.
|
|
||||||
Intel::insert([$ip=1.2.3.4, $meta=[$source="source3", $class=Intel::MALICIOUS]]);
|
|
||||||
items = Intel::lookup([$ip=1.2.3.4]);
|
|
||||||
print fmt("Number of matching intel items: %d (should be 3)", |items|);
|
|
||||||
}
|
|
|
@ -1,38 +0,0 @@
|
||||||
#
|
|
||||||
# @TEST-EXEC: bro %INPUT >out
|
|
||||||
# @TEST-EXEC: btest-diff out
|
|
||||||
|
|
||||||
event bro_init()
|
|
||||||
{
|
|
||||||
Intel::insert([$ip=1.2.3.4, $meta=[$source="zeus-tracker", $class=Intel::MALICIOUS, $tags=set("example-tag1", "example-tag2")]]);
|
|
||||||
Intel::insert([$str="http://www.google.com/", $subtype=Intel::URL, $meta=[$source="source2", $class=Intel::MALICIOUS, $tags=set("infrastructure", "google")]]);
|
|
||||||
}
|
|
||||||
|
|
||||||
event bro_done()
|
|
||||||
{
|
|
||||||
local orig_h = 1.2.3.4;
|
|
||||||
|
|
||||||
if ( Intel::matcher([$ip=orig_h, $and_tags=set("example-tag1", "example-tag2")]) )
|
|
||||||
print "VALID";
|
|
||||||
|
|
||||||
if ( Intel::matcher([$ip=orig_h, $and_tags=set("don't match")]) )
|
|
||||||
print "INVALID";
|
|
||||||
|
|
||||||
if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::Item): bool { return T; } ]) )
|
|
||||||
print "VALID";
|
|
||||||
|
|
||||||
if ( Intel::matcher([$ip=4.3.2.1, $pred=function(meta: Intel::Item): bool { return T; } ]) )
|
|
||||||
print "INVALID";
|
|
||||||
|
|
||||||
if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::Item): bool { return F; } ]) )
|
|
||||||
print "INVALID";
|
|
||||||
|
|
||||||
if ( Intel::matcher([$str="http://www.google.com/", $subtype=Intel::URL, $and_tags=set("google")]) )
|
|
||||||
print "VALID";
|
|
||||||
|
|
||||||
if ( Intel::matcher([$str="http://www.google.com/", $subtype=Intel::URL, $and_tags=set("woah")]) )
|
|
||||||
print "INVALID";
|
|
||||||
|
|
||||||
if ( Intel::matcher([$str="http://www.example.com", $subtype=Intel::URL]) )
|
|
||||||
print "INVALID";
|
|
||||||
}
|
|
Loading…
Add table
Add a link
Reference in a new issue