Updates for the Intel Framework.

 - The Intel import format has changed (refer to the docs).

 - All string matching is now case insensitive.

 - SMTP intel script has been updated to extract email
   addresses correctly.

 - A small fix to the SMTP base script is also included so that
   individual email addresses in the To: field are extracted
   correctly.
This commit is contained in:
Seth Hall 2013-07-19 13:16:12 -04:00
parent 18201afcf8
commit 9b444b2617
17 changed files with 178 additions and 166 deletions

View file

@ -29,9 +29,6 @@ Framework to be checked by loading this script in local.bro::
@load policy/frameworks/intel
(TODO: find some good mechanism for getting setup with good data
quickly)
Refer to the "Loading Intelligence" section below to see the format
for Intelligence Framework text files, then load those text files with
this line in local.bro::
@ -61,16 +58,14 @@ data out to all of the nodes that need it.
Here is an example of the intelligence data format. Note that all
whitespace separators are literal tabs and fields containing only a
hyphen a considered to be null values.::
hyphen are considered to be null values.::
#fields host net str str_type meta.source meta.desc meta.url
1.2.3.4 - - - source1 Sending phishing email http://source1.com/badhosts/1.2.3.4
- 31.131.248.0/21 - - spamhaus-drop SBL154982 - -
- - a.b.com Intel::DOMAIN source2 Name used for data exfiltration -
#fields indicator indicator_type meta.source meta.desc meta.url
1.2.3.4 Intel::ADDR source1 Sending phishing email http://source1.com/badhosts/1.2.3.4
a.b.com Intel::DOMAIN source2 Name used for data exfiltration -
For more examples of built in `str_type` values, please refer to the
autogenerated documentation for the intelligence framework (TODO:
figure out how to do this link).
For more examples of built in `indicator_type` values, please refer to the
autogenerated documentation for the intelligence framework.
To load the data once the files are created, use the following example
code to define the files to load, substituting your own file names::
@ -90,8 +85,7 @@ When some bit of data is extracted (such as an email address in the
"From" header in a message over SMTP), the Intelligence Framework
needs to be informed that this data was discovered and its presence
should be checked within the intelligence data set. This is
accomplished through the Intel::seen (TODO: do a reference link)
function.
accomplished through the Intel::seen function.
Typically users won't need to work with this function due to built in
hook scripts that Bro ships with that will "see" data and send it into

View file

@ -10,13 +10,14 @@ module Intel;
export {
redef enum Log::ID += { LOG };
## String data needs to be further categoried since it could represent
## and number of types of data.
type StrType: enum {
## Enum type to represent various types of intelligence data.
type Type: enum {
## An IP address.
ADDR,
## A complete URL without the prefix "http://".
URL,
## User-Agent string, typically HTTP or mail message body.
USER_AGENT,
## Software name.
SOFTWARE,
## Email address.
EMAIL,
## DNS domain name.
@ -44,16 +45,13 @@ export {
## Represents a piece of intelligence.
type Item: record {
## The IP address if the intelligence is about an IP address.
host: addr &optional;
## The network if the intelligence is about a CIDR block.
net: subnet &optional;
## The string if the intelligence is about a string.
str: string &optional;
## The type of data that is in the string if the $str field is set.
str_type: StrType &optional;
## The intelligence indicator.
indicator: string;
## Metadata for the item. Typically represents more deeply \
## The type of data that the indicator field represents.
indicator_type: Type;
## Metadata for the item. Typically represents more deeply
## descriptive data for a piece of intelligence.
meta: MetaData;
};
@ -69,12 +67,15 @@ export {
## exclusive. These records *must* represent either an IP address being
## seen or a string being seen.
type Seen: record {
## The IP address if the data seen is an IP address.
host: addr &log &optional;
## The string if the data is about a string.
str: string &log &optional;
## The type of data that is in the string if the $str field is set.
str_type: StrType &log &optional;
indicator: string &log &optional;
## The type of data that the indicator represents.
indicator_type: Type &log &optional;
## If the indicator type was :bro:enum:`Intel::ADDR`, then this
## field will be present.
host: addr &optional;
## Where the data was discovered.
where: Where &log;
@ -100,7 +101,7 @@ export {
## Where the data was seen.
seen: Seen &log;
## Sources which supplied data that resulted in this match.
sources: set[string] &log;
sources: set[string] &log &default=string_set();
};
## Intelligence data manipulation functions.
@ -135,8 +136,8 @@ const have_full_data = T &redef;
# The in memory data structure for holding intelligence.
type DataStore: record {
net_data: table[subnet] of set[MetaData];
string_data: table[string, StrType] of set[MetaData];
host_data: table[addr] of set[MetaData];
string_data: table[string, Type] of set[MetaData];
};
global data_store: DataStore &redef;
@ -144,8 +145,8 @@ global data_store: DataStore &redef;
# This is primarily for workers to do the initial quick matches and store
# a minimal amount of data for the full match to happen on the manager.
type MinDataStore: record {
net_data: set[subnet];
string_data: set[string, StrType];
host_data: set[addr];
string_data: set[string, Type];
};
global min_data_store: MinDataStore &redef;
@ -157,15 +158,13 @@ event bro_init() &priority=5
function find(s: Seen): bool
{
if ( s?$host &&
((have_full_data && s$host in data_store$net_data) ||
(s$host in min_data_store$net_data)))
if ( s?$host )
{
return T;
return ((s$host in min_data_store$host_data) ||
(have_full_data && s$host in data_store$host_data));
}
else if ( s?$str && s?$str_type &&
((have_full_data && [s$str, s$str_type] in data_store$string_data) ||
([s$str, s$str_type] in min_data_store$string_data)))
else if ( ([to_lower(s$indicator), s$indicator_type] in min_data_store$string_data) ||
(have_full_data && [to_lower(s$indicator), s$indicator_type] in data_store$string_data) )
{
return T;
}
@ -177,8 +176,7 @@ function find(s: Seen): bool
function get_items(s: Seen): set[Item]
{
local item: Item;
local return_data: set[Item] = set();
local return_data: set[Item];
if ( ! have_full_data )
{
@ -191,26 +189,23 @@ function get_items(s: Seen): set[Item]
if ( s?$host )
{
# See if the host is known about and it has meta values
if ( s$host in data_store$net_data )
if ( s$host in data_store$host_data )
{
for ( m in data_store$net_data[s$host] )
for ( m in data_store$host_data[s$host] )
{
# TODO: the lookup should be finding all and not just most specific
# and $host/$net should have the correct value.
item = [$host=s$host, $meta=m];
add return_data[item];
add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=m)];
}
}
}
else if ( s?$str && s?$str_type )
else
{
local lower_indicator = to_lower(s$indicator);
# See if the string is known about and it has meta values
if ( [s$str, s$str_type] in data_store$string_data )
if ( [lower_indicator, s$indicator_type] in data_store$string_data )
{
for ( m in data_store$string_data[s$str, s$str_type] )
for ( m in data_store$string_data[lower_indicator, s$indicator_type] )
{
item = [$str=s$str, $str_type=s$str_type, $meta=m];
add return_data[item];
add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=m)];
}
}
}
@ -222,6 +217,12 @@ function Intel::seen(s: Seen)
{
if ( find(s) )
{
if ( s?$host )
{
s$indicator = cat(s$host);
s$indicator_type = Intel::ADDR;
}
if ( have_full_data )
{
local items = get_items(s);
@ -250,8 +251,7 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool
event Intel::match(s: Seen, items: set[Item]) &priority=5
{
local empty_set: set[string] = set();
local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set];
local info: Info = [$ts=network_time(), $seen=s];
if ( s?$conn )
{
@ -267,52 +267,37 @@ event Intel::match(s: Seen, items: set[Item]) &priority=5
function insert(item: Item)
{
if ( item?$str && !item?$str_type )
{
event reporter_warning(network_time(), fmt("You must provide a str_type for strings or this item doesn't make sense. Item: %s", item), "");
return;
}
# Create and fill out the meta data item.
local meta = item$meta;
local metas: set[MetaData];
if ( item?$host )
# All intelligence is case insensitive at the moment.
local lower_indicator = to_lower(item$indicator);
if ( item$indicator_type == ADDR )
{
local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128);
local host = to_addr(item$indicator);
if ( have_full_data )
{
if ( host !in data_store$net_data )
data_store$net_data[host] = set();
if ( host !in data_store$host_data )
data_store$host_data[host] = set();
metas = data_store$net_data[host];
metas = data_store$host_data[host];
}
add min_data_store$net_data[host];
add min_data_store$host_data[host];
}
else if ( item?$net )
else
{
if ( have_full_data )
{
if ( item$net !in data_store$net_data )
data_store$net_data[item$net] = set();
if ( [lower_indicator, item$indicator_type] !in data_store$string_data )
data_store$string_data[lower_indicator, item$indicator_type] = set();
metas = data_store$net_data[item$net];
metas = data_store$string_data[lower_indicator, item$indicator_type];
}
add min_data_store$net_data[item$net];
}
else if ( item?$str )
{
if ( have_full_data )
{
if ( [item$str, item$str_type] !in data_store$string_data )
data_store$string_data[item$str, item$str_type] = set();
metas = data_store$string_data[item$str, item$str_type];
}
add min_data_store$string_data[item$str, item$str_type];
add min_data_store$string_data[lower_indicator, item$indicator_type];
}
local updated = F;

View file

@ -223,7 +223,10 @@ event mime_one_header(c: connection, h: mime_header_rec) &priority=5
{
if ( ! c$smtp?$to )
c$smtp$to = set();
add c$smtp$to[h$value];
local to_parts = split(h$value, /[[:blank:]]*,[[:blank:]]*/);
for ( i in to_parts )
add c$smtp$to[to_parts[i]];
}
else if ( h$name == "X-ORIGINATING-IP" )

View file

@ -2,7 +2,11 @@
@load ./where-locations
event connection_established(c: connection)
{
if ( c$orig$state == TCP_ESTABLISHED &&
c$resp$state == TCP_ESTABLISHED )
{
Intel::seen([$host=c$id$orig_h, $conn=c, $where=Conn::IN_ORIG]);
Intel::seen([$host=c$id$resp_h, $conn=c, $where=Conn::IN_RESP]);
}
}

View file

@ -3,8 +3,8 @@
event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qclass: count)
{
Intel::seen([$str=query,
$str_type=Intel::DOMAIN,
Intel::seen([$indicator=query,
$indicator_type=Intel::DOMAIN,
$conn=c,
$where=DNS::IN_REQUEST]);
}

View file

@ -4,8 +4,8 @@
event http_header(c: connection, is_orig: bool, name: string, value: string)
{
if ( is_orig && name == "HOST" )
Intel::seen([$str=value,
$str_type=Intel::DOMAIN,
Intel::seen([$indicator=value,
$indicator_type=Intel::DOMAIN,
$conn=c,
$where=HTTP::IN_HOST_HEADER]);
}

View file

@ -5,8 +5,8 @@
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
{
if ( is_orig && c?$http )
Intel::seen([$str=HTTP::build_url(c$http),
$str_type=Intel::URL,
Intel::seen([$indicator=HTTP::build_url(c$http),
$indicator_type=Intel::URL,
$conn=c,
$where=HTTP::IN_URL]);
}

View file

@ -4,8 +4,8 @@
event http_header(c: connection, is_orig: bool, name: string, value: string)
{
if ( is_orig && name == "USER-AGENT" )
Intel::seen([$str=value,
$str_type=Intel::USER_AGENT,
Intel::seen([$indicator=value,
$indicator_type=Intel::SOFTWARE,
$conn=c,
$where=HTTP::IN_USER_AGENT_HEADER]);
}

View file

@ -13,8 +13,8 @@ event intel_mime_data(f: fa_file, data: string)
local urls = find_all_urls_without_scheme(data);
for ( url in urls )
{
Intel::seen([$str=url,
$str_type=Intel::URL,
Intel::seen([$indicator=url,
$indicator_type=Intel::URL,
$conn=c,
$where=SMTP::IN_MESSAGE]);
}

View file

@ -18,8 +18,8 @@ event mime_end_entity(c: connection)
}
if ( c$smtp?$user_agent )
Intel::seen([$str=c$smtp$user_agent,
$str_type=Intel::USER_AGENT,
Intel::seen([$indicator=c$smtp$user_agent,
$indicator_type=Intel::SOFTWARE,
$conn=c,
$where=SMTP::IN_HEADER]);
@ -29,43 +29,69 @@ event mime_end_entity(c: connection)
$where=SMTP::IN_X_ORIGINATING_IP_HEADER]);
if ( c$smtp?$mailfrom )
Intel::seen([$str=c$smtp$mailfrom,
$str_type=Intel::EMAIL,
{
local mailfromparts = split_n(c$smtp$mailfrom, /<.+>/, T, 1);
if ( |mailfromparts| > 2 )
{
Intel::seen([$indicator=mailfromparts[2][1:-2],
$indicator_type=Intel::EMAIL,
$conn=c,
$where=SMTP::IN_MAIL_FROM]);
}
}
if ( c$smtp?$rcptto )
{
for ( rcptto in c$smtp$rcptto )
{
Intel::seen([$str=rcptto,
$str_type=Intel::EMAIL,
local rcpttoparts = split_n(rcptto, /<.+>/, T, 1);
if ( |rcpttoparts| > 2 )
{
Intel::seen([$indicator=rcpttoparts[2][1:-2],
$indicator_type=Intel::EMAIL,
$conn=c,
$where=SMTP::IN_RCPT_TO]);
}
}
}
if ( c$smtp?$from )
Intel::seen([$str=c$smtp$from,
$str_type=Intel::EMAIL,
{
local fromparts = split_n(c$smtp$from, /<.+>/, T, 1);
if ( |fromparts| > 2 )
{
Intel::seen([$indicator=fromparts[2][1:-2],
$indicator_type=Intel::EMAIL,
$conn=c,
$where=SMTP::IN_FROM]);
}
}
if ( c$smtp?$to )
{
for ( email_to in c$smtp$to )
{
Intel::seen([$str=email_to,
$str_type=Intel::EMAIL,
local toparts = split_n(email_to, /<.+>/, T, 1);
if ( |toparts| > 2 )
{
Intel::seen([$indicator=toparts[2][1:-2],
$indicator_type=Intel::EMAIL,
$conn=c,
$where=SMTP::IN_TO]);
}
}
}
if ( c$smtp?$reply_to )
Intel::seen([$str=c$smtp$reply_to,
$str_type=Intel::EMAIL,
{
local replytoparts = split_n(c$smtp$reply_to, /<.+>/, T, 1);
if ( |replytoparts| > 2 )
{
Intel::seen([$indicator=replytoparts[2][1:-2],
$indicator_type=Intel::EMAIL,
$conn=c,
$where=SMTP::IN_REPLY_TO]);
}
}
}
}

View file

@ -10,14 +10,14 @@ event x509_certificate(c: connection, is_orig: bool, cert: X509, chain_idx: coun
{
local email = sub(cert$subject, /^.*emailAddress=/, "");
email = sub(email, /,.*$/, "");
Intel::seen([$str=email,
$str_type=Intel::EMAIL,
Intel::seen([$indicator=email,
$indicator_type=Intel::EMAIL,
$conn=c,
$where=(is_orig ? SSL::IN_CLIENT_CERT : SSL::IN_SERVER_CERT)]);
}
Intel::seen([$str=sha1_hash(der_cert),
$str_type=Intel::CERT_HASH,
Intel::seen([$indicator=sha1_hash(der_cert),
$indicator_type=Intel::CERT_HASH,
$conn=c,
$where=(is_orig ? SSL::IN_CLIENT_CERT : SSL::IN_SERVER_CERT)]);
}
@ -27,8 +27,8 @@ event ssl_extension(c: connection, is_orig: bool, code: count, val: string)
{
if ( is_orig && SSL::extensions[code] == "server_name" &&
c?$ssl && c$ssl?$server_name )
Intel::seen([$str=c$ssl$server_name,
$str_type=Intel::DOMAIN,
Intel::seen([$indicator=c$ssl$server_name,
$indicator_type=Intel::DOMAIN,
$conn=c,
$where=SSL::IN_SERVER_NAME]);
}

View file

@ -3,8 +3,8 @@
#empty_field (empty)
#unset_field -
#path intel
#open 2012-10-03-20-20-39
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources
#types time string addr port addr port addr string enum enum table[string]
1349295639.424940 - - - - - 123.123.123.123 - - Intel::IN_ANYWHERE worker-1
#close 2012-10-03-20-20-49
#open 2013-07-19-17-05-48
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where sources
#types time string addr port addr port string enum enum table[string]
1374253548.038580 - - - - - 123.123.123.123 Intel::ADDR Intel::IN_ANYWHERE worker-1
#close 2013-07-19-17-05-57

View file

@ -3,9 +3,9 @@
#empty_field (empty)
#unset_field -
#path intel
#open 2012-10-03-20-18-05
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources
#types time string addr port addr port addr string enum enum table[string]
1349295485.114156 - - - - - - e@mail.com Intel::EMAIL SOMEWHERE source1
1349295485.114156 - - - - - 1.2.3.4 - - SOMEWHERE source1
#close 2012-10-03-20-18-05
#open 2013-07-19-17-04-26
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where sources
#types time string addr port addr port string enum enum table[string]
1374253466.857185 - - - - - e@mail.com Intel::EMAIL SOMEWHERE source1
1374253466.857185 - - - - - 1.2.3.4 Intel::ADDR SOMEWHERE source1
#close 2013-07-19-17-04-26

View file

@ -3,11 +3,11 @@
#empty_field (empty)
#unset_field -
#path intel
#open 2012-10-10-15-05-23
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources
#types time string addr port addr port addr string enum enum table[string]
1349881523.548946 - - - - - 1.2.3.4 - - Intel::IN_A_TEST source1
1349881523.548946 - - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1
1349881524.567896 - - - - - 1.2.3.4 - - Intel::IN_A_TEST source1
1349881524.567896 - - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1
#close 2012-10-10-15-05-24
#open 2013-07-19-17-06-57
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where sources
#types time string addr port addr port string enum enum table[string]
1374253617.312158 - - - - - 1.2.3.4 Intel::ADDR Intel::IN_A_TEST source1
1374253617.312158 - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1
1374253618.332565 - - - - - 1.2.3.4 Intel::ADDR Intel::IN_A_TEST source1
1374253618.332565 - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1
#close 2013-07-19-17-07-06

View file

@ -28,7 +28,7 @@ event remote_connection_handshake_done(p: event_peer)
# Insert the data once both workers are connected.
if ( Cluster::local_node_type() == Cluster::MANAGER && Cluster::worker_count == 2 )
{
Intel::insert([$host=1.2.3.4,$meta=[$source="manager"]]);
Intel::insert([$indicator="1.2.3.4", $indicator_type=Intel::ADDR, $meta=[$source="manager"]]);
}
}
@ -39,7 +39,7 @@ event Intel::cluster_new_item(item: Intel::Item)
if ( ! is_remote_event() )
return;
print fmt("cluster_new_item: %s inserted by %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr);
print fmt("cluster_new_item: %s inserted by %s (from peer: %s)", item$indicator, item$meta$source, get_event_peer()$descr);
if ( ! sent_data )
{
@ -47,9 +47,9 @@ event Intel::cluster_new_item(item: Intel::Item)
# full cluster is constructed.
sent_data = T;
if ( Cluster::node == "worker-1" )
Intel::insert([$host=123.123.123.123,$meta=[$source="worker-1"]]);
Intel::insert([$indicator="123.123.123.123", $indicator_type=Intel::ADDR, $meta=[$source="worker-1"]]);
if ( Cluster::node == "worker-2" )
Intel::insert([$host=4.3.2.1,$meta=[$source="worker-2"]]);
Intel::insert([$indicator="4.3.2.1", $indicator_type=Intel::ADDR, $meta=[$source="worker-2"]]);
}
# We're forcing worker-2 to do a lookup when it has three intelligence items

View file

@ -5,10 +5,10 @@
# @TEST-EXEC: btest-diff broproc/intel.log
@TEST-START-FILE intel.dat
#fields host net str str_type meta.source meta.desc meta.url
1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234
1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234
- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000
#fields indicator indicator_type meta.source meta.desc meta.url
1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234
1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234
e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000
@TEST-END-FILE
@load frameworks/communication/listen
@ -18,8 +18,8 @@ redef enum Intel::Where += { SOMEWHERE };
event do_it()
{
Intel::seen([$str="e@mail.com",
$str_type=Intel::EMAIL,
Intel::seen([$indicator="e@mail.com",
$indicator_type=Intel::EMAIL,
$where=SOMEWHERE]);
Intel::seen([$host=1.2.3.4,

View file

@ -19,10 +19,10 @@ redef Cluster::nodes = {
@TEST-END-FILE
@TEST-START-FILE intel.dat
#fields host net str str_type meta.source meta.desc meta.url
1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234
1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234
- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000
#fields indicator indicator_type meta.source meta.desc meta.url
1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234
1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234
e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000
@TEST-END-FILE
@load base/frameworks/control
@ -41,7 +41,7 @@ redef enum Intel::Where += {
event do_it()
{
Intel::seen([$host=1.2.3.4, $where=Intel::IN_A_TEST]);
Intel::seen([$str="e@mail.com", $str_type=Intel::EMAIL, $where=Intel::IN_A_TEST]);
Intel::seen([$indicator="e@mail.com", $indicator_type=Intel::EMAIL, $where=Intel::IN_A_TEST]);
}
event bro_init()