Merge branch 'topic/jgras/intel-update' of https://github.com/J-Gras/bro into topic/seth/intel-update-merge

# Conflicts:
#	testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log
#	testing/btest/Baseline/scripts.policy.frameworks.intel.seen.certs/intel-all.log
This commit is contained in:
Seth Hall 2016-08-02 15:50:43 -04:00
commit d6a7322a75
27 changed files with 905 additions and 197 deletions

View file

@ -1,5 +1,8 @@
@load ./main
# File analysis framework integration.
@load ./files
# The cluster framework must be loaded first.
@load base/frameworks/cluster

View file

@ -1,8 +1,8 @@
##! Cluster transparency support for the intelligence framework. This is mostly
##! oriented toward distributing intelligence information across clusters.
@load ./main
@load base/frameworks/cluster
@load ./input
module Intel;
@ -17,19 +17,17 @@ redef record Item += {
redef have_full_data = F;
@endif
# Internal event for cluster data distribution.
global cluster_new_item: event(item: Item);
# Primary intelligence distribution comes from manager.
redef Cluster::manager2worker_events += /^Intel::(cluster_new_item)$/;
# If a worker finds intelligence and adds it, it should share it back to the manager.
redef Cluster::worker2manager_events += /^Intel::(cluster_new_item|match_no_items)$/;
# Primary intelligence management is done by the manager:
# The manager informs the workers about new items and item removal.
redef Cluster::manager2worker_events += /^Intel::(cluster_new_item|purge_item)$/;
# A worker queries the manager to insert, remove or indicate the match of an item.
redef Cluster::worker2manager_events += /^Intel::(cluster_new_item|remove_item|match_no_items)$/;
@if ( Cluster::local_node_type() == Cluster::MANAGER )
event Intel::match_no_items(s: Seen) &priority=5
{
event Intel::match(s, Intel::get_items(s));
}
# Handling of new worker nodes.
event remote_connection_handshake_done(p: event_peer)
{
# When a worker connects, send it the complete minimal data store.
@ -39,15 +37,22 @@ event remote_connection_handshake_done(p: event_peer)
send_id(p, "Intel::min_data_store");
}
}
@endif
event Intel::cluster_new_item(item: Intel::Item) &priority=5
# Handling of matches triggered by worker nodes.
event Intel::match_no_items(s: Seen) &priority=5
{
# Ignore locally generated events to avoid event storms.
if ( is_remote_event() )
Intel::insert(item);
if ( Intel::find(s) )
event Intel::match(s, Intel::get_items(s));
}
# Handling of item removal triggered by worker nodes.
event Intel::remove_item(item: Item, purge_indicator: bool)
{
remove(item, purge_indicator);
}
@endif
# Handling of item insertion.
event Intel::new_item(item: Intel::Item) &priority=5
{
# The cluster manager always rebroadcasts intelligence.
@ -59,3 +64,11 @@ event Intel::new_item(item: Intel::Item) &priority=5
event Intel::cluster_new_item(item);
}
}
# Handling of item insertion by remote node.
event Intel::cluster_new_item(item: Intel::Item) &priority=5
{
# Ignore locally generated events to avoid event storms.
if ( is_remote_event() )
Intel::insert(item);
}

View file

@ -0,0 +1,84 @@
##! File analysis framework integration for the intelligence framework. This
##! script manages file information in intelligence framework datastructures.
@load ./main
module Intel;
export {
## Enum type to represent various types of intelligence data.
redef enum Type += {
## File hash which is non-hash type specific. It's up to the
## user to query for any relevant hash types.
FILE_HASH,
## File name. Typically with protocols with definite
## indications of a file name.
FILE_NAME,
};
## Information about a piece of "seen" data.
redef record Seen += {
## If the data was discovered within a file, the file record
## should go here to provide context to the data.
f: fa_file &optional;
## If the data was discovered within a file, the file uid should
## go here to provide context to the data. If the file record *f*
## is provided, this will be automatically filled out.
fuid: string &optional;
};
## Record used for the logging framework representing a positive
## hit within the intelligence framework.
redef record Info += {
## If a file was associated with this intelligence hit,
## this is the uid for the file.
fuid: string &log &optional;
## A mime type if the intelligence hit is related to a file.
## If the $f field is provided this will be automatically filled
## out.
file_mime_type: string &log &optional;
## Frequently files can be "described" to give a bit more context.
## If the $f field is provided this field will be automatically
## filled out.
file_desc: string &log &optional;
};
}
# Add file information to matches if available.
hook extend_match(info: Info, s: Seen, items: set[Item]) &priority=5
{
if ( s?$f )
{
s$fuid = s$f$id;
if ( s$f?$conns && |s$f$conns| == 1 )
{
for ( cid in s$f$conns )
s$conn = s$f$conns[cid];
}
if ( ! info?$file_mime_type && s$f?$info && s$f$info?$mime_type )
info$file_mime_type = s$f$info$mime_type;
if ( ! info?$file_desc )
info$file_desc = Files::describe(s$f);
}
if ( s?$fuid )
info$fuid = s$fuid;
if ( s?$conn )
{
s$uid = s$conn$uid;
info$id = s$conn$id;
}
if ( s?$uid )
info$uid = s$uid;
for ( item in items )
{
add info$sources[item$meta$source];
add info$matched[item$indicator_type];
}
}

View file

@ -1,11 +1,14 @@
##! Input handling for the intelligence framework. This script implements the
##! import of intelligence data from files using the input framework.
@load ./main
module Intel;
export {
## Intelligence files that will be read off disk. The files are
## reread every time they are updated so updates must be atomic with
## "mv" instead of writing the file in place.
## Intelligence files that will be read off disk. The files are
## reread every time they are updated so updates must be atomic
## with "mv" instead of writing the file in place.
const read_files: set[string] = {} &redef;
}

View file

@ -1,7 +1,6 @@
##! The intelligence framework provides a way to store and query IP addresses,
##! and strings (with a str_type). Metadata can
##! also be associated with the intelligence, like for making more informed
##! decisions about matching and handling of intelligence.
##! The intelligence framework provides a way to store and query intelligence data
##! (e.g. IP addresses, URLs and hashes). The intelligence items can be associated
##! with metadata to allow informed decisions about matching and handling.
@load base/frameworks/notice
@ -14,6 +13,8 @@ export {
type Type: enum {
## An IP address.
ADDR,
## A subnet in CIDR notation.
SUBNET,
## A complete URL without the prefix ``"http://"``.
URL,
## Software name.
@ -24,24 +25,20 @@ export {
DOMAIN,
## A user name.
USER_NAME,
## File hash which is non-hash type specific. It's up to the
## user to query for any relevant hash types.
FILE_HASH,
## File name. Typically with protocols with definite
## indications of a file name.
FILE_NAME,
## Certificate SHA-1 hash.
CERT_HASH,
## Public key MD5 hash. (SSH server host keys are a good example.)
PUBKEY_HASH,
};
## Set of intelligence data types.
type TypeSet: set[Type];
## Data about an :bro:type:`Intel::Item`.
type MetaData: record {
## An arbitrary string value representing the data source.
## Typically, the convention for this field will be the source
## name and feed name separated by a hyphen.
## For example: "source1-c&c".
## An arbitrary string value representing the data source. This
## value is used as unique key to identify a metadata record in
## the scope of a single intelligence item.
source: string;
## A freeform description for the data.
desc: string &optional;
@ -57,7 +54,7 @@ export {
## The type of data that the indicator field represents.
indicator_type: Type;
## Metadata for the item. Typically represents more deeply
## Metadata for the item. Typically represents more deeply
## descriptive data for a piece of intelligence.
meta: MetaData;
};
@ -96,15 +93,6 @@ export {
## If the *conn* field is provided, this will be automatically
## filled out.
uid: string &optional;
## If the data was discovered within a file, the file record
## should go here to provide context to the data.
f: fa_file &optional;
## If the data was discovered within a file, the file uid should
## go here to provide context to the data. If the *f* field is
## provided, this will be automatically filled out.
fuid: string &optional;
};
## Record used for the logging framework representing a positive
@ -120,41 +108,69 @@ export {
## this is the conn_id for the connection.
id: conn_id &log &optional;
## If a file was associated with this intelligence hit,
## this is the uid for the file.
fuid: string &log &optional;
## A mime type if the intelligence hit is related to a file.
## If the $f field is provided this will be automatically filled
## out.
file_mime_type: string &log &optional;
## Frequently files can be "described" to give a bit more context.
## If the $f field is provided this field will be automatically
## filled out.
file_desc: string &log &optional;
## Where the data was seen.
seen: Seen &log;
## Which indicator types matched.
matched: TypeSet &log;
## Sources which supplied data that resulted in this match.
sources: set[string] &log &default=string_set();
};
## Intelligence data manipulation function.
## Function to insert intelligence data. If the indicator is already
## present, the associated metadata will be added to the indicator. If
## the indicator already contains a metadata record from the same source,
## the existing metadata record will be updated.
global insert: function(item: Item);
## Function to remove intelligence data. If purge_indicator is set, the
## given metadata is ignored and the indicator is removed completely.
global remove: function(item: Item, purge_indicator: bool &default = F);
## Function to declare discovery of a piece of data in order to check
## it against known intelligence for matches.
global seen: function(s: Seen);
## Event to represent a match in the intelligence data from data that
## was seen. On clusters there is no assurance as to where this event
## was seen. On clusters there is no assurance as to when this event
## will be generated so do not assume that arbitrary global state beyond
## the given data will be available.
##
## This is the primary mechanism where a user will take actions based on
## data within the intelligence framework.
## This is the primary mechanism where a user may take actions based on
## data provided by the intelligence framework.
global match: event(s: Seen, items: set[Item]);
## This hook can be used to influence the logging of intelligence hits
## (e.g. by adding data to the Info record). The default information is
## added with a priority of 5.
##
## info: The Info record that will be logged.
##
## s: Information about the data seen.
##
## items: The intel items that match the seen data.
##
## In case the hook execution is terminated using break, the match will
## not be logged.
global extend_match: hook(info: Info, s: Seen, items: set[Item]);
## The expiration timeout for intelligence items. Once an item expires, the
## :bro:id:`item_expired` hook is called. Reinsertion of an item resets the
## timeout. A negative value disables expiration of intelligence items.
const item_expiration = -1 min &redef;
## This hook can be used to handle expiration of intelligence items.
##
## indicator: The indicator of the expired item.
##
## indicator_type: The indicator type of the expired item.
##
## metas: The set of metadata describing the expired item.
##
## If all hook handlers are executed, the expiration timeout will be reset.
## Otherwise, if one of the handlers terminates using break, the item will
## be removed.
global item_expired: hook(indicator: string, indicator_type: Type, metas: set[MetaData]);
global log_intel: event(rec: Info);
}
@ -163,16 +179,26 @@ global match_no_items: event(s: Seen);
# Internal events for cluster data distribution.
global new_item: event(item: Item);
global updated_item: event(item: Item);
global remove_item: event(item: Item, purge_indicator: bool);
global purge_item: event(item: Item);
# Optionally store metadata. This is used internally depending on
# if this is a cluster deployment or not.
const have_full_data = T &redef;
# Table of metadata, indexed by source string.
type MetaDataTable: table[string] of MetaData;
# Expiration handlers.
global expire_host_data: function(data: table[addr] of MetaDataTable, idx: addr): interval;
global expire_subnet_data: function(data: table[subnet] of MetaDataTable, idx: subnet): interval;
global expire_string_data: function(data: table[string, Type] of MetaDataTable, idx: any): interval;
# The in memory data structure for holding intelligence.
type DataStore: record {
host_data: table[addr] of set[MetaData];
string_data: table[string, Type] of set[MetaData];
host_data: table[addr] of MetaDataTable &write_expire=item_expiration &expire_func=expire_host_data;
subnet_data: table[subnet] of MetaDataTable &write_expire=item_expiration &expire_func=expire_subnet_data;
string_data: table[string, Type] of MetaDataTable &write_expire=item_expiration &expire_func=expire_string_data;
};
global data_store: DataStore &redef;
@ -181,6 +207,7 @@ global data_store: DataStore &redef;
# a minimal amount of data for the full match to happen on the manager.
type MinDataStore: record {
host_data: set[addr];
subnet_data: set[subnet];
string_data: set[string, Type];
};
global min_data_store: MinDataStore &redef;
@ -191,33 +218,78 @@ event bro_init() &priority=5
Log::create_stream(LOG, [$columns=Info, $ev=log_intel, $path="intel"]);
}
# Function that abstracts expiration of different types.
function expire_item(indicator: string, indicator_type: Type, metas: set[MetaData]): interval
{
if ( hook item_expired(indicator, indicator_type, metas) )
return item_expiration;
else
remove([$indicator=indicator, $indicator_type=indicator_type, $meta=[$source=""]], T);
return 0 sec;
}
# Expiration handler definitions.
function expire_host_data(data: table[addr] of MetaDataTable, idx: addr): interval
{
local meta_tbl: MetaDataTable = data[idx];
local metas: set[MetaData];
for ( src in meta_tbl )
add metas[meta_tbl[src]];
return expire_item(cat(idx), ADDR, metas);
}
function expire_subnet_data(data: table[subnet] of MetaDataTable, idx: subnet): interval
{
local meta_tbl: MetaDataTable = data[idx];
local metas: set[MetaData];
for ( src in meta_tbl )
add metas[meta_tbl[src]];
return expire_item(cat(idx), ADDR, metas);
}
function expire_string_data(data: table[string, Type] of MetaDataTable, idx: any): interval
{
local indicator: string;
local indicator_type: Type;
[indicator, indicator_type] = idx;
local meta_tbl: MetaDataTable = data[indicator, indicator_type];
local metas: set[MetaData];
for ( src in meta_tbl )
add metas[meta_tbl[src]];
return expire_item(indicator, indicator_type, metas);
}
# Function to check for intelligence hits.
function find(s: Seen): bool
{
local ds = have_full_data ? data_store : min_data_store;
if ( s?$host )
{
return ((s$host in min_data_store$host_data) ||
(have_full_data && s$host in data_store$host_data));
}
else if ( ([to_lower(s$indicator), s$indicator_type] in min_data_store$string_data) ||
(have_full_data && [to_lower(s$indicator), s$indicator_type] in data_store$string_data) )
{
return T;
return ((s$host in ds$host_data) ||
(|matching_subnets(addr_to_subnet(s$host), ds$subnet_data)| > 0));
}
else
{
return F;
return ([to_lower(s$indicator), s$indicator_type] in ds$string_data);
}
}
# Function to retrieve intelligence items while abstracting from different
# data stores for different indicator types.
function get_items(s: Seen): set[Item]
{
local return_data: set[Item];
local mt: MetaDataTable;
if ( ! have_full_data )
{
# A reporter warning should be generated here because this function
# should never be called from a host that doesn't have the full data.
# TODO: do a reporter warning.
Reporter::warning(fmt("Intel::get_items was called from a host (%s) that doesn't have the full data.",
peer_description));
return return_data;
}
@ -226,11 +298,23 @@ function get_items(s: Seen): set[Item]
# See if the host is known about and it has meta values
if ( s$host in data_store$host_data )
{
for ( m in data_store$host_data[s$host] )
mt = data_store$host_data[s$host];
for ( m in mt )
{
add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=m)];
add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=mt[m])];
}
}
# See if the host is part of a known subnet, which has meta values
local nets: table[subnet] of MetaDataTable;
nets = filter_subnet_table(addr_to_subnet(s$host), data_store$subnet_data);
for ( n in nets )
{
mt = nets[n];
for ( m in mt )
{
add return_data[Item($indicator=cat(n), $indicator_type=SUBNET, $meta=mt[m])];
}
}
}
else
{
@ -238,9 +322,10 @@ function get_items(s: Seen): set[Item]
# See if the string is known about and it has meta values
if ( [lower_indicator, s$indicator_type] in data_store$string_data )
{
for ( m in data_store$string_data[lower_indicator, s$indicator_type] )
mt = data_store$string_data[lower_indicator, s$indicator_type];
for ( m in mt )
{
add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=m)];
add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=mt[m])];
}
}
}
@ -275,64 +360,20 @@ function Intel::seen(s: Seen)
}
}
function has_meta(check: MetaData, metas: set[MetaData]): bool
{
local check_hash = md5_hash(check);
for ( m in metas )
{
if ( check_hash == md5_hash(m) )
return T;
}
# The records must not be equivalent if we made it this far.
return F;
}
event Intel::match(s: Seen, items: set[Item]) &priority=5
{
local info = Info($ts=network_time(), $seen=s);
local info = Info($ts=network_time(), $seen=s, $matched=TypeSet());
if ( s?$f )
{
s$fuid = s$f$id;
if ( s$f?$conns && |s$f$conns| == 1 )
{
for ( cid in s$f$conns )
s$conn = s$f$conns[cid];
}
if ( ! info?$file_mime_type && s$f?$info && s$f$info?$mime_type )
info$file_mime_type = s$f$info$mime_type;
if ( ! info?$file_desc )
info$file_desc = Files::describe(s$f);
}
if ( s?$fuid )
info$fuid = s$fuid;
if ( s?$conn )
{
s$uid = s$conn$uid;
info$id = s$conn$id;
}
if ( s?$uid )
info$uid = s$uid;
for ( item in items )
add info$sources[item$meta$source];
Log::write(Intel::LOG, info);
if ( hook extend_match(info, s, items) )
Log::write(Intel::LOG, info);
}
function insert(item: Item)
{
# Create and fill out the meta data item.
# Create and fill out the metadata item.
local meta = item$meta;
local metas: set[MetaData];
local meta_tbl: table [string] of MetaData;
local is_new: bool = T;
# All intelligence is case insensitive at the moment.
local lower_indicator = to_lower(item$indicator);
@ -343,51 +384,133 @@ function insert(item: Item)
if ( have_full_data )
{
if ( host !in data_store$host_data )
data_store$host_data[host] = set();
data_store$host_data[host] = table();
else
is_new = F;
metas = data_store$host_data[host];
meta_tbl = data_store$host_data[host];
}
add min_data_store$host_data[host];
}
else if ( item$indicator_type == SUBNET )
{
local net = to_subnet(item$indicator);
if ( have_full_data )
{
if ( !check_subnet(net, data_store$subnet_data) )
data_store$subnet_data[net] = table();
else
is_new = F;
meta_tbl = data_store$subnet_data[net];
}
add min_data_store$subnet_data[net];
}
else
{
if ( have_full_data )
{
if ( [lower_indicator, item$indicator_type] !in data_store$string_data )
data_store$string_data[lower_indicator, item$indicator_type] = set();
data_store$string_data[lower_indicator, item$indicator_type] = table();
else
is_new = F;
metas = data_store$string_data[lower_indicator, item$indicator_type];
meta_tbl = data_store$string_data[lower_indicator, item$indicator_type];
}
add min_data_store$string_data[lower_indicator, item$indicator_type];
}
local updated = F;
if ( have_full_data )
{
for ( m in metas )
{
if ( meta$source == m$source )
{
if ( has_meta(meta, metas) )
{
# It's the same item being inserted again.
return;
}
else
{
# Same source, different metadata means updated item.
updated = T;
}
}
}
add metas[item$meta];
# Insert new metadata or update if already present
meta_tbl[meta$source] = meta;
}
if ( updated )
event Intel::updated_item(item);
else
if ( is_new )
# Trigger insert for cluster in case the item is new
# or insert was called on a worker
event Intel::new_item(item);
}
# Function to remove metadata of an item. The function returns T
# if there is no metadata left for the given indicator.
function remove_meta_data(item: Item): bool
{
if ( ! have_full_data )
{
Reporter::warning(fmt("Intel::remove_meta_data was called from a host (%s) that doesn't have the full data.",
peer_description));
return F;
}
switch ( item$indicator_type )
{
case ADDR:
local host = to_addr(item$indicator);
delete data_store$host_data[host][item$meta$source];
return (|data_store$host_data[host]| == 0);
case SUBNET:
local net = to_subnet(item$indicator);
delete data_store$subnet_data[net][item$meta$source];
return (|data_store$subnet_data[net]| == 0);
default:
delete data_store$string_data[item$indicator, item$indicator_type][item$meta$source];
return (|data_store$string_data[item$indicator, item$indicator_type]| == 0);
}
}
function remove(item: Item, purge_indicator: bool)
{
# Delegate removal if we are on a worker
if ( !have_full_data )
{
event Intel::remove_item(item, purge_indicator);
return;
}
# Remove metadata from manager's data store
local no_meta_data = remove_meta_data(item);
# Remove whole indicator if necessary
if ( no_meta_data || purge_indicator )
{
switch ( item$indicator_type )
{
case ADDR:
local host = to_addr(item$indicator);
delete data_store$host_data[host];
break;
case SUBNET:
local net = to_subnet(item$indicator);
delete data_store$subnet_data[net];
break;
default:
delete data_store$string_data[item$indicator, item$indicator_type];
break;
}
# Trigger deletion in minimal data stores
event Intel::purge_item(item);
}
}
# Handling of indicator removal in minimal data stores.
event purge_item(item: Item)
{
switch ( item$indicator_type )
{
case ADDR:
local host = to_addr(item$indicator);
delete min_data_store$host_data[host];
break;
case SUBNET:
local net = to_subnet(item$indicator);
delete min_data_store$subnet_data[net];
break;
default:
delete min_data_store$string_data[item$indicator, item$indicator_type];
break;
}
}