zeek/scripts/base/frameworks/intel/main.bro

377 lines
9.6 KiB
Text

##! The intelligence framework provides a way to store and query IP addresses
##! and strings (with a str_type). Metadata can also be associated with the
##! intelligence to allow more informed decisions about matching and
##! handling of intelligence.
#
# TODO:
# Comments
# Better Intel::Item comparison (has_meta)
# Generate a notice when messed up data is discovered.
# Complete "net" support as an intelligence type.
@load base/frameworks/notice
module Intel;
export {
## Identifier of the logging stream created for this framework.
redef enum Log::ID += { LOG };
redef enum Notice::Type += {
## This notice should be used in all detector scripts to indicate
## an intelligence based detection.
Detection,
};
## String data needs to be further categorized since it could represent
## any number of types of data.
type SubType: enum {
## A complete URL.
URL,
## User-Agent string, typically HTTP or mail message body.
USER_AGENT,
## Email address.
EMAIL,
## DNS domain name (DNS Zones are implemented in an intelligence plugin).
DOMAIN,
## A user name.
USER_NAME,
## File hash which is non hash type specific. It's up to the user to query
## for any relevant hash types.
FILE_HASH,
## Certificate hash. Normally for X.509 certificates from the SSL analyzer.
CERT_HASH,
};
## Why a piece of intelligence is being added or looked up. The intent a human
## placed upon the data when it was decided to be worthwhile as intelligence.
type Intent: enum {
## Data is to be considered malicious.
MALICIOUS,
## Data is to be considered sensitive. In many cases this may be
## hosts containing contractually or legally restricted data such
## as HIPAA, PCI, Sarbanes-Oxley, etc.
SENSITIVE,
## Data that is never to be seen. This acts like the "canary in
## the coal mine". A possibility could be file hashes for
## critically important files.
CANARY,
## Data that is whitelisted. The primary use for this intent is to
## locally whitelist false positive data from external feeds.
WHITELIST,
};
## Enum to represent where data came from when it was discovered.
type Where: enum {
## A catchall value to represent data of unknown provenance.
ANYWHERE,
};
## Data about an :bro:type:`Intel::Item`
type MetaData: record {
## An arbitrary string value representing the data source. Typically,
## the convention for this field will be the source name and feed name
## separated by a hyphen. For example: "source1-c&c".
source: string;
## The intent of the data.
intent: Intent;
## A freeform description for the data.
desc: string &optional;
## A URL for more information about the data.
url: string &optional;
};
## A single piece of intelligence together with its metadata.
type Item: record {
## An IP address, for address-based intelligence.
host: addr &optional;
## A subnet. Support for this field is listed as incomplete in the
## TODO notes at the top of this script.
net: subnet &optional;
## A string value, for string-based intelligence.
str: string &optional;
## The category of the string in *str*. Required whenever *str* is set.
str_type: SubType &optional;
## The metadata attached to this piece of intelligence.
meta: MetaData;
};
## A piece of data that was observed and is to be checked against the
## known intelligence.
type Found: record {
## The observed IP address, if the data is an address.
host: addr &optional;
## The observed string, if the data is a string.
str: string &optional;
## The category of the observed string in *str*.
str_type: SubType &optional;
## Where the data was discovered.
where: Where;
};
## The columns of the intelligence log stream.
type Info: record {
## Timestamp of the log entry.
ts: time &log;
## This value should be one of: "info", "warn", "error"
level: string &log;
## The text of the log message.
message: string &log;
## The intelligence item the message refers to.
item: Item &log;
};
## A bundle of optional callback functions.
type Plugin: record {
index: function() &optional;
match: function(found: Found): bool &optional;
lookup: function(found: Found): set[Item] &optional;
};
# Manipulation and query API functions.
## Adds an item to the intelligence data store.
global insert: function(item: Item);
## NOTE(review): declared here, but no definition appears in the visible
## portion of this script.
global delete_item: function(item: Item): bool;
## Returns the number of distinct addresses plus the number of distinct
## (string, type) pairs held in the data store.
global unique_data: function(): count;
## Function to declare discovery of a piece of data in order to check
## it against known intelligence for matches.
global found_in_conn: function(c: connection, found: Found);
## Event to represent a match happening in a connection. On clusters there
## is no assurance as to where this event will be generated so don't
## assume that arbitrary global state beyond the given data
## will be available.
global match_in_conn: event(c: connection, found: Found, items: set[Item]);
## Returns T if the given data matches known intelligence.
global find: function(found: Found): bool;
## Returns the intelligence items matching the given data. The result is
## empty when any of the matching items carries the WHITELIST intent.
global lookup: function(found: Found): set[Item];
# Plugin API functions
## Registers a custom matching function for the given string type.
global register_custom_matcher: function(str_type: SubType,
func: function(found: Found): bool);
## Registers a custom lookup function for the given string type.
global register_custom_lookup: function(str_type: SubType,
func: function(found: Found): set[Item]);
# API Events
## Event referring to an item whose metadata source was not yet known.
global new_item: event(item: Item);
## Event referring to an item whose metadata source was already known.
global updated_item: event(item: Item);
## Event whose handler in this script inserts the given item.
global insert_event: event(item: Item);
## Optionally store metadata. This is primarily used internally depending on
## if this is a cluster deployment or not. On clusters, workers probably
## shouldn't be storing the full metadata.
const store_metadata = T &redef;
}
# Internal handler for connection-oriented matches raised without metadata,
# based on the store_metadata setting.
global match_in_conn_no_items: event(c: connection, found: Found);
# Container type for all stored intelligence.
type DataStore: record {
# Sets of metadata indexed by IP address.
host_data: table[addr] of set[MetaData];
# Sets of metadata indexed by string value and string type.
string_data: table[string, SubType] of set[MetaData];
};
# The data store instance read and written by the functions in this script.
global data_store: DataStore;
# Matcher functions per string type, filled by register_custom_matcher.
global custom_matchers: table[SubType] of set[function(found: Found): bool];
# Lookup functions per string type, filled by register_custom_lookup.
global custom_lookup: table[SubType] of set[function(found: Found): set[Item]];
# Creates the intelligence log stream at startup, using Info as its columns.
event bro_init() &priority=5
	{
	local intel_stream: Log::Stream = [$columns=Info];
	Log::create_stream(Intel::LOG, intel_stream);
	}
# Checks whether observed data matches any known intelligence.
#
# found: The observed data; either a host address or a string together
#        with its str_type.
#
# Returns: T if the host or the (string, type) pair is present in the data
#          store, or if a custom matcher registered for the string type
#          reports a match; F otherwise.
function find(found: Found): bool
	{
	if ( found?$host && found$host in data_store$host_data )
		return T;

	if ( found?$str && found?$str_type &&
	     [found$str, found$str_type] in data_store$string_data )
		return T;

	# Matchers registered through register_custom_matcher are keyed by
	# string type, so they can only be consulted when a str_type is given.
	if ( found?$str_type && found$str_type in custom_matchers )
		{
		for ( matcher in custom_matchers[found$str_type] )
			{
			if ( matcher(found) )
				return T;
			}
		}

	return F;
	}
# Collects every intelligence item that matches the observed data.
#
# found: The observed data. When a host is given, only the host data is
#        consulted; otherwise a string with its str_type is looked up in
#        the string data and in the custom lookup functions registered for
#        that string type.
#
# Returns: The set of matching items, or an empty set if any matching item
#          has the WHITELIST intent.
function lookup(found: Found): set[Item]
	{
	local entry: Item;
	local hits: set[Item] = set();

	if ( ! found?$host )
		{
		if ( found?$str && found?$str_type )
			{
			# Items stored directly for this (string, type) pair.
			if ( [found$str, found$str_type] in data_store$string_data )
				{
				for ( str_meta in data_store$string_data[found$str, found$str_type] )
					{
					entry = [$str=found$str, $str_type=found$str_type, $meta=str_meta];
					add hits[entry];
					}
				}

			# Items contributed by custom lookup functions for this string
			# type. Sets cannot be merged generically, so each result is
			# added one by one.
			if ( found$str_type in custom_lookup )
				{
				for ( custom_func in custom_lookup[found$str_type] )
					{
					for ( custom_item in custom_func(found) )
						add hits[custom_item];
					}
				}
			}
		}
	else if ( found$host in data_store$host_data )
		{
		# Items stored for this host.
		for ( host_meta in data_store$host_data[found$host] )
			{
			entry = [$host=found$host, $meta=host_meta];
			add hits[entry];
			}
		}

	# TODO: Later we should probably track whitelist matches.
	# TODO: base this on a set instead of iterating the items.
	# A single whitelisted item suppresses the whole result.
	for ( entry in hits )
		{
		if ( entry$meta$intent == WHITELIST )
			return set();
		}

	return hits;
	}
# Reports data seen in a connection and raises a match event if the data
# is known intelligence.
#
# c: The connection in which the data was seen.
#
# found: The observed data.
function Intel::found_in_conn(c: connection, found: Found)
	{
	# Nothing to report when the data is not known intelligence.
	if ( ! find(found) )
		return;

	# Without stored metadata only the bare match can be announced.
	if ( ! store_metadata )
		{
		event Intel::match_in_conn_no_items(c, found);
		return;
		}

	local items = lookup(found);
	event Intel::match_in_conn(c, found, items);
	}
# Registers a custom matching function for a string type.
#
# str_type: The string type the function is responsible for.
#
# func: The function to add to the matchers of that string type.
function register_custom_matcher(str_type: SubType, func: function(found: Found): bool)
	{
	# Make sure a set exists for this string type before adding to it.
	if ( str_type !in custom_matchers )
		custom_matchers[str_type] = set();

	add custom_matchers[str_type][func];
	}
# Registers a custom lookup function for a string type.
#
# str_type: The string type the function is responsible for.
#
# func: The function to add to the lookup functions of that string type.
function register_custom_lookup(str_type: SubType, func: function(found: Found): set[Item])
	{
	# Make sure a set exists for this string type before adding to it.
	if ( str_type !in custom_lookup )
		custom_lookup[str_type] = set();

	add custom_lookup[str_type][func];
	}
# Counts the distinct pieces of data held in the data store.
#
# Returns: The number of stored addresses plus the number of stored
#          (string, type) pairs.
function unique_data(): count
	{
	local host_total = |data_store$host_data|;
	local string_total = |data_store$string_data|;
	return host_total + string_total;
	}
#function get_meta(check: MetaData, metas: set[MetaData]): MetaData
# {
# local check_hash = md5_hash(check);
# for ( m in metas )
# {
# if ( check_hash == md5_hash(m) )
# return m;
# }
#
# return [$source=""];
# }
# Tells whether a metadata record is already contained in a set of metadata.
# Records are compared by the MD5 hash of their values.
#
# check: The metadata record to search for.
#
# metas: The set of metadata records to search in.
#
# Returns: T if a record with the same hash as *check* is in *metas*.
function has_meta(check: MetaData, metas: set[MetaData]): bool
	{
	local wanted_digest = md5_hash(check);

	for ( candidate in metas )
		{
		if ( md5_hash(candidate) == wanted_digest )
			return T;
		}

	# No record in the set produced the same hash.
	return F;
	}
# Adds a piece of intelligence to the data store.
#
# An item is stored by its host address if it has one, otherwise by its
# string and string type. Items that have neither, or that have a string
# without a string type, are rejected and a warning is written to the
# intelligence log.
#
# Inserting metadata that is already stored for the same data is a no-op.
# Otherwise Intel::updated_item is raised if metadata from the same source
# is already stored for the data, and Intel::new_item if it is not. Existing
# metadata from the same source is kept; the new metadata is added
# alongside it.
#
# item: The intelligence item to insert.
function insert(item: Item)
	{
	local err_msg = "";

	# Validate up front so that rejected items are always logged and never
	# touch the data store.
	if ( item?$str && ! item?$str_type )
		err_msg = "You must provide a str_type for strings or this item doesn't make sense.";
	else if ( ! item?$host && ! item?$str )
		err_msg = "Malformed intelligence item";

	if ( err_msg != "" )
		{
		Log::write(Intel::LOG, [$ts=network_time(), $level="warn", $message=err_msg, $item=item]);
		return;
		}

	local meta = item$meta;
	local metas: set[MetaData];

	# Fetch the metadata set for this data, creating it on first use.
	if ( item?$host )
		{
		if ( item$host !in data_store$host_data )
			data_store$host_data[item$host] = set();
		metas = data_store$host_data[item$host];
		}
	else
		{
		if ( [item$str, item$str_type] !in data_store$string_data )
			data_store$string_data[item$str, item$str_type] = set();
		metas = data_store$string_data[item$str, item$str_type];
		}

	# It's the same item being inserted again.
	if ( has_meta(meta, metas) )
		return;

	# Look at every stored record to decide whether this source is
	# already known for the data.
	local source_known = F;
	for ( m in metas )
		{
		if ( meta$source == m$source )
			{
			source_known = T;
			break;
			}
		}

	if ( source_known )
		event Intel::updated_item(item);
	else
		event Intel::new_item(item);

	add metas[meta];
	}
# Event-driven entry point for insertion: passes the received item
# straight to insert().
event insert_event(item: Item)
{
insert(item);
}