From 71c5b49bdc4daf8b06516509d36a297af41f4e28 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Thu, 27 Sep 2012 13:39:48 -0400 Subject: [PATCH] Major updates to fix the Intel framework API. --- scripts/base/frameworks/intel/__load__.bro | 9 +- scripts/base/frameworks/intel/cluster.bro | 45 ++- scripts/base/frameworks/intel/indexing.bro | 68 ---- scripts/base/frameworks/intel/input.bro | 4 +- scripts/base/frameworks/intel/main.bro | 308 ++++++------------ scripts/base/frameworks/intel/non-cluster | 3 - .../frameworks/intel/plugins/dns_zones.bro | 61 ---- scripts/base/frameworks/intel/plugins/set.bro | 19 -- 8 files changed, 126 insertions(+), 391 deletions(-) delete mode 100644 scripts/base/frameworks/intel/indexing.bro delete mode 100644 scripts/base/frameworks/intel/non-cluster delete mode 100644 scripts/base/frameworks/intel/plugins/dns_zones.bro delete mode 100644 scripts/base/frameworks/intel/plugins/set.bro diff --git a/scripts/base/frameworks/intel/__load__.bro b/scripts/base/frameworks/intel/__load__.bro index 8b425f6de4..c6822212c0 100644 --- a/scripts/base/frameworks/intel/__load__.bro +++ b/scripts/base/frameworks/intel/__load__.bro @@ -1,15 +1,8 @@ @load ./main @load ./input -@load ./indexing # The cluster framework must be loaded first. 
@load base/frameworks/cluster - @if ( Cluster::is_enabled() ) @load ./cluster -@endif - -@load ./plugins/dns_zones - - -@load ./http-user-agents \ No newline at end of file +@endif \ No newline at end of file diff --git a/scripts/base/frameworks/intel/cluster.bro b/scripts/base/frameworks/intel/cluster.bro index 6b361fc711..5b5f67e978 100644 --- a/scripts/base/frameworks/intel/cluster.bro +++ b/scripts/base/frameworks/intel/cluster.bro @@ -5,53 +5,52 @@ module Intel; -export { - global cluster_new_item: event(item: Item); - global cluster_updated_item: event(item: Item); - - redef record Item += { - ## This field is solely used internally for cluster transparency with - ## the intelligence framework to avoid storms of intelligence data - ## swirling forever. It allows data to propagate only a single time. - first_dispatch: bool &default=T; - }; -} - # If this process is not a manager process, we don't want the full metadata @if ( Cluster::local_node_type() != Cluster::MANAGER ) -redef store_metadata = F; +redef have_full_data = F; @endif +global cluster_new_item: event(item: Item); +global cluster_updated_item: event(item: Item); + +redef record Item += { + ## This field is solely used internally for cluster transparency with + ## the intelligence framework to avoid storms of intelligence data + ## swirling forever. It allows data to propagate only a single time. + first_dispatch: bool &default=T; +}; + # Primary intelligence distribution comes from manager. redef Cluster::manager2worker_events += /Intel::cluster_(new|updated)_item/; # If a worker finds intelligence and adds it, it should share it back to the manager. 
redef Cluster::worker2manager_events += /Intel::(match_in_.*_no_items|cluster_(new|updated)_item)/; @if ( Cluster::local_node_type() == Cluster::MANAGER ) -event Intel::match_in_conn_no_items(c: connection, found: Found) +event Intel::match_in_conn_no_items(c: connection, seen: Seen) &priority=5 { - local items = lookup(found); - event Intel::match_in_conn(c, found, items); + event Intel::match_in_conn(c, seen, Intel::get_items(seen)); } @endif -event Intel::cluster_new_item(item: Intel::Item) +event Intel::cluster_new_item(item: Intel::Item) &priority=5 { - # Ignore locally generated events. + # Ignore locally generated events to avoid event storms. if ( is_remote_event() ) Intel::insert(item); } -event Intel::cluster_updated_item(item: Intel::Item) +event Intel::cluster_updated_item(item: Intel::Item) &priority=5 { - # Ignore locally generated events. + # Ignore locally generated events to avoid event storms. if ( is_remote_event() ) Intel::insert(item); } -event Intel::new_item(item: Intel::Item) +event Intel::new_item(item: Intel::Item) &priority=5 { - # The cluster manager always rebroadcasts intelligence + # The cluster manager always rebroadcasts intelligence. + # Workers redistribute it if it was locally generated on + # the worker. if ( Cluster::local_node_type() == Cluster::MANAGER || item$first_dispatch ) { @@ -60,7 +59,7 @@ event Intel::new_item(item: Intel::Item) } } -event Intel::updated_item(item: Intel::Item) +event Intel::updated_item(item: Intel::Item) &priority=5 { # If this is the first time this item has been dispatched or this # is a manager, send it over the cluster. 
diff --git a/scripts/base/frameworks/intel/indexing.bro b/scripts/base/frameworks/intel/indexing.bro deleted file mode 100644 index a89ac44038..0000000000 --- a/scripts/base/frameworks/intel/indexing.bro +++ /dev/null @@ -1,68 +0,0 @@ -module Intel; - -export { - type Indexes: record { - hosts: set[addr] &default=set(); - strings: set[string, SubType] &default=set(); - }; - - redef record Plugin += { - index: function(item: Item) &optional; - } - - ## Rebuild indexes this interval after any change to data if there - ## have been no other changes. - const rebuild_indexes_min = 1min &redef; - ## Wait no longer than this interval to update indexes after any - ## change to the data. - const rebuild_indexes_max = 5min &redef; - - global indexing_done: event(); -} - -local indexes: Indexes = []; - -global last_index_rebuild = network_time(); -global last_datastore_mod = network_time(); - - -event reindex() &priority=5 - { - local tmp_indexes: Indexes; - for ( plugin in plugins ) - { - for ( m in metas$metas ) - { - add tmp_indexes$hosts[m$source]; - add tmp_indexes$strings[m$intent]; - - #for ( ip in index_plugins ) - # { - # ip$index(index, m); - # } - } - } - indexes = - event indexing_done(); - } - -event rebuild_indexes(triggered_at: time) - { - if ( network_time() - triggered_at >= rebuild_indexes_max || - network_time() - last_datastore_mod >= rebuild_indexes_min ) - { - reindex(); - } - } - -event Intel::new_item(item:: Item) &priority=5 - { - last_datastore_mod = network_time(); - schedule rebuild_indexes_min { rebuild_indexes(network_time()) }; - } - -event Intel::updated_item(item:: Item) &priority=5 - { - last_datastore_mod = network_time(); - schedule rebuild_indexes_min { rebuild_indexes(network_time()) }; - } \ No newline at end of file diff --git a/scripts/base/frameworks/intel/input.bro b/scripts/base/frameworks/intel/input.bro index 4776a0852e..fd2c0bae97 100644 --- a/scripts/base/frameworks/intel/input.bro +++ b/scripts/base/frameworks/intel/input.bro 
@@ -3,7 +3,9 @@ module Intel; export { - ## Files that will be read off disk + ## Intelligence files that will be read off disk. The files are + ## reread every time they are updated so updates must be atomic with + ## "mv" instead of writing the file in place. const read_files: set[string] = {} &redef; } diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 9d73915fb0..dbf40f637d 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -2,12 +2,6 @@ ##! and strings (with a str_type). Metadata can ##! also be associated with the intelligence like for making more informated ##! decisions about matching and handling of intelligence. -# -# TODO: -# Comments -# Better Intel::Item comparison (has_meta) -# Generate a notice when messed up data is discovered. -# Complete "net" support as an intelligence type. @load base/frameworks/notice @@ -24,7 +18,7 @@ export { ## String data needs to be further categoried since it could represent ## and number of types of data. - type SubType: enum { + type StrType: enum { ## A complete URL. URL, ## User-Agent string, typically HTTP or mail message body. @@ -41,39 +35,13 @@ export { ## Certificate hash. Normally for X.509 certificates from the SSL analyzer. CERT_HASH, }; - - ## Why a piece of intelligence is being added or looked up. The intent a human - ## placed upon the data when it was decided to be worthwhile as intelligence. - type Intent: enum { - ## Data is to be considered malicious. - MALICIOUS, - ## Data is to be considered sensitive. In many cases this may be - ## hosts containing contractually or legally restricted data such - ## as HIPPA, PCI, Sarbanes-Oxley, etc. - SENSITIVE, - ## Data that is never to be seen. This acts like the "canary in - ## the coal mine". A possibility could be file hashes for - ## critically important files. - CANARY, - ## Data that is whitelisted.
The primary use for this intent is to - ## locally whitelist false positive data from external feeds. - WHITELIST, - }; - - ## Enum to represent where data came from when it was discovered. - type Where: enum { - ## A catchall value to represent data of unknown provenance. - ANYWHERE, - }; - + ## Data about an :bro:type:`Intel::Item` type MetaData: record { ## An arbitrary string value representing the data source. Typically, ## the convention for this field will be the source name and feed name ## separated by a hyphen. For example: "source1-c&c". source: string; - ## The intent of the data. - intent: Intent; ## A freeform description for the data. desc: string &optional; ## A URL for more information about the data. @@ -84,215 +52,144 @@ export { host: addr &optional; net: subnet &optional; str: string &optional; - str_type: SubType &optional; + str_type: StrType &optional; meta: MetaData; }; - type Found: record { + ## Enum to represent where data came from when it was discovered. + type Where: enum { + ## A catchall value to represent data of unknown provenance. + ANYWHERE, + }; + + type Seen: record { host: addr &optional; str: string &optional; - str_type: SubType &optional; + str_type: StrType &optional; where: Where; }; - type Info: record { - ts: time &log; - ## This value should be one of: "info", "warn", "error" - level: string &log; - message: string &log; - item: Item &log; + type PolicyItem: record { + pred: function(seen: Seen, item: Item): bool &optional; + + log_it: bool &default=T; }; - type Plugin: record { - index: function() &optional; - match: function(found: Found): bool &optional; - lookup: function(found: Found): set[Item] &optional; - }; - - ## Manipulation and query API functions. + ## Intelligence data manipulation functions. 
global insert: function(item: Item); global delete_item: function(item: Item): bool; - global unique_data: function(): count; ## Function to declare discovery of a piece of data in order to check ## it against known intelligence for matches. - global found_in_conn: function(c: connection, found: Found); + global seen_in_conn: function(c: connection, seen: Seen); - ## Event to represent a match happening in a connection. On clusters there - ## is no assurance as to where this event will be generated so don't - ## assume that arbitrary global state beyond the given data - ## will be available. - global match_in_conn: event(c: connection, found: Found, items: set[Item]); + ## Intelligence policy variable for handling matches. + const policy: set[PolicyItem] = {} &redef; - global find: function(found: Found): bool; - global lookup: function(found: Found): set[Item]; - - - ## Plugin API functions - global register_custom_matcher: function(str_type: SubType, - func: function(found: Found): bool); - global register_custom_lookup: function(str_type: SubType, - func: function(found: Found): set[Item]); - - ## API Events + ## API Events that indicate when various things happen internally within the + ## intelligence framework. global new_item: event(item: Item); global updated_item: event(item: Item); - global insert_event: event(item: Item); - - ## Optionally store metadata. This is primarily used internally depending on - ## if this is a cluster deployment or not. On clusters, workers probably - ## shouldn't be storing the full metadata. - const store_metadata = T &redef; } -# Internal handler for conn oriented matches with no metadata base on the store_metadata setting. -global match_in_conn_no_items: event(c: connection, found: Found); +## Event to represent a match happening in a connection. On clusters there +## is no assurance as to where this event will be generated so don't +## assume that arbitrary global state beyond the given data +## will be available. 
+global match_in_conn: event(c: connection, seen: Seen, items: set[Item]); + +# Internal handler for conn oriented matches with no metadata based on the have_full_data setting. +global match_in_conn_no_items: event(c: connection, seen: Seen); + +## Optionally store metadata. This is used internally depending on +## if this is a cluster deployment or not. +const have_full_data = T &redef; type DataStore: record { - host_data: table[addr] of set[MetaData]; - string_data: table[string, SubType] of set[MetaData]; + net_data: table[subnet] of set[MetaData]; + string_data: table[string, StrType] of set[MetaData]; }; global data_store: DataStore; -global custom_matchers: table[SubType] of set[function(found: Found): bool]; -global custom_lookup: table[SubType] of set[function(found: Found): set[Item]]; - - -event bro_init() &priority=5 +function find(seen: Seen): bool { - Log::create_stream(Intel::LOG, [$columns=Info]); - } - - -function find(found: Found): bool - { - if ( found?$host && found$host in data_store$host_data) + if ( seen?$host && + seen$host in data_store$net_data ) { return T; } - else if ( found?$str && found?$str_type && - [found$str, found$str_type] in data_store$string_data ) + else if ( seen?$str && seen?$str_type && + [seen$str, seen$str_type] in data_store$string_data ) { return T; } - - # Finder plugins! - for ( plugin in plugins ) + else { - if ( plugin?$match && plugin$match(found) ) - return T; + return F; } - - return F; } -function lookup(found: Found): set[Item] +function get_items(seen: Seen): set[Item] { local item: Item; local return_data: set[Item] = set(); - if ( found?$host ) + if ( ! have_full_data ) + { + # A reporter warning should be generated here because this function + # should never be called from a host that doesn't have the full data. + # TODO: do a reporter warning. 
+ return return_data; + } + + if ( seen?$host ) { # See if the host is known about and it has meta values - if ( found$host in data_store$host_data ) + if ( seen$host in data_store$net_data ) { - for ( m in data_store$host_data[found$host] ) + for ( m in data_store$net_data[seen$host] ) { - item = [$host=found$host, $meta=m]; + # TODO: the lookup should be finding all and not just most specific + # and $host/$net should have the correct value. + item = [$host=seen$host, $meta=m]; add return_data[item]; } } } - else if ( found?$str && found?$str_type ) + else if ( seen?$str && seen?$str_type ) { # See if the string is known about and it has meta values - if ( [found$str, found$str_type] in data_store$string_data ) + if ( [seen$str, seen$str_type] in data_store$string_data ) { - for ( m in data_store$string_data[found$str, found$str_type] ) + for ( m in data_store$string_data[seen$str, seen$str_type] ) { - item = [$str=found$str, $str_type=found$str_type, $meta=m]; + item = [$str=seen$str, $str_type=seen$str_type, $meta=m]; add return_data[item]; } } - - # Check if there are any custom str_type lookup functions and add the values to - # the result set. - if ( found$str_type in custom_lookup ) - { - for ( lookup_func in custom_lookup[found$str_type] ) - { - # Iterating here because there is no way to merge sets generically. - for ( custom_lookup_item in lookup_func(found) ) - add return_data[custom_lookup_item]; - } - } - } - - - - # TODO: Later we should probably track whitelist matches. - # TODO: base this on a set instead of iterating the items. 
- for ( item in return_data ) - { - if ( item$meta$intent == WHITELIST ) - { - return set(); - } } return return_data; } -function Intel::found_in_conn(c: connection, found: Found) +function Intel::seen_in_conn(c: connection, seen: Seen) { - if ( find(found) ) + if ( find(seen) ) { - if ( store_metadata ) + if ( have_full_data ) { - local items = lookup(found); - event Intel::match_in_conn(c, found, items); + local items = get_items(seen); + event Intel::match_in_conn(c, seen, items); } else { - event Intel::match_in_conn_no_items(c, found); + event Intel::match_in_conn_no_items(c, seen); } } } -function register_custom_matcher(str_type: SubType, func: function(found: Found): bool) - { - if ( str_type !in custom_matchers ) - custom_matchers[str_type] = set(func); - else - add custom_matchers[str_type][func]; - } - -function register_custom_lookup(str_type: SubType, func: function(found: Found): set[Item]) - { - if ( str_type !in custom_lookup ) - custom_lookup[str_type] = set(func); - else - add custom_lookup[str_type][func]; - } - -function unique_data(): count - { - return |data_store$host_data| + |data_store$string_data|; - } - -#function get_meta(check: MetaData, metas: set[MetaData]): MetaData -# { -# local check_hash = md5_hash(check); -# for ( m in metas ) -# { -# if ( check_hash == md5_hash(m) ) -# return m; -# } -# -# return [$source=""]; -# } function has_meta(check: MetaData, metas: set[MetaData]): bool { @@ -309,35 +206,41 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool function insert(item: Item) { - local err_msg = ""; - if ( item?$str && ! item?$str_type ) - err_msg = "You must provide a str_type for strings or this item doesn't make sense."; - - if ( err_msg == "" ) + if ( item?$str && !item?$str_type ) { - # Create and fill out the meta data item. - local meta = item$meta; - local metas: set[MetaData]; + event reporter_warning(network_time(), fmt("You must provide a str_type for strings or this item doesn't make sense. 
Item: %s", item), ""); + return; + } - if ( item?$host ) - { - if ( item$host !in data_store$host_data ) - data_store$host_data[item$host] = set(); - - metas = data_store$host_data[item$host]; - } - else if ( item?$str ) - { - if ( [item$str, item$str_type] !in data_store$string_data ) - data_store$string_data[item$str, item$str_type] = set(); + # Create and fill out the meta data item. + local meta = item$meta; + local metas: set[MetaData]; - metas = data_store$string_data[item$str, item$str_type]; - } - else - { - err_msg = "Malformed intelligence item"; - } + if ( item?$host ) + { + local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128); + if ( host !in data_store$net_data ) + data_store$net_data[host] = set(); + + metas = data_store$net_data[host]; + } + else if ( item?$net ) + { + if ( item$net !in data_store$net_data ) + data_store$net_data[item$net] = set(); + metas = data_store$net_data[item$net]; + } + else if ( item?$str ) + { + if ( [item$str, item$str_type] !in data_store$string_data ) + data_store$string_data[item$str, item$str_type] = set(); + + metas = data_store$string_data[item$str, item$str_type]; + } + + if ( have_full_data ) + { for ( m in metas ) { if ( meta$source == m$source ) @@ -349,6 +252,7 @@ function insert(item: Item) } else { + # Same source, different metadata means updated item. 
event Intel::updated_item(item); break; } @@ -359,19 +263,7 @@ function insert(item: Item) break; } } - add metas[item$meta]; - return; } - - if ( err_msg != "" ) - Log::write(Intel::LOG, [$ts=network_time(), $level="warn", $message=err_msg, $item=item]); - - return; } - -event insert_event(item: Item) - { - insert(item); - } - + \ No newline at end of file diff --git a/scripts/base/frameworks/intel/non-cluster b/scripts/base/frameworks/intel/non-cluster deleted file mode 100644 index dddf430966..0000000000 --- a/scripts/base/frameworks/intel/non-cluster +++ /dev/null @@ -1,3 +0,0 @@ - -module Intel; - diff --git a/scripts/base/frameworks/intel/plugins/dns_zones.bro b/scripts/base/frameworks/intel/plugins/dns_zones.bro deleted file mode 100644 index ba35b35421..0000000000 --- a/scripts/base/frameworks/intel/plugins/dns_zones.bro +++ /dev/null @@ -1,61 +0,0 @@ - -module Intel; - -export { - redef enum SubType += { - DNS_ZONE, - }; -} - -function dns_zone_ripper(found: Found): Found - { - local found_copy = copy(found); - - ## # We only support fourth level depth zones right now for performance. - ## if ( /(\.[^\.]+){4,}/ in found_copy$str ) - ## { - ## local parts = split_all(found_copy$str, /\./); - ## local len = |parts|; - ## found_copy$str = parts[len-6] + "." + parts[len-4] + "." + parts[len-2] + "." + parts[len]; - ## } - - # We can assume that we're getting a string and subtype because - # this function is only registered for DOMAIN and DNS_ZONE data. - local dns_name = sub(found_copy$str, /^[^\.]*\./, ""); - found_copy$str = dns_name; - # We are doing a literal search for a DNS zone at this point - found_copy$str_type = Intel::DNS_ZONE; - return found_copy; - } - -# This matcher extension adds additional matchers for domain names. 
-function dns_zone_matcher(found: Found): bool - { - local found_copy = dns_zone_ripper(found); - if ( found$str == found_copy$str ) - return F; - - return Intel::find(found_copy); - } - -function dns_zone_lookup(found: Found): set[Item] - { - local result_set: set[Item] = set(); - local found_copy = dns_zone_ripper(found); - if ( found$str == found_copy$str ) - return result_set; - - for ( item in Intel::lookup(found_copy) ) - add result_set[item]; - return result_set; - } - -event bro_init() &priority=10 - { - register_custom_matcher(DOMAIN, dns_zone_matcher); - register_custom_lookup(DOMAIN, dns_zone_lookup); - ## The DNS_ZONE subtype needs added because it's ultimately - ## a subset of DOMAIN and will need to be searched as well. - register_custom_matcher(DNS_ZONE, dns_zone_matcher); - register_custom_lookup(DNS_ZONE, dns_zone_lookup); - } diff --git a/scripts/base/frameworks/intel/plugins/set.bro b/scripts/base/frameworks/intel/plugins/set.bro deleted file mode 100644 index b169e91972..0000000000 --- a/scripts/base/frameworks/intel/plugins/set.bro +++ /dev/null @@ -1,19 +0,0 @@ -module Intel; - -redef record Intel::Indexes += { - hosts: set[addr] &default=set(); - strings: set[string, SubType] &default=set(); -}; - -redef plugins += { - [$index() = { - - }, - $match(found: Found): bool = { - - }, - $lookup(found: Found): set[Item] = { - - } - ] -}; \ No newline at end of file