From a4af46e1f43d67a2df1ebd78be7fd665ed391554 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 6 Aug 2012 09:34:14 -0400 Subject: [PATCH 01/22] Functional intelligence framework. - All 5 intelligence tests pass. - Some initial memory optimizations done. - More work needs done to reduce duplicate data in memory. - Input framework integration. - Define files to read in the "Bro intelligence format" in Intel::read_files. - Cluster transparency. - DNS Zones are a fully supported data type. - Queries for Intel::DOMAIN values will automatically check in DNS_ZONE intelligence. --- scripts/base/frameworks/intel/__load__.bro | 12 +- scripts/base/frameworks/intel/cluster.bro | 59 +++ scripts/base/frameworks/intel/input.bro | 28 ++ scripts/base/frameworks/intel/main.bro | 424 ++++++++++-------- .../frameworks/intel/plugins/dns_zones.bro | 53 +++ .../manager-1..stdout | 7 + .../worker-1..stdout | 7 + .../worker-2..stdout | 7 + .../out | 3 + .../out | 1 + .../out | 3 + .../out | 3 + .../frameworks/intel/cluster-transparency.bro | 44 ++ .../base/frameworks/intel/dns-zone-plugin.bro | 18 + .../base/frameworks/intel/input-and-match.bro | 36 ++ .../frameworks/intel/insert-and-matcher.bro | 34 -- .../base/frameworks/intel/item-merge.bro | 23 + .../base/frameworks/intel/matching.bro | 38 ++ 18 files changed, 580 insertions(+), 220 deletions(-) create mode 100644 scripts/base/frameworks/intel/cluster.bro create mode 100644 scripts/base/frameworks/intel/input.bro create mode 100644 scripts/base/frameworks/intel/plugins/dns_zones.bro create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.dns-zone-plugin/out create mode 100644 
testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/out create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.item-merge/out create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.matching/out create mode 100644 testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro create mode 100644 testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro create mode 100644 testing/btest/scripts/base/frameworks/intel/input-and-match.bro delete mode 100644 testing/btest/scripts/base/frameworks/intel/insert-and-matcher.bro create mode 100644 testing/btest/scripts/base/frameworks/intel/item-merge.bro create mode 100644 testing/btest/scripts/base/frameworks/intel/matching.bro diff --git a/scripts/base/frameworks/intel/__load__.bro b/scripts/base/frameworks/intel/__load__.bro index d551be57d3..c15efa2f1d 100644 --- a/scripts/base/frameworks/intel/__load__.bro +++ b/scripts/base/frameworks/intel/__load__.bro @@ -1 +1,11 @@ -@load ./main \ No newline at end of file +@load ./main +@load ./input + +# The cluster framework must be loaded first. +@load base/frameworks/cluster + +@if ( Cluster::is_enabled() ) +@load ./cluster +@endif + +@load ./plugins/dns_zones diff --git a/scripts/base/frameworks/intel/cluster.bro b/scripts/base/frameworks/intel/cluster.bro new file mode 100644 index 0000000000..b9fea57ca0 --- /dev/null +++ b/scripts/base/frameworks/intel/cluster.bro @@ -0,0 +1,59 @@ +##! Cluster transparency support for the intelligence framework. This is mostly oriented +##! toward distributing intelligence information across clusters. + +@load base/frameworks/cluster + +module Intel; + +export { + global cluster_new_item: event(item: Item); + global cluster_updated_item: event(item: Item); + + redef record Item += { + ## This field is solely used internally for cluster transparency with + ## the intelligence framework to avoid storms of intelligence data + ## swirling forever. 
It allows data to propagate only a single time. + first_dispatch: bool &default=T; + }; +} + +# Primary intelligence distribution comes from manager. +redef Cluster::manager2worker_events += /Intel::cluster_(new|updated)_item/; +# If a worker finds intelligence and adds it, it should share it back to the manager. +redef Cluster::worker2manager_events += /Intel::cluster_(new|updated)_item/; + +event Intel::cluster_new_item(item: Intel::Item) + { + # Ignore locally generated events. + if ( is_remote_event() ) + Intel::insert(item); + } + +event Intel::cluster_updated_item(item: Intel::Item) + { + # Ignore locally generated events. + if ( is_remote_event() ) + Intel::insert(item); + } + +event Intel::new_item(item: Intel::Item) + { + # If this is the first time this item has been dispatched, + # send it over the cluster. + if ( item$first_dispatch ) + { + item$first_dispatch = F; + event Intel::cluster_new_item(item); + } + } + +event Intel::updated_item(item: Intel::Item) + { + # If this is the first time this item has been dispatched, + # send it over the cluster. 
+ if ( item$first_dispatch ) + { + item$first_dispatch = F; + event Intel::cluster_updated_item(item); + } + } diff --git a/scripts/base/frameworks/intel/input.bro b/scripts/base/frameworks/intel/input.bro new file mode 100644 index 0000000000..08ca3992eb --- /dev/null +++ b/scripts/base/frameworks/intel/input.bro @@ -0,0 +1,28 @@ +@load ./main + +module Intel; + +export { + ## Files that will be read off disk + const read_files: set[string] = {} &redef; + + global entry: event(desc: Input::EventDescription, tpe: Input::Event, item: Intel::Item); +} + +event Intel::entry(desc: Input::EventDescription, tpe: Input::Event, item: Intel::Item) + { + Intel::insert(item); + } + +event bro_init() &priority=5 + { + for ( a_file in read_files ) + { + Input::add_event([$source=a_file, + $reader=Input::READER_ASCII, + $mode=Input::REREAD, + $name=cat("intel-", a_file), + $fields=Intel::Item, + $ev=Intel::entry]); + } + } diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 9ee1c75100..72fbd5c18e 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -1,31 +1,19 @@ ##! The intelligence framework provides a way to store and query IP addresses, -##! strings (with a subtype), and numeric (with a subtype) data. Metadata -##! also be associated with the intelligence like tags which are arbitrary -##! strings, time values, and longer descriptive strings. - -# Example string subtypes: -# url -# email -# domain -# software -# user_name -# file_name -# file_md5 -# x509_md5 - -# Example tags: -# infrastructure -# malicious -# sensitive -# canary -# friend +##! and strings (with a subtype). Metadata can +##! also be associated with the intelligence like for making more informated +##! decisions about matching and handling of intelligence. +# +# TODO: +# Comments +# Better Intel::Item comparison (same_meta) +# Generate a notice when messed up data is discovered. 
+# Complete "net" support as an intelligence type. @load base/frameworks/notice module Intel; export { - ## The intel logging stream identifier. redef enum Log::ID += { LOG }; redef enum Notice::Type += { @@ -34,158 +22,171 @@ export { Detection, }; - ## Record type used for logging information from the intelligence framework. - ## Primarily for problems or oddities with inserting and querying data. - ## This is important since the content of the intelligence framework can - ## change quite dramatically during runtime and problems may be introduced - ## into the data. + type Classification: enum { + MALICIOUS, + INFRASTRUCTURE, + SENSITIVE, + FRIEND, + CANARY, + WHITELIST, + }; + + type SubType: enum { + URL, + EMAIL, + DOMAIN, + USER_NAME, + FILE_HASH, # (non hash type specific, md5, sha1, sha256) + CERT_HASH, + ASN, + }; + type Info: record { - ## The current network time. ts: time &log; - ## Represents the severity of the message. ## This value should be one of: "info", "warn", "error" level: string &log; - ## The message. message: string &log; }; - ## Record to represent metadata associated with a single piece of - ## intelligence. type MetaData: record { - ## A description for the data. + source: string; + class: Classification; desc: string &optional; - ## A URL where more information may be found about the intelligence. url: string &optional; - ## The time at which the data was first declared to be intelligence. - first_seen: time &optional; - ## When this data was most recent inserted into the framework. - latest_seen: time &optional; - ## Arbitrary text tags for the data. - tags: set[string]; + tags: set[string] &optional; }; - ## Record to represent a singular piece of intelligence. type Item: record { - ## If the data is an IP address, this hold the address. - ip: addr &optional; - ## If the data is textual, this holds the text. - str: string &optional; - ## If the data is numeric, this holds the number. 
- num: int &optional; - ## The subtype of the data for when either the $str or $num fields are - ## given. If one of those fields are given, this field must be present. - subtype: string &optional; + ip: addr &optional; + net: subnet &optional; + + str: string &optional; + subtype: SubType &optional; - ## The next five fields are temporary until a better model for - ## attaching metadata to an intelligence item is created. - desc: string &optional; - url: string &optional; - first_seen: time &optional; - latest_seen: time &optional; - tags: set[string]; - - ## These single string tags are throw away until pybroccoli supports sets. - tag1: string &optional; - tag2: string &optional; - tag3: string &optional; + meta: MetaData; }; - - ## Record model used for constructing queries against the intelligence - ## framework. - type QueryItem: record { - ## If an IP address is being queried for, this field should be given. - ip: addr &optional; - ## If a string is being queried for, this field should be given. - str: string &optional; - ## If numeric data is being queried for, this field should be given. - num: int &optional; - ## If either a string or number is being queried for, this field should - ## indicate the subtype of the data. - subtype: string &optional; + + type Query: record { + ip: addr &optional; + + str: string &optional; + subtype: SubType &optional; - ## A set of tags where if a single metadata record attached to an item - ## has any one of the tags defined in this field, it will match. - or_tags: set[string] &optional; - ## A set of tags where a single metadata record attached to an item - ## must have all of the tags defined in this field. - and_tags: set[string] &optional; + class: Classification &optional; + + or_tags: set[string] &optional; + and_tags: set[string] &optional; ## The predicate can be given when searching for a match. It will - ## be tested against every :bro:type:`Intel::MetaData` item associated - ## with the data being matched on. 
If it returns T a single time, the - ## matcher will consider that the item has matched. This field can - ## be used for constructing arbitrarily complex queries that may not - ## be possible with the $or_tags or $and_tags fields. - pred: function(meta: Intel::MetaData): bool &optional; + ## be tested against every :bro:type:`MetaData` item associated with + ## the data being matched on. If it returns T a single time, the + ## matcher will consider that the item has matched. + pred: function(meta: Intel::Item): bool &optional; }; - ## Function to insert data into the intelligence framework. - ## - ## item: The data item. - ## - ## Returns: T if the data was successfully inserted into the framework, - ## otherwise it returns F. + type Importer: enum { + NULL_IMPORTER + }; + global insert: function(item: Item): bool; - - ## A wrapper for the :bro:id:`Intel::insert` function. This is primarily - ## used as the external API for inserting data into the intelligence - ## using Broccoli. global insert_event: event(item: Item); - - ## Function for matching data within the intelligence framework. - global matcher: function(item: QueryItem): bool; + global delete_item: function(item: Item): bool; + + global matcher: function(query: Query): bool; + global lookup: function(query: Query): set[Item]; + + global register_custom_matcher: function(subtype: SubType, + func: function(query: Query): bool); + global register_custom_lookup: function(subtype: SubType, + func: function(query: Query): set[Item]); + + global new_item: event(item: Item); + global updated_item: event(item: Item); } -type MetaDataStore: table[count] of MetaData; +## Store collections of :bro:type:`MetaData` records indexed by a source name. +type IndexedItems: table[string, Classification] of MetaData; type DataStore: record { - ip_data: table[addr] of MetaDataStore; - # The first string is the actual value and the second string is the subtype. 
- string_data: table[string, string] of MetaDataStore; - int_data: table[int, string] of MetaDataStore; + ip_data: table[addr] of IndexedItems; + string_data: table[string, SubType] of IndexedItems; }; global data_store: DataStore; -event bro_init() +global custom_matchers: table[SubType] of set[function(query: Query): bool]; +global custom_lookup: table[SubType] of set[function(query: Query): set[Item]]; + +event bro_init() &priority=5 { Log::create_stream(Intel::LOG, [$columns=Info]); } +function register_custom_matcher(subtype: SubType, func: function(query: Query): bool) + { + if ( subtype !in custom_matchers ) + custom_matchers[subtype] = set(); + add custom_matchers[subtype][func]; + } + +function register_custom_lookup(subtype: SubType, func: function(query: Query): set[Item]) + { + if ( subtype !in custom_lookup ) + custom_lookup[subtype] = set(); + add custom_lookup[subtype][func]; + } + + + +function same_meta(meta1: MetaData, meta2: MetaData): bool + { + # "any" type values can't be compared so this generic implementation doesn't work. + #local rf1 = record_fields(item1); + #local rf2 = record_fields(item2); + #for ( field in rf1 ) + # { + # if ( ((rf1[field]?$value && rf1[field]?$value) && + # rf1[field]$value != rf2[field]$value) || + # ! (rf1[field]?$value && rf1[field]?$value) ) + # return F; + # } + + if ( meta1$source == meta2$source && + meta1$class == meta2$class && + ((!meta1?$desc && !meta2?$desc) || (meta1?$desc && meta2?$desc && meta1$desc == meta2$desc)) && + ((!meta1?$url && !meta2?$url) || (meta1?$url && meta2?$url && meta1$url == meta2$url)) && + ((!meta1?$tags && !meta2?$tags) || (meta1?$tags && meta2?$tags && |meta1$tags| == |meta2$tags|)) ) + { + # TODO: match on all of the tag values + return T; + } + + # The records must not be equivalent if we made it this far. + return F; + } function insert(item: Item): bool { local err_msg = ""; - if ( (item?$str || item?$num) && ! 
item?$subtype ) - err_msg = "You must provide a subtype to insert_sync or this item doesn't make sense."; + if ( item?$str && ! item?$subtype ) + err_msg = "You must provide a subtype for strings or this item doesn't make sense."; if ( err_msg == "" ) { # Create and fill out the meta data item. - local meta: MetaData; - if ( item?$first_seen ) - meta$first_seen = item$first_seen; - if ( item?$latest_seen ) - meta$latest_seen = item$latest_seen; - if ( item?$tags ) - meta$tags = item$tags; - if ( item?$desc ) - meta$desc = item$desc; - if ( item?$url ) - meta$url = item$url; - - - # This is hopefully only temporary until pybroccoli supports sets. - if ( item?$tag1 ) - add item$tags[item$tag1]; - if ( item?$tag2 ) - add item$tags[item$tag2]; - if ( item?$tag3 ) - add item$tags[item$tag3]; - + local meta = item$meta; + if ( item?$ip ) { if ( item$ip !in data_store$ip_data ) data_store$ip_data[item$ip] = table(); - data_store$ip_data[item$ip][|data_store$ip_data[item$ip]|] = meta; + + if ( [meta$source, meta$class] !in data_store$ip_data[item$ip] ) + event Intel::new_item(item); + else if ( ! same_meta(data_store$ip_data[item$ip][meta$source, meta$class], meta) ) + event Intel::updated_item(item); + else + return F; + + data_store$ip_data[item$ip][meta$source, meta$class] = item$meta; return T; } else if ( item?$str ) @@ -193,15 +194,14 @@ function insert(item: Item): bool if ( [item$str, item$subtype] !in data_store$string_data ) data_store$string_data[item$str, item$subtype] = table(); - data_store$string_data[item$str, item$subtype][|data_store$string_data[item$str, item$subtype]|] = meta; - return T; - } - else if ( item?$num ) - { - if ( [item$num, item$subtype] !in data_store$int_data ) - data_store$int_data[item$num, item$subtype] = table(); + if ( [meta$source, meta$class] !in data_store$string_data[item$str, item$subtype] ) + event Intel::new_item(item); + else if ( ! 
same_meta(data_store$string_data[item$str, item$subtype][meta$source, meta$class], meta) ) + event Intel::updated_item(item); + else + return F; - data_store$int_data[item$num, item$subtype][|data_store$int_data[item$num, item$subtype]|] = meta; + data_store$string_data[item$str, item$subtype][meta$source, meta$class] = item$meta; return T; } else @@ -217,107 +217,161 @@ event insert_event(item: Item) { insert(item); } - -function match_item_with_metadata(item: QueryItem, meta: MetaData): bool + +function match_item_with_query(item: Item, query: Query): bool { - if ( item?$and_tags ) + if ( ! query?$and_tags && ! query?$or_tags && ! query?$pred ) + return T; + + if ( query?$and_tags ) { local matched = T; # Every tag given has to match in a single MetaData entry. - for ( tag in item$and_tags ) + for ( tag in query$and_tags ) { - if ( tag !in meta$tags ) + if ( item$meta?$tags && tag !in item$meta$tags ) matched = F; } if ( matched ) return T; } - else if ( item?$or_tags ) + else if ( query?$or_tags ) { # For OR tags, only a single tag has to match. 
- for ( tag in item$or_tags ) + for ( tag in query$or_tags ) { - if ( tag in meta$tags ) + if ( item$meta?$tags && tag in item$meta$tags ) return T; } } - else if ( item?$pred ) - return item$pred(meta); + else if ( query?$pred ) + return query$pred(item); # This indicates some sort of failure in the query return F; } -function matcher(item: QueryItem): bool +function lookup(query: Query): set[Item] + { + local meta: MetaData; + local item: Item; + local return_data: set[Item] = set(); + + if ( query?$ip ) + { + if ( query$ip in data_store$ip_data ) + { + for ( [source, class] in data_store$ip_data[query$ip] ) + { + meta = data_store$ip_data[query$ip][source, class]; + item = [$ip=query$ip,$meta=meta]; + if ( match_item_with_query(item, query) ) + add return_data[item]; + } + } + } + + else if ( query?$str ) + { + if ( [query$str, query$subtype] in data_store$string_data ) + { + for ( [source, class] in data_store$string_data[query$str, query$subtype] ) + { + meta = data_store$string_data[query$str, query$subtype][source, class]; + item = [$str=query$str,$subtype=query$subtype,$meta=meta]; + if ( match_item_with_query(item, query) ) + add return_data[item]; + } + } + + # Check if there are any custom subtype lookup functons and add the values to + # the result set. + if ( query$subtype in custom_lookup ) + { + for ( lookup_func in custom_lookup[query$subtype] ) + { + # Iterating here because there is no way to merge sets generically. + for ( custom_lookup_item in lookup_func(query) ) + add return_data[custom_lookup_item]; + } + } + } + + return return_data; + } + + +function matcher(query: Query): bool { local err_msg = ""; - if ( ! 
(item?$ip || item?$str || item?$num) ) - err_msg = "You must supply one of the $ip, $str, or $num fields to search on"; - else if ( (item?$or_tags || item?$and_tags) && item?$pred ) + if ( (query?$or_tags || query?$and_tags) && query?$pred ) err_msg = "You can't match with both tags and a predicate."; - else if ( item?$or_tags && item?$and_tags ) + else if ( query?$or_tags && query?$and_tags ) err_msg = "You can't match with both OR'd together tags and AND'd together tags"; - else if ( (item?$str || item?$num) && ! item?$subtype ) - err_msg = "You must provide a subtype to matcher or this item doesn't make sense."; - else if ( item?$str && item?$num ) - err_msg = "You must only provide $str or $num, not both."; + else if ( query?$str && ! query?$subtype ) + err_msg = "You must provide a subtype to matcher or this query doesn't make sense."; + local item: Item; local meta: MetaData; if ( err_msg == "" ) { - if ( item?$ip ) + if ( query?$ip ) { - if ( item$ip in data_store$ip_data ) + if ( query$ip in data_store$ip_data ) { - if ( ! item?$and_tags && ! item?$or_tags && ! item?$pred ) + if ( ! query?$and_tags && ! query?$or_tags && ! query?$pred ) return T; - - for ( i in data_store$ip_data[item$ip] ) + + for ( [source, class] in data_store$ip_data[query$ip] ) { - meta = data_store$ip_data[item$ip][i]; - if ( match_item_with_metadata(item, meta) ) + meta = data_store$ip_data[query$ip][source, class]; + item = [$ip=query$ip,$meta=meta]; + if ( match_item_with_query(item, query) ) return T; } } } - else if ( item?$str ) + else if ( query?$str ) { - if ( [item$str, item$subtype] in data_store$string_data ) + if ( [query$str, query$subtype] in data_store$string_data ) { - if ( ! item?$and_tags && ! item?$or_tags && ! item?$pred ) + if ( ! query?$and_tags && ! query?$or_tags && ! 
query?$pred ) return T; - for ( i in data_store$string_data[item$str, item$subtype] ) + for ( [source, class] in data_store$string_data[query$str, query$subtype] ) { - meta = data_store$string_data[item$str, item$subtype][i]; - if ( match_item_with_metadata(item, meta) ) + meta = data_store$string_data[query$str, query$subtype][source, class]; + item = [$str=query$str,$subtype=query$subtype,$meta=meta]; + if ( match_item_with_query(item, query) ) + return T; + } + } + + # Check if there are any custom subtype matchers in case we haven't matched yet. + if ( query$subtype in custom_matchers ) + { + for ( match_func in custom_matchers[query$subtype] ) + { + if ( match_func(query) ) return T; } } } - - else if ( item?$num ) - { - if ( [item$num, item$subtype] in data_store$int_data ) - { - if ( ! item?$and_tags && ! item?$or_tags && ! item?$pred ) - return T; - for ( i in data_store$int_data[item$num, item$subtype] ) - { - meta = data_store$int_data[item$num, item$subtype][i]; - if ( match_item_with_metadata(item, meta) ) - return T; - } - } - } else - err_msg = "Failed to query intelligence data for some unknown reason."; + err_msg = "You must supply one of the $ip or $str fields to search on"; } if ( err_msg != "" ) Log::write(Intel::LOG, [$ts=network_time(), $level="error", $message=fmt(err_msg)]); return F; } + +module GLOBAL; + +function INTEL(item: Intel::Query): bool + { + return Intel::matcher(item); + } \ No newline at end of file diff --git a/scripts/base/frameworks/intel/plugins/dns_zones.bro b/scripts/base/frameworks/intel/plugins/dns_zones.bro new file mode 100644 index 0000000000..3f1c30ef3d --- /dev/null +++ b/scripts/base/frameworks/intel/plugins/dns_zones.bro @@ -0,0 +1,53 @@ + +module Intel; + +export { + redef enum SubType += { + DNS_ZONE, + }; +} + +function dns_zone_ripper(query: Query): Query + { + local query_copy = copy(query); + # We can assume that we're getting a string and subtype because + # this function is only registered for DOMAIN and 
DNS_ZONE data. + local dns_name = sub(query_copy$str, /^[^\.]*\./, ""); + query_copy$str = dns_name; + # We are doing a literal search for a DNS zone at this point + query_copy$subtype = Intel::DNS_ZONE; + return query_copy; + } + +# This matcher extension adds additional matchers for domain names. +function dns_zone_matcher(query: Query): bool + { + local query_copy = dns_zone_ripper(query); + if ( query$str == query_copy$str ) + return F; + + return Intel::matcher(query_copy); + } + +function dns_zone_lookup(query: Query): set[Item] + { + local result_set: set[Item] = set(); + local query_copy = dns_zone_ripper(query); + if ( query$str == query_copy$str ) + return result_set; + + for ( item in Intel::lookup(query_copy) ) + add result_set[item]; + return result_set; + } + +event bro_init() &priority=10 + { + register_custom_matcher(DOMAIN, dns_zone_matcher); + # The DNS_ZONE subtype needs added because it's ultimately + # a subset of DOMAIN and will need to be searched as well. + register_custom_matcher(DNS_ZONE, dns_zone_matcher); + + register_custom_lookup(DOMAIN, dns_zone_lookup); + register_custom_lookup(DNS_ZONE, dns_zone_lookup); + } diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout new file mode 100644 index 0000000000..59d996c821 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout @@ -0,0 +1,7 @@ +1.2.3.4 +{ +b, +c, +a +} +foobar diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout new file mode 100644 index 0000000000..59d996c821 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout @@ -0,0 +1,7 @@ +1.2.3.4 +{ +b, +c, +a +} +foobar diff --git 
a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout new file mode 100644 index 0000000000..59d996c821 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout @@ -0,0 +1,7 @@ +1.2.3.4 +{ +b, +c, +a +} +foobar diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.dns-zone-plugin/out b/testing/btest/Baseline/scripts.base.frameworks.intel.dns-zone-plugin/out new file mode 100644 index 0000000000..1eb51e2701 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.dns-zone-plugin/out @@ -0,0 +1,3 @@ +It matched! +bad.com +Intel::DNS_ZONE diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/out b/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/out new file mode 100644 index 0000000000..f3e4cf8e60 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/out @@ -0,0 +1 @@ +Matched it! 
diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.item-merge/out b/testing/btest/Baseline/scripts.base.frameworks.intel.item-merge/out new file mode 100644 index 0000000000..c3220cd40c --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.item-merge/out @@ -0,0 +1,3 @@ +Number of matching intel items: 2 (should be 2) +Number of matching intel items: 2 (should still be 2) +Number of matching intel items: 3 (should be 3) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.matching/out b/testing/btest/Baseline/scripts.base.frameworks.intel.matching/out new file mode 100644 index 0000000000..71fec4e23c --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.matching/out @@ -0,0 +1,3 @@ +VALID +VALID +VALID diff --git a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro new file mode 100644 index 0000000000..3c21946938 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro @@ -0,0 +1,44 @@ +# @TEST-SERIALIZE: comm +# +# @TEST-EXEC: btest-bg-run manager-1 BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT +# @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT +# @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. 
CLUSTER_NODE=worker-2 bro %INPUT +# @TEST-EXEC: btest-bg-wait -k 3 +# @TEST-EXEC: btest-diff manager-1/.stdout +# @TEST-EXEC: btest-diff worker-1/.stdout +# @TEST-EXEC: btest-diff worker-2/.stdout + +@TEST-START-FILE cluster-layout.bro +redef Cluster::nodes = { + ["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")], + ["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1",$interface="eth0"], + ["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1",$interface="eth1"], +}; +@TEST-END-FILE + +event remote_connection_handshake_done(p: event_peer) + { + # Insert the data once both workers are connected. + if ( Cluster::local_node_type() == Cluster::MANAGER && Cluster::worker_count == 2 ) + { + Intel::insert([$ip=1.2.3.4,$meta=[$source="foobar", $class=Intel::MALICIOUS, $tags=set("a","b","c")]]); + } + } + +event remote_connection_closed(p: event_peer) + { + if ( Cluster::local_node_type() == Cluster::MANAGER && Cluster::worker_count == 0 ) + terminate_communication(); + } + +# This should print out a single time on the manager and each worker +# due to the cluster transparency. 
+event Intel::new_item(item: Intel::Item) + { + print item$ip; + print item$meta$tags; + print item$meta$source; + + if ( Cluster::local_node_type() == Cluster::WORKER ) + terminate_communication(); + } \ No newline at end of file diff --git a/testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro b/testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro new file mode 100644 index 0000000000..8bcbc0ec7b --- /dev/null +++ b/testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro @@ -0,0 +1,18 @@ +# @TEST-EXEC: bro %INPUT >out +# @TEST-EXEC: btest-diff out + +event bro_init() + { + Intel::insert([$str="bad.com", $subtype=Intel::DNS_ZONE, $meta=[$source="src1", $class=Intel::MALICIOUS]]); + local query: Intel::Query = [$str="some.host.bad.com", $subtype=Intel::DOMAIN, $class=Intel::MALICIOUS]; + if ( Intel::matcher(query) ) + { + print "It matched!"; + local items = Intel::lookup(query); + for ( item in items ) + { + print item$str; + print item$subtype; + } + } + } diff --git a/testing/btest/scripts/base/frameworks/intel/input-and-match.bro b/testing/btest/scripts/base/frameworks/intel/input-and-match.bro new file mode 100644 index 0000000000..213520442a --- /dev/null +++ b/testing/btest/scripts/base/frameworks/intel/input-and-match.bro @@ -0,0 +1,36 @@ +# @TEST-EXEC: bro %INPUT >out +# @TEST-EXEC: btest-diff out + +@TEST-START-FILE intel.dat +#fields ip net str subtype meta.source meta.class meta.desc meta.url meta.tags +1.2.3.4 - - - source1 Intel::MALICIOUS this host is just plain baaad http://some-data-distributor.com/1234 foo,bar +1.2.3.4 - - - source1 Intel::MALICIOUS this host is just plain baaad http://some-data-distributor.com/1234 foo,bar +- - e@mail.com Intel::EMAIL source1 Intel::MALICIOUS Phishing email source http://some-data-distributor.com/100000 - +@TEST-END-FILE + +@load frameworks/communication/listen + +redef Intel::read_files += { "intel.dat" }; + +event do_it(allowed_loops: count) + { + if ( 
Intel::matcher([$str="e@mail.com", $subtype=Intel::EMAIL, $class=Intel::MALICIOUS]) && + Intel::matcher([$ip=1.2.3.4, $class=Intel::MALICIOUS]) ) + { + # Once the match happens a single time we print and shutdown. + print "Matched it!"; + terminate_communication(); + return; + } + + if ( allowed_loops > 0 ) + schedule 100msecs { do_it(allowed_loops-1) }; + else + terminate_communication(); + } + + +event bro_init() + { + event do_it(20); + } diff --git a/testing/btest/scripts/base/frameworks/intel/insert-and-matcher.bro b/testing/btest/scripts/base/frameworks/intel/insert-and-matcher.bro deleted file mode 100644 index 67e539c176..0000000000 --- a/testing/btest/scripts/base/frameworks/intel/insert-and-matcher.bro +++ /dev/null @@ -1,34 +0,0 @@ -# -# @TEST-EXEC: bro %INPUT >out -# @TEST-EXEC: btest-diff out - -event bro_init() - { - Intel::insert([$ip=1.2.3.4, $tags=set("zeustracker.abuse.ch", "malicious")]); - Intel::insert([$str="http://www.google.com/", $subtype="url", $tags=set("infrastructure", "google")]); - Intel::insert([$str="Ab439G32F...", $subtype="x509_cert", $tags=set("bad")]); - Intel::insert([$str="Ab439G32F...", $tags=set("bad")]); - } - -event bro_done() - { - local orig_h = 1.2.3.4; - - if ( Intel::matcher([$ip=orig_h, $and_tags=set("malicious")]) ) - print "VALID"; - - if ( Intel::matcher([$ip=orig_h, $and_tags=set("don't match")]) ) - print "INVALID"; - - if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::MetaData): bool { return T; } ]) ) - print "VALID"; - - if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::MetaData): bool { return F; } ]) ) - print "INVALID"; - - if ( Intel::matcher([$str="http://www.google.com/", $subtype="url", $tags=set("google")]) ) - print "VALID"; - - if ( Intel::matcher([$str="http://www.example.com", $subtype="url"]) ) - print "INVALID"; - } diff --git a/testing/btest/scripts/base/frameworks/intel/item-merge.bro b/testing/btest/scripts/base/frameworks/intel/item-merge.bro new file mode 100644 index 
0000000000..cf59b638de --- /dev/null +++ b/testing/btest/scripts/base/frameworks/intel/item-merge.bro @@ -0,0 +1,23 @@ +# @TEST-EXEC: bro %INPUT >out +# @TEST-EXEC: btest-diff out + +event bro_init() + { + Intel::insert([$ip=1.2.3.4, $meta=[$source="source1-feed1", $class=Intel::MALICIOUS, $tags=set("foo")]]); + Intel::insert([$ip=1.2.3.4, $meta=[$source="source2-special-sauce", $class=Intel::MALICIOUS, $tags=set("foo","bar")]]); + + # Lookup should return the items matching the query. + local items = Intel::lookup([$ip=1.2.3.4]); + print fmt("Number of matching intel items: %d (should be 2)", |items|); + + # This can be considered an update of a previous value since the + # data, source, and class are the matching points for determining sameness. + Intel::insert([$ip=1.2.3.4, $meta=[$source="source2-special-sauce", $class=Intel::MALICIOUS, $tags=set("foobar", "testing")]]); + items = Intel::lookup([$ip=1.2.3.4]); + print fmt("Number of matching intel items: %d (should still be 2)", |items|); + + # This is a new value. 
+ Intel::insert([$ip=1.2.3.4, $meta=[$source="source3", $class=Intel::MALICIOUS]]); + items = Intel::lookup([$ip=1.2.3.4]); + print fmt("Number of matching intel items: %d (should be 3)", |items|); + } diff --git a/testing/btest/scripts/base/frameworks/intel/matching.bro b/testing/btest/scripts/base/frameworks/intel/matching.bro new file mode 100644 index 0000000000..79bf599c96 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/intel/matching.bro @@ -0,0 +1,38 @@ +# +# @TEST-EXEC: bro %INPUT >out +# @TEST-EXEC: btest-diff out + +event bro_init() + { + Intel::insert([$ip=1.2.3.4, $meta=[$source="zeus-tracker", $class=Intel::MALICIOUS, $tags=set("example-tag1", "example-tag2")]]); + Intel::insert([$str="http://www.google.com/", $subtype=Intel::URL, $meta=[$source="source2", $class=Intel::MALICIOUS, $tags=set("infrastructure", "google")]]); + } + +event bro_done() + { + local orig_h = 1.2.3.4; + + if ( Intel::matcher([$ip=orig_h, $and_tags=set("example-tag1", "example-tag2")]) ) + print "VALID"; + + if ( Intel::matcher([$ip=orig_h, $and_tags=set("don't match")]) ) + print "INVALID"; + + if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::Item): bool { return T; } ]) ) + print "VALID"; + + if ( Intel::matcher([$ip=4.3.2.1, $pred=function(meta: Intel::Item): bool { return T; } ]) ) + print "INVALID"; + + if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::Item): bool { return F; } ]) ) + print "INVALID"; + + if ( Intel::matcher([$str="http://www.google.com/", $subtype=Intel::URL, $and_tags=set("google")]) ) + print "VALID"; + + if ( Intel::matcher([$str="http://www.google.com/", $subtype=Intel::URL, $and_tags=set("woah")]) ) + print "INVALID"; + + if ( Intel::matcher([$str="http://www.example.com", $subtype=Intel::URL]) ) + print "INVALID"; + } From 50e319a417d351891e34bbc034e88c0e57cfbf2c Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 26 Sep 2012 10:08:48 -0400 Subject: [PATCH 02/22] Checkpoint commit. This is all a huge mess right now. 
:) --- scripts/base/frameworks/intel/__load__.bro | 4 + scripts/base/frameworks/intel/cluster.bro | 28 +- .../frameworks/intel/http-user-agents.bro | 67 ++ scripts/base/frameworks/intel/indexing.bro | 68 +++ scripts/base/frameworks/intel/input.bro | 6 +- scripts/base/frameworks/intel/main.bro | 570 +++++++++--------- scripts/base/frameworks/intel/non-cluster | 3 + .../frameworks/intel/plugins/dns_zones.bro | 44 +- scripts/base/frameworks/intel/plugins/set.bro | 19 + 9 files changed, 495 insertions(+), 314 deletions(-) create mode 100644 scripts/base/frameworks/intel/http-user-agents.bro create mode 100644 scripts/base/frameworks/intel/indexing.bro create mode 100644 scripts/base/frameworks/intel/non-cluster create mode 100644 scripts/base/frameworks/intel/plugins/set.bro diff --git a/scripts/base/frameworks/intel/__load__.bro b/scripts/base/frameworks/intel/__load__.bro index c15efa2f1d..8b425f6de4 100644 --- a/scripts/base/frameworks/intel/__load__.bro +++ b/scripts/base/frameworks/intel/__load__.bro @@ -1,5 +1,6 @@ @load ./main @load ./input +@load ./indexing # The cluster framework must be loaded first. @load base/frameworks/cluster @@ -9,3 +10,6 @@ @endif @load ./plugins/dns_zones + + +@load ./http-user-agents \ No newline at end of file diff --git a/scripts/base/frameworks/intel/cluster.bro b/scripts/base/frameworks/intel/cluster.bro index b9fea57ca0..6b361fc711 100644 --- a/scripts/base/frameworks/intel/cluster.bro +++ b/scripts/base/frameworks/intel/cluster.bro @@ -17,10 +17,23 @@ export { }; } +# If this process is not a manager process, we don't want the full metadata +@if ( Cluster::local_node_type() != Cluster::MANAGER ) +redef store_metadata = F; +@endif + # Primary intelligence distribution comes from manager. redef Cluster::manager2worker_events += /Intel::cluster_(new|updated)_item/; # If a worker finds intelligence and adds it, it should share it back to the manager. 
-redef Cluster::worker2manager_events += /Intel::cluster_(new|updated)_item/; +redef Cluster::worker2manager_events += /Intel::(match_in_.*_no_items|cluster_(new|updated)_item)/; + +@if ( Cluster::local_node_type() == Cluster::MANAGER ) +event Intel::match_in_conn_no_items(c: connection, found: Found) + { + local items = lookup(found); + event Intel::match_in_conn(c, found, items); + } +@endif event Intel::cluster_new_item(item: Intel::Item) { @@ -38,9 +51,9 @@ event Intel::cluster_updated_item(item: Intel::Item) event Intel::new_item(item: Intel::Item) { - # If this is the first time this item has been dispatched, - # send it over the cluster. - if ( item$first_dispatch ) + # The cluster manager always rebroadcasts intelligence + if ( Cluster::local_node_type() == Cluster::MANAGER || + item$first_dispatch ) { item$first_dispatch = F; event Intel::cluster_new_item(item); @@ -49,9 +62,10 @@ event Intel::new_item(item: Intel::Item) event Intel::updated_item(item: Intel::Item) { - # If this is the first time this item has been dispatched, - # send it over the cluster. - if ( item$first_dispatch ) + # If this is the first time this item has been dispatched or this + # is a manager, send it over the cluster. 
+ if ( Cluster::local_node_type() == Cluster::MANAGER || + item$first_dispatch ) { item$first_dispatch = F; event Intel::cluster_updated_item(item); diff --git a/scripts/base/frameworks/intel/http-user-agents.bro b/scripts/base/frameworks/intel/http-user-agents.bro new file mode 100644 index 0000000000..c9150573c0 --- /dev/null +++ b/scripts/base/frameworks/intel/http-user-agents.bro @@ -0,0 +1,67 @@ + +@load base/protocols/http +@load base/frameworks/intel + +module HTTP; + +export { + redef enum Intel::Where += { + HTTP::IN_HEADER, + HTTP::IN_REQUEST, + HTTP::IN_HOST_HEADER, + HTTP::IN_CONN_EST, + HTTP::IN_DNS_REQUEST, + }; +} + +event connection_established(c: connection) + { + Intel::found_in_conn(c, [$host=c$id$orig_h, $where=IN_CONN_EST]); + Intel::found_in_conn(c, [$host=c$id$resp_h, $where=IN_CONN_EST]); + } + +event http_header(c: connection, is_orig: bool, name: string, value: string) + { + if ( is_orig && name == "USER-AGENT" ) + Intel::found_in_conn(c, [$str=value, + $str_type=Intel::USER_AGENT, + $where=IN_HEADER]); + + if ( is_orig && name == "HOST" ) + Intel::found_in_conn(c, [$str=value, + $str_type=Intel::DOMAIN, + $where=IN_HOST_HEADER]); + } + +event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) + { + if ( c?$http ) + { + if ( c$http?$user_agent ) + Intel::found_in_conn(c, [$str=c$http$user_agent, + $str_type=Intel::USER_AGENT, + $where=IN_HEADER]); + + Intel::found_in_conn(c, [$str=HTTP::build_url(c$http), + $str_type=Intel::URL, + $where=IN_REQUEST]); + } + } + + +event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qclass: count) + { + Intel::found_in_conn(c, [$str=query, + $str_type=Intel::DOMAIN, + $where=IN_DNS_REQUEST]); + + } + +event Intel::match_in_conn(c: connection, found: Intel::Found, items: set[Intel::Item]) + { + print "matched one!"; + for ( i in items ) + { + print " " + i$meta$desc; + } + } \ No newline at end of file diff --git a/scripts/base/frameworks/intel/indexing.bro 
b/scripts/base/frameworks/intel/indexing.bro new file mode 100644 index 0000000000..a89ac44038 --- /dev/null +++ b/scripts/base/frameworks/intel/indexing.bro @@ -0,0 +1,68 @@ +module Intel; + +export { + type Indexes: record { + hosts: set[addr] &default=set(); + strings: set[string, SubType] &default=set(); + }; + + redef record Plugin += { + index: function(item: Item) &optional; + } + + ## Rebuild indexes this interval after any change to data if there + ## have been no other changes. + const rebuild_indexes_min = 1min &redef; + ## Wait no longer than this interval to update indexes after any + ## change to the data. + const rebuild_indexes_max = 5min &redef; + + global indexing_done: event(); +} + +local indexes: Indexes = []; + +global last_index_rebuild = network_time(); +global last_datastore_mod = network_time(); + + +event reindex() &priority=5 + { + local tmp_indexes: Indexes; + for ( plugin in plugins ) + { + for ( m in metas$metas ) + { + add tmp_indexes$hosts[m$source]; + add tmp_indexes$strings[m$intent]; + + #for ( ip in index_plugins ) + # { + # ip$index(index, m); + # } + } + } + indexes = + event indexing_done(); + } + +event rebuild_indexes(triggered_at: time) + { + if ( network_time() - triggered_at >= rebuild_indexes_max || + network_time() - last_datastore_mod >= rebuild_indexes_min ) + { + reindex(); + } + } + +event Intel::new_item(item:: Item) &priority=5 + { + last_datastore_mod = network_time(); + schedule rebuild_indexes_min { rebuild_indexes(network_time()) }; + } + +event Intel::updated_item(item:: Item) &priority=5 + { + last_datastore_mod = network_time(); + schedule rebuild_indexes_min { rebuild_indexes(network_time()) }; + } \ No newline at end of file diff --git a/scripts/base/frameworks/intel/input.bro b/scripts/base/frameworks/intel/input.bro index 08ca3992eb..4776a0852e 100644 --- a/scripts/base/frameworks/intel/input.bro +++ b/scripts/base/frameworks/intel/input.bro @@ -5,11 +5,9 @@ module Intel; export { ## Files that will be 
read off disk const read_files: set[string] = {} &redef; - - global entry: event(desc: Input::EventDescription, tpe: Input::Event, item: Intel::Item); } -event Intel::entry(desc: Input::EventDescription, tpe: Input::Event, item: Intel::Item) +event Intel::read_entry(desc: Input::EventDescription, tpe: Input::Event, item: Intel::Item) { Intel::insert(item); } @@ -23,6 +21,6 @@ event bro_init() &priority=5 $mode=Input::REREAD, $name=cat("intel-", a_file), $fields=Intel::Item, - $ev=Intel::entry]); + $ev=Intel::read_entry]); } } diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 72fbd5c18e..9d73915fb0 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -1,11 +1,11 @@ ##! The intelligence framework provides a way to store and query IP addresses, -##! and strings (with a subtype). Metadata can +##! and strings (with a str_type). Metadata can ##! also be associated with the intelligence like for making more informated ##! decisions about matching and handling of intelligence. # # TODO: # Comments -# Better Intel::Item comparison (same_meta) +# Better Intel::Item comparison (has_meta) # Generate a notice when messed up data is discovered. # Complete "net" support as an intelligence type. @@ -22,195 +22,352 @@ export { Detection, }; - type Classification: enum { + ## String data needs to be further categoried since it could represent + ## and number of types of data. + type SubType: enum { + ## A complete URL. + URL, + ## User-Agent string, typically HTTP or mail message body. + USER_AGENT, + ## Email address. + EMAIL, + ## DNS domain name (DNS Zones are implemented in an intelligence plugin). + DOMAIN, + ## A user name. + USER_NAME, + ## File hash which is non hash type specific. It's up to the user to query + ## for any relevant hash types. + FILE_HASH, + ## Certificate hash. Normally for X.509 certificates from the SSL analyzer. 
+ CERT_HASH, + }; + + ## Why a piece of intelligence is being added or looked up. The intent a human + ## placed upon the data when it was decided to be worthwhile as intelligence. + type Intent: enum { + ## Data is to be considered malicious. MALICIOUS, - INFRASTRUCTURE, + ## Data is to be considered sensitive. In many cases this may be + ## hosts containing contractually or legally restricted data such + ## as HIPPA, PCI, Sarbanes-Oxley, etc. SENSITIVE, - FRIEND, + ## Data that is never to be seen. This acts like the "canary in + ## the coal mine". A possibility could be file hashes for + ## critically important files. CANARY, + ## Data that is whitelisted. The primary use for this intent is to + ## locally whitelist false positive data from external feeds. WHITELIST, }; - type SubType: enum { - URL, - EMAIL, - DOMAIN, - USER_NAME, - FILE_HASH, # (non hash type specific, md5, sha1, sha256) - CERT_HASH, - ASN, + ## Enum to represent where data came from when it was discovered. + type Where: enum { + ## A catchall value to represent data of unknown provenance. + ANYWHERE, + }; + + ## Data about an :bro:type:`Intel::Item` + type MetaData: record { + ## An arbitrary string value representing the data source. Typically, + ## the convention for this field will be the source name and feed name + ## separated by a hyphen. For example: "source1-c&c". + source: string; + ## The intent of the data. + intent: Intent; + ## A freeform description for the data. + desc: string &optional; + ## A URL for more information about the data. 
+ url: string &optional; }; + type Item: record { + host: addr &optional; + net: subnet &optional; + str: string &optional; + str_type: SubType &optional; + + meta: MetaData; + }; + + type Found: record { + host: addr &optional; + str: string &optional; + str_type: SubType &optional; + + where: Where; + }; + type Info: record { ts: time &log; ## This value should be one of: "info", "warn", "error" level: string &log; message: string &log; - }; - - type MetaData: record { - source: string; - class: Classification; - desc: string &optional; - url: string &optional; - tags: set[string] &optional; - }; - - type Item: record { - ip: addr &optional; - net: subnet &optional; - - str: string &optional; - subtype: SubType &optional; - - meta: MetaData; + item: Item &log; }; - type Query: record { - ip: addr &optional; - - str: string &optional; - subtype: SubType &optional; - - class: Classification &optional; - - or_tags: set[string] &optional; - and_tags: set[string] &optional; - - ## The predicate can be given when searching for a match. It will - ## be tested against every :bro:type:`MetaData` item associated with - ## the data being matched on. If it returns T a single time, the - ## matcher will consider that the item has matched. - pred: function(meta: Intel::Item): bool &optional; - }; - - type Importer: enum { - NULL_IMPORTER + type Plugin: record { + index: function() &optional; + match: function(found: Found): bool &optional; + lookup: function(found: Found): set[Item] &optional; }; - global insert: function(item: Item): bool; - global insert_event: event(item: Item); + ## Manipulation and query API functions. + global insert: function(item: Item); global delete_item: function(item: Item): bool; + global unique_data: function(): count; - global matcher: function(query: Query): bool; - global lookup: function(query: Query): set[Item]; + ## Function to declare discovery of a piece of data in order to check + ## it against known intelligence for matches. 
+ global found_in_conn: function(c: connection, found: Found); - global register_custom_matcher: function(subtype: SubType, - func: function(query: Query): bool); - global register_custom_lookup: function(subtype: SubType, - func: function(query: Query): set[Item]); + ## Event to represent a match happening in a connection. On clusters there + ## is no assurance as to where this event will be generated so don't + ## assume that arbitrary global state beyond the given data + ## will be available. + global match_in_conn: event(c: connection, found: Found, items: set[Item]); + global find: function(found: Found): bool; + global lookup: function(found: Found): set[Item]; + + + ## Plugin API functions + global register_custom_matcher: function(str_type: SubType, + func: function(found: Found): bool); + global register_custom_lookup: function(str_type: SubType, + func: function(found: Found): set[Item]); + + ## API Events global new_item: event(item: Item); global updated_item: event(item: Item); + global insert_event: event(item: Item); + + ## Optionally store metadata. This is primarily used internally depending on + ## if this is a cluster deployment or not. On clusters, workers probably + ## shouldn't be storing the full metadata. + const store_metadata = T &redef; } -## Store collections of :bro:type:`MetaData` records indexed by a source name. -type IndexedItems: table[string, Classification] of MetaData; +# Internal handler for conn oriented matches with no metadata base on the store_metadata setting. 
+global match_in_conn_no_items: event(c: connection, found: Found); + type DataStore: record { - ip_data: table[addr] of IndexedItems; - string_data: table[string, SubType] of IndexedItems; + host_data: table[addr] of set[MetaData]; + string_data: table[string, SubType] of set[MetaData]; }; global data_store: DataStore; -global custom_matchers: table[SubType] of set[function(query: Query): bool]; -global custom_lookup: table[SubType] of set[function(query: Query): set[Item]]; +global custom_matchers: table[SubType] of set[function(found: Found): bool]; +global custom_lookup: table[SubType] of set[function(found: Found): set[Item]]; + event bro_init() &priority=5 { Log::create_stream(Intel::LOG, [$columns=Info]); } -function register_custom_matcher(subtype: SubType, func: function(query: Query): bool) + +function find(found: Found): bool { - if ( subtype !in custom_matchers ) - custom_matchers[subtype] = set(); - add custom_matchers[subtype][func]; - } - -function register_custom_lookup(subtype: SubType, func: function(query: Query): set[Item]) - { - if ( subtype !in custom_lookup ) - custom_lookup[subtype] = set(); - add custom_lookup[subtype][func]; - } - - - -function same_meta(meta1: MetaData, meta2: MetaData): bool - { - # "any" type values can't be compared so this generic implementation doesn't work. - #local rf1 = record_fields(item1); - #local rf2 = record_fields(item2); - #for ( field in rf1 ) - # { - # if ( ((rf1[field]?$value && rf1[field]?$value) && - # rf1[field]$value != rf2[field]$value) || - # ! 
(rf1[field]?$value && rf1[field]?$value) ) - # return F; - # } - - if ( meta1$source == meta2$source && - meta1$class == meta2$class && - ((!meta1?$desc && !meta2?$desc) || (meta1?$desc && meta2?$desc && meta1$desc == meta2$desc)) && - ((!meta1?$url && !meta2?$url) || (meta1?$url && meta2?$url && meta1$url == meta2$url)) && - ((!meta1?$tags && !meta2?$tags) || (meta1?$tags && meta2?$tags && |meta1$tags| == |meta2$tags|)) ) + if ( found?$host && found$host in data_store$host_data) { - # TODO: match on all of the tag values return T; } + else if ( found?$str && found?$str_type && + [found$str, found$str_type] in data_store$string_data ) + { + return T; + } + + # Finder plugins! + for ( plugin in plugins ) + { + if ( plugin?$match && plugin$match(found) ) + return T; + } + + return F; + } + +function lookup(found: Found): set[Item] + { + local item: Item; + local return_data: set[Item] = set(); + + if ( found?$host ) + { + # See if the host is known about and it has meta values + if ( found$host in data_store$host_data ) + { + for ( m in data_store$host_data[found$host] ) + { + item = [$host=found$host, $meta=m]; + add return_data[item]; + } + } + } + else if ( found?$str && found?$str_type ) + { + # See if the string is known about and it has meta values + if ( [found$str, found$str_type] in data_store$string_data ) + { + for ( m in data_store$string_data[found$str, found$str_type] ) + { + item = [$str=found$str, $str_type=found$str_type, $meta=m]; + add return_data[item]; + } + } + + # Check if there are any custom str_type lookup functions and add the values to + # the result set. + if ( found$str_type in custom_lookup ) + { + for ( lookup_func in custom_lookup[found$str_type] ) + { + # Iterating here because there is no way to merge sets generically. + for ( custom_lookup_item in lookup_func(found) ) + add return_data[custom_lookup_item]; + } + } + } + + + + # TODO: Later we should probably track whitelist matches. 
+ # TODO: base this on a set instead of iterating the items. + for ( item in return_data ) + { + if ( item$meta$intent == WHITELIST ) + { + return set(); + } + } + + return return_data; + } + +function Intel::found_in_conn(c: connection, found: Found) + { + if ( find(found) ) + { + if ( store_metadata ) + { + local items = lookup(found); + event Intel::match_in_conn(c, found, items); + } + else + { + event Intel::match_in_conn_no_items(c, found); + } + } + } + +function register_custom_matcher(str_type: SubType, func: function(found: Found): bool) + { + if ( str_type !in custom_matchers ) + custom_matchers[str_type] = set(func); + else + add custom_matchers[str_type][func]; + } + +function register_custom_lookup(str_type: SubType, func: function(found: Found): set[Item]) + { + if ( str_type !in custom_lookup ) + custom_lookup[str_type] = set(func); + else + add custom_lookup[str_type][func]; + } + +function unique_data(): count + { + return |data_store$host_data| + |data_store$string_data|; + } + +#function get_meta(check: MetaData, metas: set[MetaData]): MetaData +# { +# local check_hash = md5_hash(check); +# for ( m in metas ) +# { +# if ( check_hash == md5_hash(m) ) +# return m; +# } +# +# return [$source=""]; +# } + +function has_meta(check: MetaData, metas: set[MetaData]): bool + { + local check_hash = md5_hash(check); + for ( m in metas ) + { + if ( check_hash == md5_hash(m) ) + return T; + } # The records must not be equivalent if we made it this far. return F; } -function insert(item: Item): bool +function insert(item: Item) { local err_msg = ""; - if ( item?$str && ! item?$subtype ) - err_msg = "You must provide a subtype for strings or this item doesn't make sense."; + if ( item?$str && ! item?$str_type ) + err_msg = "You must provide a str_type for strings or this item doesn't make sense."; if ( err_msg == "" ) { # Create and fill out the meta data item. 
local meta = item$meta; + local metas: set[MetaData]; - if ( item?$ip ) + if ( item?$host ) { - if ( item$ip !in data_store$ip_data ) - data_store$ip_data[item$ip] = table(); + if ( item$host !in data_store$host_data ) + data_store$host_data[item$host] = set(); - if ( [meta$source, meta$class] !in data_store$ip_data[item$ip] ) - event Intel::new_item(item); - else if ( ! same_meta(data_store$ip_data[item$ip][meta$source, meta$class], meta) ) - event Intel::updated_item(item); - else - return F; - - data_store$ip_data[item$ip][meta$source, meta$class] = item$meta; - return T; + metas = data_store$host_data[item$host]; } else if ( item?$str ) { - if ( [item$str, item$subtype] !in data_store$string_data ) - data_store$string_data[item$str, item$subtype] = table(); - - if ( [meta$source, meta$class] !in data_store$string_data[item$str, item$subtype] ) - event Intel::new_item(item); - else if ( ! same_meta(data_store$string_data[item$str, item$subtype][meta$source, meta$class], meta) ) - event Intel::updated_item(item); - else - return F; + if ( [item$str, item$str_type] !in data_store$string_data ) + data_store$string_data[item$str, item$str_type] = set(); - data_store$string_data[item$str, item$subtype][meta$source, meta$class] = item$meta; - return T; + metas = data_store$string_data[item$str, item$str_type]; } else - err_msg = "Failed to insert intelligence item for some unknown reason."; + { + err_msg = "Malformed intelligence item"; + } + + for ( m in metas ) + { + if ( meta$source == m$source ) + { + if ( has_meta(meta, metas) ) + { + # It's the same item being inserted again. 
+ return; + } + else + { + event Intel::updated_item(item); + break; + } + } + else + { + event Intel::new_item(item); + break; + } + } + + add metas[item$meta]; + return; } if ( err_msg != "" ) - Log::write(Intel::LOG, [$ts=network_time(), $level="warn", $message=fmt(err_msg)]); - return F; + Log::write(Intel::LOG, [$ts=network_time(), $level="warn", $message=err_msg, $item=item]); + + return; } event insert_event(item: Item) @@ -218,160 +375,3 @@ event insert_event(item: Item) insert(item); } -function match_item_with_query(item: Item, query: Query): bool - { - if ( ! query?$and_tags && ! query?$or_tags && ! query?$pred ) - return T; - - if ( query?$and_tags ) - { - local matched = T; - # Every tag given has to match in a single MetaData entry. - for ( tag in query$and_tags ) - { - if ( item$meta?$tags && tag !in item$meta$tags ) - matched = F; - } - if ( matched ) - return T; - } - else if ( query?$or_tags ) - { - # For OR tags, only a single tag has to match. - for ( tag in query$or_tags ) - { - if ( item$meta?$tags && tag in item$meta$tags ) - return T; - } - } - else if ( query?$pred ) - return query$pred(item); - - # This indicates some sort of failure in the query - return F; - } - -function lookup(query: Query): set[Item] - { - local meta: MetaData; - local item: Item; - local return_data: set[Item] = set(); - - if ( query?$ip ) - { - if ( query$ip in data_store$ip_data ) - { - for ( [source, class] in data_store$ip_data[query$ip] ) - { - meta = data_store$ip_data[query$ip][source, class]; - item = [$ip=query$ip,$meta=meta]; - if ( match_item_with_query(item, query) ) - add return_data[item]; - } - } - } - - else if ( query?$str ) - { - if ( [query$str, query$subtype] in data_store$string_data ) - { - for ( [source, class] in data_store$string_data[query$str, query$subtype] ) - { - meta = data_store$string_data[query$str, query$subtype][source, class]; - item = [$str=query$str,$subtype=query$subtype,$meta=meta]; - if ( match_item_with_query(item, query) ) 
- add return_data[item]; - } - } - - # Check if there are any custom subtype lookup functons and add the values to - # the result set. - if ( query$subtype in custom_lookup ) - { - for ( lookup_func in custom_lookup[query$subtype] ) - { - # Iterating here because there is no way to merge sets generically. - for ( custom_lookup_item in lookup_func(query) ) - add return_data[custom_lookup_item]; - } - } - } - - return return_data; - } - - -function matcher(query: Query): bool - { - local err_msg = ""; - if ( (query?$or_tags || query?$and_tags) && query?$pred ) - err_msg = "You can't match with both tags and a predicate."; - else if ( query?$or_tags && query?$and_tags ) - err_msg = "You can't match with both OR'd together tags and AND'd together tags"; - else if ( query?$str && ! query?$subtype ) - err_msg = "You must provide a subtype to matcher or this query doesn't make sense."; - - local item: Item; - local meta: MetaData; - - if ( err_msg == "" ) - { - if ( query?$ip ) - { - if ( query$ip in data_store$ip_data ) - { - if ( ! query?$and_tags && ! query?$or_tags && ! query?$pred ) - return T; - - for ( [source, class] in data_store$ip_data[query$ip] ) - { - meta = data_store$ip_data[query$ip][source, class]; - item = [$ip=query$ip,$meta=meta]; - if ( match_item_with_query(item, query) ) - return T; - } - } - } - - else if ( query?$str ) - { - if ( [query$str, query$subtype] in data_store$string_data ) - { - if ( ! query?$and_tags && ! query?$or_tags && ! query?$pred ) - return T; - - for ( [source, class] in data_store$string_data[query$str, query$subtype] ) - { - meta = data_store$string_data[query$str, query$subtype][source, class]; - item = [$str=query$str,$subtype=query$subtype,$meta=meta]; - if ( match_item_with_query(item, query) ) - return T; - } - } - - # Check if there are any custom subtype matchers in case we haven't matched yet. 
- if ( query$subtype in custom_matchers ) - { - for ( match_func in custom_matchers[query$subtype] ) - { - if ( match_func(query) ) - return T; - } - } - } - - else - err_msg = "You must supply one of the $ip or $str fields to search on"; - } - - if ( err_msg != "" ) - Log::write(Intel::LOG, [$ts=network_time(), $level="error", $message=fmt(err_msg)]); - return F; - } - -module GLOBAL; - -function INTEL(item: Intel::Query): bool - { - return Intel::matcher(item); - } \ No newline at end of file diff --git a/scripts/base/frameworks/intel/non-cluster b/scripts/base/frameworks/intel/non-cluster new file mode 100644 index 0000000000..dddf430966 --- /dev/null +++ b/scripts/base/frameworks/intel/non-cluster @@ -0,0 +1,3 @@ + +module Intel; + diff --git a/scripts/base/frameworks/intel/plugins/dns_zones.bro b/scripts/base/frameworks/intel/plugins/dns_zones.bro index 3f1c30ef3d..ba35b35421 100644 --- a/scripts/base/frameworks/intel/plugins/dns_zones.bro +++ b/scripts/base/frameworks/intel/plugins/dns_zones.bro @@ -7,36 +7,45 @@ export { }; } -function dns_zone_ripper(query: Query): Query +function dns_zone_ripper(found: Found): Found { - local query_copy = copy(query); + local found_copy = copy(found); + + ## # We only support fourth level depth zones right now for performance. + ## if ( /(\.[^\.]+){4,}/ in found_copy$str ) + ## { + ## local parts = split_all(found_copy$str, /\./); + ## local len = |parts|; + ## found_copy$str = parts[len-6] + "." + parts[len-4] + "." + parts[len-2] + "." + parts[len]; + ## } + # We can assume that we're getting a string and subtype because # this function is only registered for DOMAIN and DNS_ZONE data. 
- local dns_name = sub(query_copy$str, /^[^\.]*\./, ""); - query_copy$str = dns_name; + local dns_name = sub(found_copy$str, /^[^\.]*\./, ""); + found_copy$str = dns_name; # We are doing a literal search for a DNS zone at this point - query_copy$subtype = Intel::DNS_ZONE; - return query_copy; + found_copy$str_type = Intel::DNS_ZONE; + return found_copy; } # This matcher extension adds additional matchers for domain names. -function dns_zone_matcher(query: Query): bool +function dns_zone_matcher(found: Found): bool { - local query_copy = dns_zone_ripper(query); - if ( query$str == query_copy$str ) + local found_copy = dns_zone_ripper(found); + if ( found$str == found_copy$str ) return F; - return Intel::matcher(query_copy); + return Intel::find(found_copy); } -function dns_zone_lookup(query: Query): set[Item] +function dns_zone_lookup(found: Found): set[Item] { local result_set: set[Item] = set(); - local query_copy = dns_zone_ripper(query); - if ( query$str == query_copy$str ) + local found_copy = dns_zone_ripper(found); + if ( found$str == found_copy$str ) return result_set; - for ( item in Intel::lookup(query_copy) ) + for ( item in Intel::lookup(found_copy) ) add result_set[item]; return result_set; } @@ -44,10 +53,9 @@ function dns_zone_lookup(query: Query): set[Item] event bro_init() &priority=10 { register_custom_matcher(DOMAIN, dns_zone_matcher); - # The DNS_ZONE subtype needs added because it's ultimately - # a subset of DOMAIN and will need to be searched as well. - register_custom_matcher(DNS_ZONE, dns_zone_matcher); - register_custom_lookup(DOMAIN, dns_zone_lookup); + ## The DNS_ZONE subtype needs added because it's ultimately + ## a subset of DOMAIN and will need to be searched as well. 
+ register_custom_matcher(DNS_ZONE, dns_zone_matcher); register_custom_lookup(DNS_ZONE, dns_zone_lookup); } diff --git a/scripts/base/frameworks/intel/plugins/set.bro b/scripts/base/frameworks/intel/plugins/set.bro new file mode 100644 index 0000000000..b169e91972 --- /dev/null +++ b/scripts/base/frameworks/intel/plugins/set.bro @@ -0,0 +1,19 @@ +module Intel; + +redef record Intel::Indexes += { + hosts: set[addr] &default=set(); + strings: set[string, SubType] &default=set(); +}; + +redef plugins += { + [$index() = { + + }, + $match(found: Found): bool = { + + }, + $lookup(found: Found): set[Item] = { + + } + ] +}; \ No newline at end of file From 71c5b49bdc4daf8b06516509d36a297af41f4e28 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Thu, 27 Sep 2012 13:39:48 -0400 Subject: [PATCH 03/22] Major updates to fix the Intel framework API. --- scripts/base/frameworks/intel/__load__.bro | 9 +- scripts/base/frameworks/intel/cluster.bro | 45 ++- scripts/base/frameworks/intel/indexing.bro | 68 ---- scripts/base/frameworks/intel/input.bro | 4 +- scripts/base/frameworks/intel/main.bro | 308 ++++++------------ scripts/base/frameworks/intel/non-cluster | 3 - .../frameworks/intel/plugins/dns_zones.bro | 61 ---- scripts/base/frameworks/intel/plugins/set.bro | 19 -- 8 files changed, 126 insertions(+), 391 deletions(-) delete mode 100644 scripts/base/frameworks/intel/indexing.bro delete mode 100644 scripts/base/frameworks/intel/non-cluster delete mode 100644 scripts/base/frameworks/intel/plugins/dns_zones.bro delete mode 100644 scripts/base/frameworks/intel/plugins/set.bro diff --git a/scripts/base/frameworks/intel/__load__.bro b/scripts/base/frameworks/intel/__load__.bro index 8b425f6de4..c6822212c0 100644 --- a/scripts/base/frameworks/intel/__load__.bro +++ b/scripts/base/frameworks/intel/__load__.bro @@ -1,15 +1,8 @@ @load ./main @load ./input -@load ./indexing # The cluster framework must be loaded first. 
@load base/frameworks/cluster - @if ( Cluster::is_enabled() ) @load ./cluster -@endif - -@load ./plugins/dns_zones - - -@load ./http-user-agents \ No newline at end of file +@endif \ No newline at end of file diff --git a/scripts/base/frameworks/intel/cluster.bro b/scripts/base/frameworks/intel/cluster.bro index 6b361fc711..5b5f67e978 100644 --- a/scripts/base/frameworks/intel/cluster.bro +++ b/scripts/base/frameworks/intel/cluster.bro @@ -5,53 +5,52 @@ module Intel; -export { - global cluster_new_item: event(item: Item); - global cluster_updated_item: event(item: Item); - - redef record Item += { - ## This field is solely used internally for cluster transparency with - ## the intelligence framework to avoid storms of intelligence data - ## swirling forever. It allows data to propagate only a single time. - first_dispatch: bool &default=T; - }; -} - # If this process is not a manager process, we don't want the full metadata @if ( Cluster::local_node_type() != Cluster::MANAGER ) -redef store_metadata = F; +redef have_full_data = F; @endif +global cluster_new_item: event(item: Item); +global cluster_updated_item: event(item: Item); + +redef record Item += { + ## This field is solely used internally for cluster transparency with + ## the intelligence framework to avoid storms of intelligence data + ## swirling forever. It allows data to propagate only a single time. + first_dispatch: bool &default=T; +}; + # Primary intelligence distribution comes from manager. redef Cluster::manager2worker_events += /Intel::cluster_(new|updated)_item/; # If a worker finds intelligence and adds it, it should share it back to the manager. 
redef Cluster::worker2manager_events += /Intel::(match_in_.*_no_items|cluster_(new|updated)_item)/; @if ( Cluster::local_node_type() == Cluster::MANAGER ) -event Intel::match_in_conn_no_items(c: connection, found: Found) +event Intel::match_in_conn_no_items(c: connection, seen: Seen) &priority=5 { - local items = lookup(found); - event Intel::match_in_conn(c, found, items); + event Intel::match_in_conn(c, seen, Intel::get_items(seen)); } @endif -event Intel::cluster_new_item(item: Intel::Item) +event Intel::cluster_new_item(item: Intel::Item) &priority=5 { - # Ignore locally generated events. + # Ignore locally generated events to avoid event storms. if ( is_remote_event() ) Intel::insert(item); } -event Intel::cluster_updated_item(item: Intel::Item) +event Intel::cluster_updated_item(item: Intel::Item) &priority=5 { - # Ignore locally generated events. + # Ignore locally generated events to avoid event storms. if ( is_remote_event() ) Intel::insert(item); } -event Intel::new_item(item: Intel::Item) +event Intel::new_item(item: Intel::Item) &priority=5 { - # The cluster manager always rebroadcasts intelligence + # The cluster manager always rebroadcasts intelligence. + # Workers redistribute it if it was locally generated on + # the worker. if ( Cluster::local_node_type() == Cluster::MANAGER || item$first_dispatch ) { @@ -60,7 +59,7 @@ event Intel::new_item(item: Intel::Item) } } -event Intel::updated_item(item: Intel::Item) +event Intel::updated_item(item: Intel::Item) &priority=5 { # If this is the first time this item has been dispatched or this # is a manager, send it over the cluster. 
diff --git a/scripts/base/frameworks/intel/indexing.bro b/scripts/base/frameworks/intel/indexing.bro deleted file mode 100644 index a89ac44038..0000000000 --- a/scripts/base/frameworks/intel/indexing.bro +++ /dev/null @@ -1,68 +0,0 @@ -module Intel; - -export { - type Indexes: record { - hosts: set[addr] &default=set(); - strings: set[string, SubType] &default=set(); - }; - - redef record Plugin += { - index: function(item: Item) &optional; - } - - ## Rebuild indexes this interval after any change to data if there - ## have been no other changes. - const rebuild_indexes_min = 1min &redef; - ## Wait no longer than this interval to update indexes after any - ## change to the data. - const rebuild_indexes_max = 5min &redef; - - global indexing_done: event(); -} - -local indexes: Indexes = []; - -global last_index_rebuild = network_time(); -global last_datastore_mod = network_time(); - - -event reindex() &priority=5 - { - local tmp_indexes: Indexes; - for ( plugin in plugins ) - { - for ( m in metas$metas ) - { - add tmp_indexes$hosts[m$source]; - add tmp_indexes$strings[m$intent]; - - #for ( ip in index_plugins ) - # { - # ip$index(index, m); - # } - } - } - indexes = - event indexing_done(); - } - -event rebuild_indexes(triggered_at: time) - { - if ( network_time() - triggered_at >= rebuild_indexes_max || - network_time() - last_datastore_mod >= rebuild_indexes_min ) - { - reindex(); - } - } - -event Intel::new_item(item:: Item) &priority=5 - { - last_datastore_mod = network_time(); - schedule rebuild_indexes_min { rebuild_indexes(network_time()) }; - } - -event Intel::updated_item(item:: Item) &priority=5 - { - last_datastore_mod = network_time(); - schedule rebuild_indexes_min { rebuild_indexes(network_time()) }; - } \ No newline at end of file diff --git a/scripts/base/frameworks/intel/input.bro b/scripts/base/frameworks/intel/input.bro index 4776a0852e..fd2c0bae97 100644 --- a/scripts/base/frameworks/intel/input.bro +++ b/scripts/base/frameworks/intel/input.bro 
@@ -3,7 +3,9 @@ module Intel; export { - ## Files that will be read off disk + ## Intelligence files that will be read off disk. The files are + ## reread everytime they are updated so updates much be atomic with + ## "mv" instead of writing the file in place. const read_files: set[string] = {} &redef; } diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 9d73915fb0..dbf40f637d 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -2,12 +2,6 @@ ##! and strings (with a str_type). Metadata can ##! also be associated with the intelligence like for making more informated ##! decisions about matching and handling of intelligence. -# -# TODO: -# Comments -# Better Intel::Item comparison (has_meta) -# Generate a notice when messed up data is discovered. -# Complete "net" support as an intelligence type. @load base/frameworks/notice @@ -24,7 +18,7 @@ export { ## String data needs to be further categoried since it could represent ## and number of types of data. - type SubType: enum { + type StrType: enum { ## A complete URL. URL, ## User-Agent string, typically HTTP or mail message body. @@ -41,39 +35,13 @@ export { ## Certificate hash. Normally for X.509 certificates from the SSL analyzer. CERT_HASH, }; - - ## Why a piece of intelligence is being added or looked up. The intent a human - ## placed upon the data when it was decided to be worthwhile as intelligence. - type Intent: enum { - ## Data is to be considered malicious. - MALICIOUS, - ## Data is to be considered sensitive. In many cases this may be - ## hosts containing contractually or legally restricted data such - ## as HIPPA, PCI, Sarbanes-Oxley, etc. - SENSITIVE, - ## Data that is never to be seen. This acts like the "canary in - ## the coal mine". A possibility could be file hashes for - ## critically important files. - CANARY, - ## Data that is whitelisted. 
The primary use for this intent is to - ## locally whitelist false positive data from external feeds. - WHITELIST, - }; - - ## Enum to represent where data came from when it was discovered. - type Where: enum { - ## A catchall value to represent data of unknown provenance. - ANYWHERE, - }; - + ## Data about an :bro:type:`Intel::Item` type MetaData: record { ## An arbitrary string value representing the data source. Typically, ## the convention for this field will be the source name and feed name ## separated by a hyphen. For example: "source1-c&c". source: string; - ## The intent of the data. - intent: Intent; ## A freeform description for the data. desc: string &optional; ## A URL for more information about the data. @@ -84,215 +52,144 @@ export { host: addr &optional; net: subnet &optional; str: string &optional; - str_type: SubType &optional; + str_type: StrType &optional; meta: MetaData; }; - type Found: record { + ## Enum to represent where data came from when it was discovered. + type Where: enum { + ## A catchall value to represent data of unknown provenance. + ANYWHERE, + }; + + type Seen: record { host: addr &optional; str: string &optional; - str_type: SubType &optional; + str_type: StrType &optional; where: Where; }; - type Info: record { - ts: time &log; - ## This value should be one of: "info", "warn", "error" - level: string &log; - message: string &log; - item: Item &log; + type PolicyItem: record { + pred: function(seen: Seen, item: Item): bool &optional; + + log_it: bool &default=T; }; - type Plugin: record { - index: function() &optional; - match: function(found: Found): bool &optional; - lookup: function(found: Found): set[Item] &optional; - }; - - ## Manipulation and query API functions. + ## Intelligence data manipulation functions. 
global insert: function(item: Item); global delete_item: function(item: Item): bool; - global unique_data: function(): count; ## Function to declare discovery of a piece of data in order to check ## it against known intelligence for matches. - global found_in_conn: function(c: connection, found: Found); + global seen_in_conn: function(c: connection, seen: Seen); - ## Event to represent a match happening in a connection. On clusters there - ## is no assurance as to where this event will be generated so don't - ## assume that arbitrary global state beyond the given data - ## will be available. - global match_in_conn: event(c: connection, found: Found, items: set[Item]); + ## Intelligence policy variable for handling matches. + const policy: set[PolicyItem] = {} &redef; - global find: function(found: Found): bool; - global lookup: function(found: Found): set[Item]; - - - ## Plugin API functions - global register_custom_matcher: function(str_type: SubType, - func: function(found: Found): bool); - global register_custom_lookup: function(str_type: SubType, - func: function(found: Found): set[Item]); - - ## API Events + ## API Events that indicate when various things happen internally within the + ## intelligence framework. global new_item: event(item: Item); global updated_item: event(item: Item); - global insert_event: event(item: Item); - - ## Optionally store metadata. This is primarily used internally depending on - ## if this is a cluster deployment or not. On clusters, workers probably - ## shouldn't be storing the full metadata. - const store_metadata = T &redef; } -# Internal handler for conn oriented matches with no metadata base on the store_metadata setting. -global match_in_conn_no_items: event(c: connection, found: Found); +## Event to represent a match happening in a connection. On clusters there +## is no assurance as to where this event will be generated so don't +## assume that arbitrary global state beyond the given data +## will be available. 
+global match_in_conn: event(c: connection, seen: Seen, items: set[Item]); + +# Internal handler for conn oriented matches with no metadata based on the have_full_data setting. +global match_in_conn_no_items: event(c: connection, seen: Seen); + +## Optionally store metadata. This is used internally depending on +## if this is a cluster deployment or not. +const have_full_data = T &redef; type DataStore: record { - host_data: table[addr] of set[MetaData]; - string_data: table[string, SubType] of set[MetaData]; + net_data: table[subnet] of set[MetaData]; + string_data: table[string, StrType] of set[MetaData]; }; global data_store: DataStore; -global custom_matchers: table[SubType] of set[function(found: Found): bool]; -global custom_lookup: table[SubType] of set[function(found: Found): set[Item]]; - - -event bro_init() &priority=5 +function find(seen: Seen): bool { - Log::create_stream(Intel::LOG, [$columns=Info]); - } - - -function find(found: Found): bool - { - if ( found?$host && found$host in data_store$host_data) + if ( seen?$host && + seen$host in data_store$net_data ) { return T; } - else if ( found?$str && found?$str_type && - [found$str, found$str_type] in data_store$string_data ) + else if ( seen?$str && seen?$str_type && + [seen$str, seen$str_type] in data_store$string_data ) { return T; } - - # Finder plugins! - for ( plugin in plugins ) + else { - if ( plugin?$match && plugin$match(found) ) - return T; + return F; } - - return F; } -function lookup(found: Found): set[Item] +function get_items(seen: Seen): set[Item] { local item: Item; local return_data: set[Item] = set(); - if ( found?$host ) + if ( ! have_full_data ) + { + # A reporter warning should be generated here because this function + # should never be called from a host that doesn't have the full data. + # TODO: do a reporter warning. 
+ return return_data; + } + + if ( seen?$host ) { # See if the host is known about and it has meta values - if ( found$host in data_store$host_data ) + if ( seen$host in data_store$net_data ) { - for ( m in data_store$host_data[found$host] ) + for ( m in data_store$net_data[seen$host] ) { - item = [$host=found$host, $meta=m]; + # TODO: the lookup should be finding all and not just most specific + # and $host/$net should have the correct value. + item = [$host=seen$host, $meta=m]; add return_data[item]; } } } - else if ( found?$str && found?$str_type ) + else if ( seen?$str && seen?$str_type ) { # See if the string is known about and it has meta values - if ( [found$str, found$str_type] in data_store$string_data ) + if ( [seen$str, seen$str_type] in data_store$string_data ) { - for ( m in data_store$string_data[found$str, found$str_type] ) + for ( m in data_store$string_data[seen$str, seen$str_type] ) { - item = [$str=found$str, $str_type=found$str_type, $meta=m]; + item = [$str=seen$str, $str_type=seen$str_type, $meta=m]; add return_data[item]; } } - - # Check if there are any custom str_type lookup functions and add the values to - # the result set. - if ( found$str_type in custom_lookup ) - { - for ( lookup_func in custom_lookup[found$str_type] ) - { - # Iterating here because there is no way to merge sets generically. - for ( custom_lookup_item in lookup_func(found) ) - add return_data[custom_lookup_item]; - } - } - } - - - - # TODO: Later we should probably track whitelist matches. - # TODO: base this on a set instead of iterating the items. 
- for ( item in return_data ) - { - if ( item$meta$intent == WHITELIST ) - { - return set(); - } } return return_data; } -function Intel::found_in_conn(c: connection, found: Found) +function Intel::seen_in_conn(c: connection, seen: Seen) { - if ( find(found) ) + if ( find(seen) ) { - if ( store_metadata ) + if ( have_full_data ) { - local items = lookup(found); - event Intel::match_in_conn(c, found, items); + local items = get_items(seen); + event Intel::match_in_conn(c, seen, items); } else { - event Intel::match_in_conn_no_items(c, found); + event Intel::match_in_conn_no_items(c, seen); } } } -function register_custom_matcher(str_type: SubType, func: function(found: Found): bool) - { - if ( str_type !in custom_matchers ) - custom_matchers[str_type] = set(func); - else - add custom_matchers[str_type][func]; - } - -function register_custom_lookup(str_type: SubType, func: function(found: Found): set[Item]) - { - if ( str_type !in custom_lookup ) - custom_lookup[str_type] = set(func); - else - add custom_lookup[str_type][func]; - } - -function unique_data(): count - { - return |data_store$host_data| + |data_store$string_data|; - } - -#function get_meta(check: MetaData, metas: set[MetaData]): MetaData -# { -# local check_hash = md5_hash(check); -# for ( m in metas ) -# { -# if ( check_hash == md5_hash(m) ) -# return m; -# } -# -# return [$source=""]; -# } function has_meta(check: MetaData, metas: set[MetaData]): bool { @@ -309,35 +206,41 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool function insert(item: Item) { - local err_msg = ""; - if ( item?$str && ! item?$str_type ) - err_msg = "You must provide a str_type for strings or this item doesn't make sense."; - - if ( err_msg == "" ) + if ( item?$str && !item?$str_type ) { - # Create and fill out the meta data item. - local meta = item$meta; - local metas: set[MetaData]; + event reporter_warning(network_time(), fmt("You must provide a str_type for strings or this item doesn't make sense. 
Item: %s", item), ""); + return; + } - if ( item?$host ) - { - if ( item$host !in data_store$host_data ) - data_store$host_data[item$host] = set(); - - metas = data_store$host_data[item$host]; - } - else if ( item?$str ) - { - if ( [item$str, item$str_type] !in data_store$string_data ) - data_store$string_data[item$str, item$str_type] = set(); + # Create and fill out the meta data item. + local meta = item$meta; + local metas: set[MetaData]; - metas = data_store$string_data[item$str, item$str_type]; - } - else - { - err_msg = "Malformed intelligence item"; - } + if ( item?$host ) + { + local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128); + if ( host !in data_store$net_data ) + data_store$net_data[host] = set(); + + metas = data_store$net_data[host]; + } + else if ( item?$net ) + { + if ( item$net !in data_store$net_data ) + data_store$net_data[item$net] = set(); + metas = data_store$net_data[item$net]; + } + else if ( item?$str ) + { + if ( [item$str, item$str_type] !in data_store$string_data ) + data_store$string_data[item$str, item$str_type] = set(); + + metas = data_store$string_data[item$str, item$str_type]; + } + + if ( have_full_data ) + { for ( m in metas ) { if ( meta$source == m$source ) @@ -349,6 +252,7 @@ function insert(item: Item) } else { + # Same source, different metadata means updated item. 
event Intel::updated_item(item); break; } @@ -359,19 +263,7 @@ function insert(item: Item) break; } } - add metas[item$meta]; - return; } - - if ( err_msg != "" ) - Log::write(Intel::LOG, [$ts=network_time(), $level="warn", $message=err_msg, $item=item]); - - return; } - -event insert_event(item: Item) - { - insert(item); - } - + \ No newline at end of file diff --git a/scripts/base/frameworks/intel/non-cluster b/scripts/base/frameworks/intel/non-cluster deleted file mode 100644 index dddf430966..0000000000 --- a/scripts/base/frameworks/intel/non-cluster +++ /dev/null @@ -1,3 +0,0 @@ - -module Intel; - diff --git a/scripts/base/frameworks/intel/plugins/dns_zones.bro b/scripts/base/frameworks/intel/plugins/dns_zones.bro deleted file mode 100644 index ba35b35421..0000000000 --- a/scripts/base/frameworks/intel/plugins/dns_zones.bro +++ /dev/null @@ -1,61 +0,0 @@ - -module Intel; - -export { - redef enum SubType += { - DNS_ZONE, - }; -} - -function dns_zone_ripper(found: Found): Found - { - local found_copy = copy(found); - - ## # We only support fourth level depth zones right now for performance. - ## if ( /(\.[^\.]+){4,}/ in found_copy$str ) - ## { - ## local parts = split_all(found_copy$str, /\./); - ## local len = |parts|; - ## found_copy$str = parts[len-6] + "." + parts[len-4] + "." + parts[len-2] + "." + parts[len]; - ## } - - # We can assume that we're getting a string and subtype because - # this function is only registered for DOMAIN and DNS_ZONE data. - local dns_name = sub(found_copy$str, /^[^\.]*\./, ""); - found_copy$str = dns_name; - # We are doing a literal search for a DNS zone at this point - found_copy$str_type = Intel::DNS_ZONE; - return found_copy; - } - -# This matcher extension adds additional matchers for domain names. 
-function dns_zone_matcher(found: Found): bool - { - local found_copy = dns_zone_ripper(found); - if ( found$str == found_copy$str ) - return F; - - return Intel::find(found_copy); - } - -function dns_zone_lookup(found: Found): set[Item] - { - local result_set: set[Item] = set(); - local found_copy = dns_zone_ripper(found); - if ( found$str == found_copy$str ) - return result_set; - - for ( item in Intel::lookup(found_copy) ) - add result_set[item]; - return result_set; - } - -event bro_init() &priority=10 - { - register_custom_matcher(DOMAIN, dns_zone_matcher); - register_custom_lookup(DOMAIN, dns_zone_lookup); - ## The DNS_ZONE subtype needs added because it's ultimately - ## a subset of DOMAIN and will need to be searched as well. - register_custom_matcher(DNS_ZONE, dns_zone_matcher); - register_custom_lookup(DNS_ZONE, dns_zone_lookup); - } diff --git a/scripts/base/frameworks/intel/plugins/set.bro b/scripts/base/frameworks/intel/plugins/set.bro deleted file mode 100644 index b169e91972..0000000000 --- a/scripts/base/frameworks/intel/plugins/set.bro +++ /dev/null @@ -1,19 +0,0 @@ -module Intel; - -redef record Intel::Indexes += { - hosts: set[addr] &default=set(); - strings: set[string, SubType] &default=set(); -}; - -redef plugins += { - [$index() = { - - }, - $match(found: Found): bool = { - - }, - $lookup(found: Found): set[Item] = { - - } - ] -}; \ No newline at end of file From a803cae92e7e593c9e537a96a567bbce580e87dd Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 28 Sep 2012 07:07:02 -0400 Subject: [PATCH 04/22] Intelligence framework checkpoint - Basic API seems to work, but tests aren't updated yet. - Several scripts are available in policy/frameworks/intel that call the "seen" function to provide data into the intel framework to be tested. - Intel::policy is not done yet and needs to be discussed to figure out what it needs to have. - Running the intel framework and having it do something finally is really cool!
--- scripts/base/frameworks/intel/__load__.bro | 2 +- scripts/base/frameworks/intel/cluster.bro | 8 +- .../frameworks/intel/http-user-agents.bro | 67 ----------- scripts/base/frameworks/intel/main.bro | 105 ++++++++++++------ scripts/policy/frameworks/intel/__load__.bro | 6 + .../frameworks/intel/conn-established.bro | 14 +++ scripts/policy/frameworks/intel/dns.bro | 16 +++ .../frameworks/intel/http-host-header.bro | 16 +++ scripts/policy/frameworks/intel/http-url.bro | 16 +++ .../frameworks/intel/http-user-agent.bro | 16 +++ scripts/policy/frameworks/intel/ssl.bro | 41 +++++++ 11 files changed, 198 insertions(+), 109 deletions(-) delete mode 100644 scripts/base/frameworks/intel/http-user-agents.bro create mode 100644 scripts/policy/frameworks/intel/__load__.bro create mode 100644 scripts/policy/frameworks/intel/conn-established.bro create mode 100644 scripts/policy/frameworks/intel/dns.bro create mode 100644 scripts/policy/frameworks/intel/http-host-header.bro create mode 100644 scripts/policy/frameworks/intel/http-url.bro create mode 100644 scripts/policy/frameworks/intel/http-user-agent.bro create mode 100644 scripts/policy/frameworks/intel/ssl.bro diff --git a/scripts/base/frameworks/intel/__load__.bro b/scripts/base/frameworks/intel/__load__.bro index c6822212c0..806159d938 100644 --- a/scripts/base/frameworks/intel/__load__.bro +++ b/scripts/base/frameworks/intel/__load__.bro @@ -5,4 +5,4 @@ @load base/frameworks/cluster @if ( Cluster::is_enabled() ) @load ./cluster -@endif \ No newline at end of file +@endif diff --git a/scripts/base/frameworks/intel/cluster.bro b/scripts/base/frameworks/intel/cluster.bro index 5b5f67e978..4d8885c749 100644 --- a/scripts/base/frameworks/intel/cluster.bro +++ b/scripts/base/frameworks/intel/cluster.bro @@ -21,14 +21,14 @@ redef record Item += { }; # Primary intelligence distribution comes from manager. 
-redef Cluster::manager2worker_events += /Intel::cluster_(new|updated)_item/; +redef Cluster::manager2worker_events += /^Intel::cluster_.*/; # If a worker finds intelligence and adds it, it should share it back to the manager. -redef Cluster::worker2manager_events += /Intel::(match_in_.*_no_items|cluster_(new|updated)_item)/; +redef Cluster::worker2manager_events += /^Intel::(cluster_.*|match_no_items)/; @if ( Cluster::local_node_type() == Cluster::MANAGER ) -event Intel::match_in_conn_no_items(c: connection, seen: Seen) &priority=5 +event Intel::match_no_items(s: Seen) &priority=5 { - event Intel::match_in_conn(c, seen, Intel::get_items(seen)); + event Intel::match(c, s, Intel::get_items(s)); } @endif diff --git a/scripts/base/frameworks/intel/http-user-agents.bro b/scripts/base/frameworks/intel/http-user-agents.bro deleted file mode 100644 index c9150573c0..0000000000 --- a/scripts/base/frameworks/intel/http-user-agents.bro +++ /dev/null @@ -1,67 +0,0 @@ - -@load base/protocols/http -@load base/frameworks/intel - -module HTTP; - -export { - redef enum Intel::Where += { - HTTP::IN_HEADER, - HTTP::IN_REQUEST, - HTTP::IN_HOST_HEADER, - HTTP::IN_CONN_EST, - HTTP::IN_DNS_REQUEST, - }; -} - -event connection_established(c: connection) - { - Intel::found_in_conn(c, [$host=c$id$orig_h, $where=IN_CONN_EST]); - Intel::found_in_conn(c, [$host=c$id$resp_h, $where=IN_CONN_EST]); - } - -event http_header(c: connection, is_orig: bool, name: string, value: string) - { - if ( is_orig && name == "USER-AGENT" ) - Intel::found_in_conn(c, [$str=value, - $str_type=Intel::USER_AGENT, - $where=IN_HEADER]); - - if ( is_orig && name == "HOST" ) - Intel::found_in_conn(c, [$str=value, - $str_type=Intel::DOMAIN, - $where=IN_HOST_HEADER]); - } - -event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) - { - if ( c?$http ) - { - if ( c$http?$user_agent ) - Intel::found_in_conn(c, [$str=c$http$user_agent, - $str_type=Intel::USER_AGENT, - $where=IN_HEADER]); - - 
Intel::found_in_conn(c, [$str=HTTP::build_url(c$http), - $str_type=Intel::URL, - $where=IN_REQUEST]); - } - } - - -event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qclass: count) - { - Intel::found_in_conn(c, [$str=query, - $str_type=Intel::DOMAIN, - $where=IN_DNS_REQUEST]); - - } - -event Intel::match_in_conn(c: connection, found: Intel::Found, items: set[Intel::Item]) - { - print "matched one!"; - for ( i in items ) - { - print " " + i$meta$desc; - } - } \ No newline at end of file diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index dbf40f637d..94d26362c0 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -11,8 +11,7 @@ export { redef enum Log::ID += { LOG }; redef enum Notice::Type += { - ## This notice should be used in all detector scripts to indicate - ## an intelligence based detection. + ## Notice type to indicate an intelligence hit. Detection, }; @@ -64,29 +63,42 @@ export { }; type Seen: record { - host: addr &optional; - str: string &optional; - str_type: StrType &optional; + host: addr &optional &log; + str: string &optional &log; + str_type: StrType &optional &log; - where: Where; + where: Where &log; + + conn: connection &optional; + }; + + type Info: record { + ts: time &log; + + uid: string &log &optional; + id: conn_id &log &optional; + + seen: Seen &log; }; type PolicyItem: record { - pred: function(seen: Seen, item: Item): bool &optional; + pred: function(s: Seen, item: Item): bool &optional; log_it: bool &default=T; }; ## Intelligence data manipulation functions. global insert: function(item: Item); - global delete_item: function(item: Item): bool; ## Function to declare discovery of a piece of data in order to check ## it against known intelligence for matches. - global seen_in_conn: function(c: connection, seen: Seen); + global seen: function(s: Seen); ## Intelligence policy variable for handling matches. 
- const policy: set[PolicyItem] = {} &redef; + const policy: set[PolicyItem] = { + # [$pred(s: Seen) = { return T; }, + # $action=Intel::ACTION_LOG] + } &redef; ## API Events that indicate when various things happen internally within the ## intelligence framework. @@ -94,34 +106,40 @@ export { global updated_item: event(item: Item); } -## Event to represent a match happening in a connection. On clusters there -## is no assurance as to where this event will be generated so don't -## assume that arbitrary global state beyond the given data -## will be available. -global match_in_conn: event(c: connection, seen: Seen, items: set[Item]); +# Event to represent a match happening in a connection. On clusters there +# is no assurance as to where this event will be generated so don't +# assume that arbitrary global state beyond the given data +# will be available. +global match: event(s: Seen, items: set[Item]); # Internal handler for conn oriented matches with no metadata based on the have_full_data setting. -global match_in_conn_no_items: event(c: connection, seen: Seen); +global match_no_items: event(s: Seen); -## Optionally store metadata. This is used internally depending on -## if this is a cluster deployment or not. +# Optionally store metadata. This is used internally depending on +# if this is a cluster deployment or not. const have_full_data = T &redef; +# The in memory data structure for holding intelligence. 
type DataStore: record { net_data: table[subnet] of set[MetaData]; string_data: table[string, StrType] of set[MetaData]; }; global data_store: DataStore; -function find(seen: Seen): bool +event bro_init() &priority=5 { - if ( seen?$host && - seen$host in data_store$net_data ) + Log::create_stream(LOG, [$columns=Info]); + } + +function find(s: Seen): bool + { + if ( s?$host && + s$host in data_store$net_data ) { return T; } - else if ( seen?$str && seen?$str_type && - [seen$str, seen$str_type] in data_store$string_data ) + else if ( s?$str && s?$str_type && + [s$str, s$str_type] in data_store$string_data ) { return T; } @@ -131,7 +149,7 @@ function find(seen: Seen): bool } } -function get_items(seen: Seen): set[Item] +function get_items(s: Seen): set[Item] { local item: Item; local return_data: set[Item] = set(); @@ -144,28 +162,28 @@ function get_items(seen: Seen): set[Item] return return_data; } - if ( seen?$host ) + if ( s?$host ) { # See if the host is known about and it has meta values - if ( seen$host in data_store$net_data ) + if ( s$host in data_store$net_data ) { - for ( m in data_store$net_data[seen$host] ) + for ( m in data_store$net_data[s$host] ) { # TODO: the lookup should be finding all and not just most specific # and $host/$net should have the correct value. 
- item = [$host=seen$host, $meta=m]; + item = [$host=s$host, $meta=m]; add return_data[item]; } } } - else if ( seen?$str && seen?$str_type ) + else if ( s?$str && s?$str_type ) { # See if the string is known about and it has meta values - if ( [seen$str, seen$str_type] in data_store$string_data ) + if ( [s$str, s$str_type] in data_store$string_data ) { - for ( m in data_store$string_data[seen$str, seen$str_type] ) + for ( m in data_store$string_data[s$str, s$str_type] ) { - item = [$str=seen$str, $str_type=seen$str_type, $meta=m]; + item = [$str=s$str, $str_type=s$str_type, $meta=m]; add return_data[item]; } } @@ -174,18 +192,31 @@ function get_items(seen: Seen): set[Item] return return_data; } -function Intel::seen_in_conn(c: connection, seen: Seen) +event Intel::match(s: Seen, items: set[Item]) { - if ( find(seen) ) + local info: Info = [$ts=network_time(), $seen=s]; + + if ( s?$conn ) + { + info$uid = s$conn$uid; + info$id = s$conn$id; + } + + Log::write(Intel::LOG, info); + } + +function Intel::seen(s: Seen) + { + if ( find(s) ) { if ( have_full_data ) { - local items = get_items(seen); - event Intel::match_in_conn(c, seen, items); + local items = get_items(s); + event Intel::match(s, items); } else { - event Intel::match_in_conn_no_items(c, seen); + event Intel::match_no_items(s); } } } diff --git a/scripts/policy/frameworks/intel/__load__.bro b/scripts/policy/frameworks/intel/__load__.bro new file mode 100644 index 0000000000..5eead37872 --- /dev/null +++ b/scripts/policy/frameworks/intel/__load__.bro @@ -0,0 +1,6 @@ +@load ./conn-established +@load ./dns +@load ./http-host-header +@load ./http-url +@load ./http-user-agent +@load ./ssl \ No newline at end of file diff --git a/scripts/policy/frameworks/intel/conn-established.bro b/scripts/policy/frameworks/intel/conn-established.bro new file mode 100644 index 0000000000..7d0007d20f --- /dev/null +++ b/scripts/policy/frameworks/intel/conn-established.bro @@ -0,0 +1,14 @@ +@load base/frameworks/intel + +export 
{ + redef enum Intel::Where += { + Conn::IN_ORIG, + Conn::IN_RESP, + }; +} + +event connection_established(c: connection) + { + Intel::seen([$host=c$id$orig_h, $conn=c, $where=Conn::IN_ORIG]); + Intel::seen([$host=c$id$resp_h, $conn=c, $where=Conn::IN_RESP]); + } diff --git a/scripts/policy/frameworks/intel/dns.bro b/scripts/policy/frameworks/intel/dns.bro new file mode 100644 index 0000000000..3e2078b29b --- /dev/null +++ b/scripts/policy/frameworks/intel/dns.bro @@ -0,0 +1,16 @@ +@load base/frameworks/intel + +export { + redef enum Intel::Where += { + DNS::IN_REQUEST, + DNS::IN_RESPONSE, + }; +} + +event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qclass: count) + { + Intel::seen([$str=query, + $str_type=Intel::DOMAIN, + $conn=c, + $where=DNS::IN_REQUEST]); + } diff --git a/scripts/policy/frameworks/intel/http-host-header.bro b/scripts/policy/frameworks/intel/http-host-header.bro new file mode 100644 index 0000000000..590f1f1e3e --- /dev/null +++ b/scripts/policy/frameworks/intel/http-host-header.bro @@ -0,0 +1,16 @@ +@load base/frameworks/intel + +export { + redef enum Intel::Where += { + HTTP::IN_HOST_HEADER, + }; +} + +event http_header(c: connection, is_orig: bool, name: string, value: string) + { + if ( is_orig && name == "HOST" ) + Intel::seen([$str=value, + $str_type=Intel::DOMAIN, + $conn=c, + $where=HTTP::IN_HOST_HEADER]); + } diff --git a/scripts/policy/frameworks/intel/http-url.bro b/scripts/policy/frameworks/intel/http-url.bro new file mode 100644 index 0000000000..d5013b3252 --- /dev/null +++ b/scripts/policy/frameworks/intel/http-url.bro @@ -0,0 +1,16 @@ +@load base/frameworks/intel + +export { + redef enum Intel::Where += { + HTTP::IN_URL, + }; +} + +event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) + { + if ( is_orig && c?$http ) + Intel::seen([$str=HTTP::build_url(c$http), + $str_type=Intel::URL, + $conn=c, + $where=HTTP::IN_URL]); + } diff --git 
a/scripts/policy/frameworks/intel/http-user-agent.bro b/scripts/policy/frameworks/intel/http-user-agent.bro new file mode 100644 index 0000000000..4a4570f817 --- /dev/null +++ b/scripts/policy/frameworks/intel/http-user-agent.bro @@ -0,0 +1,16 @@ +@load base/frameworks/intel + +export { + redef enum Intel::Where += { + HTTP::IN_USER_AGENT_HEADER, + }; +} + +event http_header(c: connection, is_orig: bool, name: string, value: string) + { + if ( is_orig && name == "USER-AGENT" ) + Intel::seen([$str=value, + $str_type=Intel::USER_AGENT, + $conn=c, + $where=HTTP::IN_USER_AGENT_HEADER]); + } diff --git a/scripts/policy/frameworks/intel/ssl.bro b/scripts/policy/frameworks/intel/ssl.bro new file mode 100644 index 0000000000..9a27e40c46 --- /dev/null +++ b/scripts/policy/frameworks/intel/ssl.bro @@ -0,0 +1,41 @@ +@load base/frameworks/intel + +export { + redef enum Intel::Where += { + SSL::IN_SERVER_CERT, + SSL::IN_CLIENT_CERT, + SSL::IN_SERVER_NAME, + }; +} + + +event x509_certificate(c: connection, is_orig: bool, cert: X509, chain_idx: count, chain_len: count, der_cert: string) + { + if ( chain_idx == 0 ) + { + if ( /emailAddress=/ in cert$subject ) + { + local email = sub(cert$subject, /^.*emailAddress=/, ""); + email = sub(email, /,.*$/, ""); + Intel::seen([$str=email, + $str_type=Intel::EMAIL, + $conn=c, + $where=(is_orig ? SSL::IN_CLIENT_CERT : SSL::IN_SERVER_CERT)]); + } + + Intel::seen([$str=sha1_hash(der_cert), + $str_type=Intel::CERT_HASH, + $conn=c, + $where=(is_orig ? 
SSL::IN_CLIENT_CERT : SSL::IN_SERVER_CERT)]); + } + } + +event ssl_extension(c: connection, is_orig: bool, code: count, val: string) + { + if ( is_orig && SSL::extensions[code] == "server_name" && + c?$ssl && c$ssl?$server_name ) + Intel::seen([$str=c$ssl$server_name, + $str_type=Intel::DOMAIN, + $conn=c, + $where=SSL::IN_SERVER_NAME]); + } From bd6779b83c448a42b9e302535a530f28802cab5f Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 28 Sep 2012 09:49:00 -0400 Subject: [PATCH 05/22] Fixed a cluster support bug. --- scripts/base/frameworks/intel/cluster.bro | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/base/frameworks/intel/cluster.bro b/scripts/base/frameworks/intel/cluster.bro index 4d8885c749..fa6f129a76 100644 --- a/scripts/base/frameworks/intel/cluster.bro +++ b/scripts/base/frameworks/intel/cluster.bro @@ -28,7 +28,7 @@ redef Cluster::worker2manager_events += /^Intel::(cluster_.*|match_no_items)/; @if ( Cluster::local_node_type() == Cluster::MANAGER ) event Intel::match_no_items(s: Seen) &priority=5 { - event Intel::match(c, s, Intel::get_items(s)); + event Intel::match(s, Intel::get_items(s)); } @endif From 2366b21787927405029db962e3503be39953d42e Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 28 Sep 2012 13:25:06 -0400 Subject: [PATCH 06/22] One test for cluster transparency of the intel framework. 
--- scripts/base/frameworks/intel/cluster.bro | 4 +- scripts/base/frameworks/intel/main.bro | 46 ++++++++------ scripts/policy/frameworks/intel/__load__.bro | 2 +- .../frameworks/intel/http-user-agent.bro | 16 ----- .../manager-1..stdout | 9 +-- .../worker-1..stdout | 10 +-- .../worker-2..stdout | 11 ++-- .../frameworks/intel/cluster-transparency.bro | 62 +++++++++++++------ .../base/frameworks/intel/dns-zone-plugin.bro | 6 +- 9 files changed, 86 insertions(+), 80 deletions(-) delete mode 100644 scripts/policy/frameworks/intel/http-user-agent.bro diff --git a/scripts/base/frameworks/intel/cluster.bro b/scripts/base/frameworks/intel/cluster.bro index fa6f129a76..ec1601fed3 100644 --- a/scripts/base/frameworks/intel/cluster.bro +++ b/scripts/base/frameworks/intel/cluster.bro @@ -21,9 +21,9 @@ redef record Item += { }; # Primary intelligence distribution comes from manager. -redef Cluster::manager2worker_events += /^Intel::cluster_.*/; +redef Cluster::manager2worker_events += /^Intel::cluster_.*$/; # If a worker finds intelligence and adds it, it should share it back to the manager. -redef Cluster::worker2manager_events += /^Intel::(cluster_.*|match_no_items)/; +redef Cluster::worker2manager_events += /^Intel::(cluster_.*|match_no_items)$/; @if ( Cluster::local_node_type() == Cluster::MANAGER ) event Intel::match_no_items(s: Seen) &priority=5 diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 94d26362c0..74dd757642 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -88,7 +88,7 @@ export { }; ## Intelligence data manipulation functions. - global insert: function(item: Item); + global insert: function(item: Item); ## Function to declare discovery of a piece of data in order to check ## it against known intelligence for matches. 
@@ -192,21 +192,16 @@ function get_items(s: Seen): set[Item] return return_data; } -event Intel::match(s: Seen, items: set[Item]) - { - local info: Info = [$ts=network_time(), $seen=s]; - if ( s?$conn ) - { - info$uid = s$conn$uid; - info$id = s$conn$id; - } - - Log::write(Intel::LOG, info); - } +#global total_seen=0; +#event bro_done() +# { +# print fmt("total seen: %d", total_seen); +# } function Intel::seen(s: Seen) { + #++total_seen; if ( find(s) ) { if ( have_full_data ) @@ -235,6 +230,19 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool return F; } +event Intel::match(s: Seen, items: set[Item]) + { + local info: Info = [$ts=network_time(), $seen=s]; + + if ( s?$conn ) + { + info$uid = s$conn$uid; + info$id = s$conn$id; + } + + Log::write(Intel::LOG, info); + } + function insert(item: Item) { if ( item?$str && !item?$str_type ) @@ -270,6 +278,7 @@ function insert(item: Item) metas = data_store$string_data[item$str, item$str_type]; } + local updated = F; if ( have_full_data ) { for ( m in metas ) @@ -284,17 +293,16 @@ function insert(item: Item) else { # Same source, different metadata means updated item. 
- event Intel::updated_item(item); - break; + updated = T; } } - else - { - event Intel::new_item(item); - break; - } } add metas[item$meta]; } + + if ( updated ) + event Intel::updated_item(item); + else + event Intel::new_item(item); } \ No newline at end of file diff --git a/scripts/policy/frameworks/intel/__load__.bro b/scripts/policy/frameworks/intel/__load__.bro index 5eead37872..ffb8263afd 100644 --- a/scripts/policy/frameworks/intel/__load__.bro +++ b/scripts/policy/frameworks/intel/__load__.bro @@ -2,5 +2,5 @@ @load ./dns @load ./http-host-header @load ./http-url -@load ./http-user-agent +@load ./user-agents @load ./ssl \ No newline at end of file diff --git a/scripts/policy/frameworks/intel/http-user-agent.bro b/scripts/policy/frameworks/intel/http-user-agent.bro deleted file mode 100644 index 4a4570f817..0000000000 --- a/scripts/policy/frameworks/intel/http-user-agent.bro +++ /dev/null @@ -1,16 +0,0 @@ -@load base/frameworks/intel - -export { - redef enum Intel::Where += { - HTTP::IN_USER_AGENT_HEADER, - }; -} - -event http_header(c: connection, is_orig: bool, name: string, value: string) - { - if ( is_orig && name == "USER-AGENT" ) - Intel::seen([$str=value, - $str_type=Intel::USER_AGENT, - $conn=c, - $where=HTTP::IN_USER_AGENT_HEADER]); - } diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout index 59d996c821..9e60d6faf2 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout @@ -1,7 +1,2 @@ -1.2.3.4 -{ -b, -c, -a -} -foobar +cluster_new_item: 123.123.123.123 from source worker-1 (from peer: worker-1) +cluster_new_item: 4.3.2.1 from source worker-2 (from peer: worker-2) diff --git 
a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout index 59d996c821..02fdcb8cd3 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout @@ -1,7 +1,3 @@ -1.2.3.4 -{ -b, -c, -a -} -foobar +cluster_new_item: 1.2.3.4 from source manager (from peer: manager-1) +cluster_new_item: 123.123.123.123 from source worker-1 (from peer: manager-1) +cluster_new_item: 4.3.2.1 from source worker-2 (from peer: manager-1) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout index 59d996c821..4146be9475 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout @@ -1,7 +1,4 @@ -1.2.3.4 -{ -b, -c, -a -} -foobar +cluster_new_item: 1.2.3.4 from source manager (from peer: manager-1) +cluster_new_item: 123.123.123.123 from source worker-1 (from peer: manager-1) +cluster_new_item: 4.3.2.1 from source worker-2 (from peer: manager-1) +terminating! diff --git a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro index 3c21946938..22d948e270 100644 --- a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro +++ b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro @@ -3,7 +3,7 @@ # @TEST-EXEC: btest-bg-run manager-1 BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT # @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT # @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. 
CLUSTER_NODE=worker-2 bro %INPUT -# @TEST-EXEC: btest-bg-wait -k 3 +# @TEST-EXEC: btest-bg-wait -k 10 # @TEST-EXEC: btest-diff manager-1/.stdout # @TEST-EXEC: btest-diff worker-1/.stdout # @TEST-EXEC: btest-diff worker-2/.stdout @@ -11,34 +11,60 @@ @TEST-START-FILE cluster-layout.bro redef Cluster::nodes = { ["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")], - ["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1",$interface="eth0"], - ["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1",$interface="eth1"], + ["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1"], + ["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1"], }; @TEST-END-FILE +@load base/frameworks/control + +module Intel; + event remote_connection_handshake_done(p: event_peer) { # Insert the data once both workers are connected. if ( Cluster::local_node_type() == Cluster::MANAGER && Cluster::worker_count == 2 ) { - Intel::insert([$ip=1.2.3.4,$meta=[$source="foobar", $class=Intel::MALICIOUS, $tags=set("a","b","c")]]); + Intel::insert([$host=1.2.3.4,$meta=[$source="manager"]]); + } + } + +global worker2_data = 0; +global sent_data = F; +event Intel::cluster_new_item(item: Intel::Item) + { + if ( ! is_remote_event() ) + return; + + print fmt("cluster_new_item: %s from source %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr); + + if ( ! sent_data ) + { + # We wait to insert data here because we can now be sure the + # full cluster is constructed. 
+ sent_data = T; + if ( Cluster::node == "worker-1" ) + Intel::insert([$host=123.123.123.123,$meta=[$source="worker-1"]]); + if ( Cluster::node == "worker-2" ) + Intel::insert([$host=4.3.2.1,$meta=[$source="worker-2"]]); + } + + # We're forcing worker-2 to die first when it has three intelligence items + # which were distributed over the cluster (data inserted locally is resent). + if ( Cluster::node == "worker-2" ) + { + ++worker2_data; + if ( worker2_data == 3 ) + { + print "terminating!"; + event Control::shutdown_request(); + } } } event remote_connection_closed(p: event_peer) { - if ( Cluster::local_node_type() == Cluster::MANAGER && Cluster::worker_count == 0 ) - terminate_communication(); - } - -# This should print out a single time on the manager and each worker -# due to the cluster transparency. -event Intel::new_item(item: Intel::Item) - { - print item$ip; - print item$meta$tags; - print item$meta$source; - - if ( Cluster::local_node_type() == Cluster::WORKER ) - terminate_communication(); + # Cascading termination + #print fmt("disconnected from: %s", p); + terminate_communication(); } \ No newline at end of file diff --git a/testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro b/testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro index 8bcbc0ec7b..6e7b3d2c70 100644 --- a/testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro +++ b/testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro @@ -4,11 +4,11 @@ event bro_init() { Intel::insert([$str="bad.com", $subtype=Intel::DNS_ZONE, $meta=[$source="src1", $class=Intel::MALICIOUS]]); - local query: Intel::Query = [$str="some.host.bad.com", $subtype=Intel::DOMAIN, $class=Intel::MALICIOUS]; - if ( Intel::matcher(query) ) + local q: Intel::Query = [$str="some.host.bad.com", $subtype=Intel::DOMAIN, $class=Intel::MALICIOUS]; + if ( Intel::query(q) ) { print "It matched!"; - local items = Intel::lookup(query); + local items = Intel::lookup(q); for ( item in items ) { print 
item$str; From 0bcedcd204b4c305b62e42ebfd628ef4447b8f50 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 28 Sep 2012 13:25:37 -0400 Subject: [PATCH 07/22] Restructuring the scripts that feed data into the intel framework slightly. --- .../frameworks/intel/conn-established.bro | 8 +------ scripts/policy/frameworks/intel/dns.bro | 8 +------ .../frameworks/intel/http-host-header.bro | 7 +----- scripts/policy/frameworks/intel/http-url.bro | 7 +----- scripts/policy/frameworks/intel/ssl.bro | 10 +-------- .../policy/frameworks/intel/user-agents.bro | 20 +++++++++++++++++ .../frameworks/intel/where-locations.bro | 22 +++++++++++++++++++ 7 files changed, 47 insertions(+), 35 deletions(-) create mode 100644 scripts/policy/frameworks/intel/user-agents.bro create mode 100644 scripts/policy/frameworks/intel/where-locations.bro diff --git a/scripts/policy/frameworks/intel/conn-established.bro b/scripts/policy/frameworks/intel/conn-established.bro index 7d0007d20f..a2e67b292b 100644 --- a/scripts/policy/frameworks/intel/conn-established.bro +++ b/scripts/policy/frameworks/intel/conn-established.bro @@ -1,11 +1,5 @@ @load base/frameworks/intel - -export { - redef enum Intel::Where += { - Conn::IN_ORIG, - Conn::IN_RESP, - }; -} +@load ./where-locations event connection_established(c: connection) { diff --git a/scripts/policy/frameworks/intel/dns.bro b/scripts/policy/frameworks/intel/dns.bro index 3e2078b29b..a0dee47acf 100644 --- a/scripts/policy/frameworks/intel/dns.bro +++ b/scripts/policy/frameworks/intel/dns.bro @@ -1,11 +1,5 @@ @load base/frameworks/intel - -export { - redef enum Intel::Where += { - DNS::IN_REQUEST, - DNS::IN_RESPONSE, - }; -} +@load ./where-locations event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qclass: count) { diff --git a/scripts/policy/frameworks/intel/http-host-header.bro b/scripts/policy/frameworks/intel/http-host-header.bro index 590f1f1e3e..f16b1628aa 100644 --- a/scripts/policy/frameworks/intel/http-host-header.bro 
+++ b/scripts/policy/frameworks/intel/http-host-header.bro @@ -1,10 +1,5 @@ @load base/frameworks/intel - -export { - redef enum Intel::Where += { - HTTP::IN_HOST_HEADER, - }; -} +@load ./where-locations event http_header(c: connection, is_orig: bool, name: string, value: string) { diff --git a/scripts/policy/frameworks/intel/http-url.bro b/scripts/policy/frameworks/intel/http-url.bro index d5013b3252..7c4086a7e6 100644 --- a/scripts/policy/frameworks/intel/http-url.bro +++ b/scripts/policy/frameworks/intel/http-url.bro @@ -1,10 +1,5 @@ @load base/frameworks/intel - -export { - redef enum Intel::Where += { - HTTP::IN_URL, - }; -} +@load ./where-locations event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) { diff --git a/scripts/policy/frameworks/intel/ssl.bro b/scripts/policy/frameworks/intel/ssl.bro index 9a27e40c46..394df63020 100644 --- a/scripts/policy/frameworks/intel/ssl.bro +++ b/scripts/policy/frameworks/intel/ssl.bro @@ -1,13 +1,5 @@ @load base/frameworks/intel - -export { - redef enum Intel::Where += { - SSL::IN_SERVER_CERT, - SSL::IN_CLIENT_CERT, - SSL::IN_SERVER_NAME, - }; -} - +@load ./where-locations event x509_certificate(c: connection, is_orig: bool, cert: X509, chain_idx: count, chain_len: count, der_cert: string) { diff --git a/scripts/policy/frameworks/intel/user-agents.bro b/scripts/policy/frameworks/intel/user-agents.bro new file mode 100644 index 0000000000..693738a431 --- /dev/null +++ b/scripts/policy/frameworks/intel/user-agents.bro @@ -0,0 +1,20 @@ +@load base/frameworks/intel +@load ./where-locations + +event http_header(c: connection, is_orig: bool, name: string, value: string) + { + if ( is_orig && name == "USER-AGENT" ) + Intel::seen([$str=value, + $str_type=Intel::USER_AGENT, + $conn=c, + $where=HTTP::IN_USER_AGENT_HEADER]); + } + +event mime_end_entity(c: connection) + { + if ( c?$smtp && c$smtp?$user_agent ) + Intel::seen([$str=c$smtp$user_agent, + $str_type=Intel::USER_AGENT, + $conn=c, + 
$where=SMTP::IN_HEADER]); + } diff --git a/scripts/policy/frameworks/intel/where-locations.bro b/scripts/policy/frameworks/intel/where-locations.bro new file mode 100644 index 0000000000..e6faec6809 --- /dev/null +++ b/scripts/policy/frameworks/intel/where-locations.bro @@ -0,0 +1,22 @@ +@load base/frameworks/intel + +export { + redef enum Intel::Where += { + Conn::IN_ORIG, + Conn::IN_RESP, + DNS::IN_REQUEST, + DNS::IN_RESPONSE, + HTTP::IN_HOST_HEADER, + HTTP::IN_USER_AGENT_HEADER, + HTTP::IN_URL, + SMTP::IN_MAIL_FROM, + SMTP::IN_RCPT_TO, + SMTP::IN_FROM, + SMTP::IN_TO, + SMTP::IN_CC, + SSL::IN_SERVER_CERT, + SSL::IN_CLIENT_CERT, + SSL::IN_SERVER_NAME, + SMTP::IN_HEADER, + }; +} From 38f0b7927c94a636c820c1855b3b7a8e0e3ae356 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 28 Sep 2012 14:52:05 -0400 Subject: [PATCH 08/22] Updated intel framework test to include matching. --- scripts/base/frameworks/intel/main.bro | 9 +++++---- .../manager-1.intel.log | 10 ++++++++++ .../worker-2..stdout | 2 +- .../frameworks/intel/cluster-transparency.bro | 16 +++++++++++++--- .../base/frameworks/intel/dns-zone-plugin.bro | 18 ------------------ 5 files changed, 29 insertions(+), 26 deletions(-) create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log delete mode 100644 testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 74dd757642..465c98f644 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -18,7 +18,7 @@ export { ## String data needs to be further categoried since it could represent ## and number of types of data. type StrType: enum { - ## A complete URL. + ## A complete URL without the prefix "http://". URL, ## User-Agent string, typically HTTP or mail message body. 
USER_AGENT, @@ -59,7 +59,7 @@ export { ## Enum to represent where data came from when it was discovered. type Where: enum { ## A catchall value to represent data of unknown provenance. - ANYWHERE, + IN_ANYWHERE, }; type Seen: record { @@ -104,6 +104,8 @@ export { ## intelligence framework. global new_item: event(item: Item); global updated_item: event(item: Item); + + global log_intel: event(rec: Info); } # Event to represent a match happening in a connection. On clusters there @@ -128,7 +130,7 @@ global data_store: DataStore; event bro_init() &priority=5 { - Log::create_stream(LOG, [$columns=Info]); + Log::create_stream(LOG, [$columns=Info, $ev=log_intel]); } function find(s: Seen): bool @@ -192,7 +194,6 @@ function get_items(s: Seen): set[Item] return return_data; } - #global total_seen=0; #event bro_done() # { diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log new file mode 100644 index 0000000000..1af88a5929 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log @@ -0,0 +1,10 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path intel +#open 2012-09-28-18-50-43 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where +#types time string addr port addr port addr string enum enum +1348858243.346443 - - - - - 123.123.123.123 - - Intel::IN_ANYWHERE +#close 2012-09-28-18-50-53 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout index 4146be9475..168b65a52d 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout +++ 
b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout @@ -1,4 +1,4 @@ cluster_new_item: 1.2.3.4 from source manager (from peer: manager-1) cluster_new_item: 123.123.123.123 from source worker-1 (from peer: manager-1) cluster_new_item: 4.3.2.1 from source worker-2 (from peer: manager-1) -terminating! +Doing a lookup diff --git a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro index 22d948e270..f50462746b 100644 --- a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro +++ b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro @@ -5,6 +5,7 @@ # @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-2 bro %INPUT # @TEST-EXEC: btest-bg-wait -k 10 # @TEST-EXEC: btest-diff manager-1/.stdout +# @TEST-EXEC: btest-diff manager-1/intel.log # @TEST-EXEC: btest-diff worker-1/.stdout # @TEST-EXEC: btest-diff worker-2/.stdout @@ -20,6 +21,8 @@ redef Cluster::nodes = { module Intel; +redef Log::default_rotation_interval=0sec; + event remote_connection_handshake_done(p: event_peer) { # Insert the data once both workers are connected. @@ -49,19 +52,26 @@ event Intel::cluster_new_item(item: Intel::Item) Intel::insert([$host=4.3.2.1,$meta=[$source="worker-2"]]); } - # We're forcing worker-2 to die first when it has three intelligence items + # We're forcing worker-2 to do a lookup when it has three intelligence items # which were distributed over the cluster (data inserted locally is resent). if ( Cluster::node == "worker-2" ) { ++worker2_data; if ( worker2_data == 3 ) { - print "terminating!"; - event Control::shutdown_request(); + # Now that everything is inserted, see if we can match on the data inserted + # by worker-1. 
+ print "Doing a lookup"; + Intel::seen([$host=123.123.123.123, $where=Intel::IN_ANYWHERE]); } } } +event Intel::log_intel(rec: Intel::Info) + { + event Control::shutdown_request(); + } + event remote_connection_closed(p: event_peer) { # Cascading termination diff --git a/testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro b/testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro deleted file mode 100644 index 6e7b3d2c70..0000000000 --- a/testing/btest/scripts/base/frameworks/intel/dns-zone-plugin.bro +++ /dev/null @@ -1,18 +0,0 @@ -# @TEST-EXEC: bro %INPUT >out -# @TEST-EXEC: btest-diff out - -event bro_init() - { - Intel::insert([$str="bad.com", $subtype=Intel::DNS_ZONE, $meta=[$source="src1", $class=Intel::MALICIOUS]]); - local q: Intel::Query = [$str="some.host.bad.com", $subtype=Intel::DOMAIN, $class=Intel::MALICIOUS]; - if ( Intel::query(q) ) - { - print "It matched!"; - local items = Intel::lookup(q); - for ( item in items ) - { - print item$str; - print item$subtype; - } - } - } From 03532ff051273e5992abfd5e396870c9baa37e23 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 28 Sep 2012 16:38:34 -0400 Subject: [PATCH 09/22] Fixing a problem with intel distribution on clusters. --- scripts/base/frameworks/intel/__load__.bro | 1 + scripts/base/frameworks/intel/cluster.bro | 21 +++++++++++++++++++++ scripts/base/frameworks/intel/main.bro | 2 +- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/scripts/base/frameworks/intel/__load__.bro b/scripts/base/frameworks/intel/__load__.bro index 806159d938..08d0f62222 100644 --- a/scripts/base/frameworks/intel/__load__.bro +++ b/scripts/base/frameworks/intel/__load__.bro @@ -3,6 +3,7 @@ # The cluster framework must be loaded first. 
@load base/frameworks/cluster + @if ( Cluster::is_enabled() ) @load ./cluster @endif diff --git a/scripts/base/frameworks/intel/cluster.bro b/scripts/base/frameworks/intel/cluster.bro index ec1601fed3..de6e0dc6be 100644 --- a/scripts/base/frameworks/intel/cluster.bro +++ b/scripts/base/frameworks/intel/cluster.bro @@ -2,6 +2,7 @@ ##! toward distributing intelligence information across clusters. @load base/frameworks/cluster +@load ./input module Intel; @@ -25,11 +26,31 @@ redef Cluster::manager2worker_events += /^Intel::cluster_.*$/; # If a worker finds intelligence and adds it, it should share it back to the manager. redef Cluster::worker2manager_events += /^Intel::(cluster_.*|match_no_items)$/; +@if ( Cluster::local_node_type() != Cluster::MANAGER ) +redef Intel::data_store &synchronized; +@endif + @if ( Cluster::local_node_type() == Cluster::MANAGER ) event Intel::match_no_items(s: Seen) &priority=5 { event Intel::match(s, Intel::get_items(s)); } + +global initial_sync = F; +event remote_connection_handshake_done(p: event_peer) + { + # Insert the data once something is connected. + # This should only push the data to a single host where the + # normal Bro synchronization should take over. + if ( ! 
initial_sync ) + { + initial_sync = T; + for ( net in data_store$net_data ) + event Intel::cluster_new_item([$net=net, $meta=[$source=""]]); + for ( [str, str_type] in data_store$string_data ) + event Intel::cluster_new_item([$str=str, $str_type=str_type, $meta=[$source=""]]); + } + } @endif event Intel::cluster_new_item(item: Intel::Item) &priority=5 diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 465c98f644..b169592a06 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -126,7 +126,7 @@ type DataStore: record { net_data: table[subnet] of set[MetaData]; string_data: table[string, StrType] of set[MetaData]; }; -global data_store: DataStore; +global data_store: DataStore &redef; event bro_init() &priority=5 { From bacc99f64580c0b537a8502282055e97d5379970 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 28 Sep 2012 21:17:14 -0400 Subject: [PATCH 10/22] Added sources to the intel log. --- scripts/base/frameworks/intel/main.bro | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index b169592a06..9cde7458de 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -73,12 +73,13 @@ export { }; type Info: record { - ts: time &log; + ts: time &log; - uid: string &log &optional; - id: conn_id &log &optional; + uid: string &log &optional; + id: conn_id &log &optional; - seen: Seen &log; + seen: Seen &log; + sources: set[string] &log; }; type PolicyItem: record { @@ -233,7 +234,8 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool event Intel::match(s: Seen, items: set[Item]) { - local info: Info = [$ts=network_time(), $seen=s]; + local empty_set: set[string] = set(); + local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set]; if ( s?$conn ) { @@ -241,6 +243,9 @@ event Intel::match(s: Seen, items: 
set[Item]) info$id = s$conn$id; } + for ( item in items ) + add info$sources[item$meta$source]; + Log::write(Intel::LOG, info); } From 375769f42cc81900aa615df46b2405def4947dd8 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 28 Sep 2012 22:07:40 -0400 Subject: [PATCH 11/22] Added intelligence check for "Received" path checking and a bit of reshuffling. --- scripts/policy/frameworks/intel/__load__.bro | 5 ++-- .../{user-agents.bro => http-user-agents.bro} | 8 ------ scripts/policy/frameworks/intel/smtp.bro | 25 +++++++++++++++++++ .../frameworks/intel/where-locations.bro | 1 + 4 files changed, 29 insertions(+), 10 deletions(-) rename scripts/policy/frameworks/intel/{user-agents.bro => http-user-agents.bro} (58%) create mode 100644 scripts/policy/frameworks/intel/smtp.bro diff --git a/scripts/policy/frameworks/intel/__load__.bro b/scripts/policy/frameworks/intel/__load__.bro index ffb8263afd..b5bd74a06d 100644 --- a/scripts/policy/frameworks/intel/__load__.bro +++ b/scripts/policy/frameworks/intel/__load__.bro @@ -2,5 +2,6 @@ @load ./dns @load ./http-host-header @load ./http-url -@load ./user-agents -@load ./ssl \ No newline at end of file +@load ./http-user-agents +@load ./ssl +@load ./smtp \ No newline at end of file diff --git a/scripts/policy/frameworks/intel/user-agents.bro b/scripts/policy/frameworks/intel/http-user-agents.bro similarity index 58% rename from scripts/policy/frameworks/intel/user-agents.bro rename to scripts/policy/frameworks/intel/http-user-agents.bro index 693738a431..93445c1e43 100644 --- a/scripts/policy/frameworks/intel/user-agents.bro +++ b/scripts/policy/frameworks/intel/http-user-agents.bro @@ -10,11 +10,3 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) $where=HTTP::IN_USER_AGENT_HEADER]); } -event mime_end_entity(c: connection) - { - if ( c?$smtp && c$smtp?$user_agent ) - Intel::seen([$str=c$smtp$user_agent, - $str_type=Intel::USER_AGENT, - $conn=c, - $where=SMTP::IN_HEADER]); - } diff --git 
a/scripts/policy/frameworks/intel/smtp.bro b/scripts/policy/frameworks/intel/smtp.bro new file mode 100644 index 0000000000..6f72a3da2a --- /dev/null +++ b/scripts/policy/frameworks/intel/smtp.bro @@ -0,0 +1,25 @@ +@load base/frameworks/intel +@load ./where-locations + +event mime_end_entity(c: connection) + { + if ( c?$smtp ) + { + if ( c$smtp?$path ) + { + local path = c$smtp$path; + for ( i in path ) + { + Intel::seen([$host=path[i], + $conn=c, + $where=SMTP::IN_RECEIVED_HEADER]); + } + } + + if ( c$smtp?$user_agent ) + Intel::seen([$str=c$smtp$user_agent, + $str_type=Intel::USER_AGENT, + $conn=c, + $where=SMTP::IN_HEADER]); + } + } diff --git a/scripts/policy/frameworks/intel/where-locations.bro b/scripts/policy/frameworks/intel/where-locations.bro index e6faec6809..360ac15131 100644 --- a/scripts/policy/frameworks/intel/where-locations.bro +++ b/scripts/policy/frameworks/intel/where-locations.bro @@ -14,6 +14,7 @@ export { SMTP::IN_FROM, SMTP::IN_TO, SMTP::IN_CC, + SMTP::IN_RECEIVED_HEADER, SSL::IN_SERVER_CERT, SSL::IN_CLIENT_CERT, SSL::IN_SERVER_NAME, From 38468f9daa0c6f41e7588a1e4c954d10fe46e6bd Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 28 Sep 2012 22:43:36 -0400 Subject: [PATCH 12/22] Lots more intelligence checking in SMTP traffic. 
--- scripts/policy/frameworks/intel/smtp.bro | 47 ++++++++++++++++++- .../frameworks/intel/where-locations.bro | 3 +- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/scripts/policy/frameworks/intel/smtp.bro b/scripts/policy/frameworks/intel/smtp.bro index 6f72a3da2a..f44f0575dc 100644 --- a/scripts/policy/frameworks/intel/smtp.bro +++ b/scripts/policy/frameworks/intel/smtp.bro @@ -15,11 +15,56 @@ event mime_end_entity(c: connection) $where=SMTP::IN_RECEIVED_HEADER]); } } - + if ( c$smtp?$user_agent ) Intel::seen([$str=c$smtp$user_agent, $str_type=Intel::USER_AGENT, $conn=c, $where=SMTP::IN_HEADER]); + + if ( c$smtp?$x_originating_ip ) + Intel::seen([$host=c$smtp$x_originating_ip, + $conn=c, + $where=SMTP::IN_X_ORIGINATING_IP_HEADER]); + + if ( c$smtp?$mailfrom ) + Intel::seen([$str=c$smtp$mailfrom, + $str_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_MAIL_FROM]); + + if ( c$smtp?$rcptto ) + { + for ( rcptto in c$smtp$rcptto ) + { + Intel::seen([$str=rcptto, + $str_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_RCPT_TO]); + } + } + + if ( c$smtp?$from ) + Intel::seen([$str=c$smtp$from, + $str_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_FROM]); + + if ( c$smtp?$to ) + { + for ( email_to in c$smtp$to ) + { + Intel::seen([$str=email_to, + $str_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_TO]); + } + } + + if ( c$smtp?$reply_to ) + Intel::seen([$str=c$smtp$reply_to, + $str_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_REPLY_TO]); } } diff --git a/scripts/policy/frameworks/intel/where-locations.bro b/scripts/policy/frameworks/intel/where-locations.bro index 360ac15131..45be5892ce 100644 --- a/scripts/policy/frameworks/intel/where-locations.bro +++ b/scripts/policy/frameworks/intel/where-locations.bro @@ -13,8 +13,9 @@ export { SMTP::IN_RCPT_TO, SMTP::IN_FROM, SMTP::IN_TO, - SMTP::IN_CC, SMTP::IN_RECEIVED_HEADER, + SMTP::IN_REPLY_TO, + SMTP::IN_X_ORIGINATING_IP_HEADER, SSL::IN_SERVER_CERT, SSL::IN_CLIENT_CERT, SSL::IN_SERVER_NAME, From 
bf9651b323a347c4d0b878d965334d6fc4b46a5b Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 3 Oct 2012 16:25:02 -0400 Subject: [PATCH 13/22] Reworked cluster intelligence data distribution mechanism and fixed tests. - Intel data distribution on clusters is now pushed in whole by the manager when a worker connects. Additions after that point are managed by the normal single-item distribution mechanism already built into the intelligence framework. - The manager maintains the complete "minimal" data store that the workers use to do their matching so that full "minimal" data distribution is very easy. - Tests are cleaned up and work. --- scripts/base/frameworks/intel/cluster.bro | 60 +++++-------------- scripts/base/frameworks/intel/input.bro | 1 + scripts/base/frameworks/intel/main.bro | 53 ++++++++++++---- .../manager-1..stdout | 2 - .../manager-1.intel.log | 10 ---- .../worker-1..stdout | 3 - .../worker-2..stdout | 4 -- .../out | 3 - .../out | 1 - .../out | 3 - .../out | 3 - .../out | 3 - .../frameworks/intel/cluster-transparency.bro | 2 +- .../base/frameworks/intel/input-and-match.bro | 52 ++++++++-------- .../base/frameworks/intel/item-merge.bro | 23 ------- .../base/frameworks/intel/matching.bro | 38 ------------ 16 files changed, 84 insertions(+), 177 deletions(-) delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.dns-zone-plugin/out delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/out delete mode 100644 
testing/btest/Baseline/scripts.base.frameworks.intel.insert-and-matcher/out delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.item-merge/out delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.matching/out delete mode 100644 testing/btest/scripts/base/frameworks/intel/item-merge.bro delete mode 100644 testing/btest/scripts/base/frameworks/intel/matching.bro diff --git a/scripts/base/frameworks/intel/cluster.bro b/scripts/base/frameworks/intel/cluster.bro index de6e0dc6be..c210598b68 100644 --- a/scripts/base/frameworks/intel/cluster.bro +++ b/scripts/base/frameworks/intel/cluster.bro @@ -6,29 +6,23 @@ module Intel; +redef record Item += { + ## This field is used internally for cluster transparency to avoid + ## re-dispatching intelligence items over and over from workers. + first_dispatch: bool &default=T; +}; + # If this process is not a manager process, we don't want the full metadata @if ( Cluster::local_node_type() != Cluster::MANAGER ) redef have_full_data = F; @endif global cluster_new_item: event(item: Item); -global cluster_updated_item: event(item: Item); - -redef record Item += { - ## This field is solely used internally for cluster transparency with - ## the intelligence framework to avoid storms of intelligence data - ## swirling forever. It allows data to propagate only a single time. - first_dispatch: bool &default=T; -}; # Primary intelligence distribution comes from manager. -redef Cluster::manager2worker_events += /^Intel::cluster_.*$/; +redef Cluster::manager2worker_events += /^Intel::(cluster_new_item)$/; # If a worker finds intelligence and adds it, it should share it back to the manager. 
-redef Cluster::worker2manager_events += /^Intel::(cluster_.*|match_no_items)$/; - -@if ( Cluster::local_node_type() != Cluster::MANAGER ) -redef Intel::data_store &synchronized; -@endif +redef Cluster::worker2manager_events += /^Intel::(cluster_new_item|match_no_items)$/; @if ( Cluster::local_node_type() == Cluster::MANAGER ) event Intel::match_no_items(s: Seen) &priority=5 @@ -36,19 +30,13 @@ event Intel::match_no_items(s: Seen) &priority=5 event Intel::match(s, Intel::get_items(s)); } -global initial_sync = F; event remote_connection_handshake_done(p: event_peer) { - # Insert the data once something is connected. - # This should only push the data to a single host where the - # normal Bro synchronization should take over. - if ( ! initial_sync ) + # When a worker connects, send it the complete minimal data store. + # It will be kept up to date after this by the cluster_new_item event. + if ( Cluster::nodes[p$descr]$node_type == Cluster::WORKER ) { - initial_sync = T; - for ( net in data_store$net_data ) - event Intel::cluster_new_item([$net=net, $meta=[$source=""]]); - for ( [str, str_type] in data_store$string_data ) - event Intel::cluster_new_item([$str=str, $str_type=str_type, $meta=[$source=""]]); + send_id(p, "min_data_store"); } } @endif @@ -60,34 +48,14 @@ event Intel::cluster_new_item(item: Intel::Item) &priority=5 Intel::insert(item); } -event Intel::cluster_updated_item(item: Intel::Item) &priority=5 - { - # Ignore locally generated events to avoid event storms. - if ( is_remote_event() ) - Intel::insert(item); - } - event Intel::new_item(item: Intel::Item) &priority=5 { # The cluster manager always rebroadcasts intelligence. - # Workers redistribute it if it was locally generated on - # the worker. + # Workers redistribute it if it was locally generated. 
if ( Cluster::local_node_type() == Cluster::MANAGER || item$first_dispatch ) { - item$first_dispatch = F; + item$first_dispatch=F; event Intel::cluster_new_item(item); } } - -event Intel::updated_item(item: Intel::Item) &priority=5 - { - # If this is the first time this item has been dispatched or this - # is a manager, send it over the cluster. - if ( Cluster::local_node_type() == Cluster::MANAGER || - item$first_dispatch ) - { - item$first_dispatch = F; - event Intel::cluster_updated_item(item); - } - } diff --git a/scripts/base/frameworks/intel/input.bro b/scripts/base/frameworks/intel/input.bro index fd2c0bae97..a43a76f793 100644 --- a/scripts/base/frameworks/intel/input.bro +++ b/scripts/base/frameworks/intel/input.bro @@ -26,3 +26,4 @@ event bro_init() &priority=5 $ev=Intel::read_entry]); } } + diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 9cde7458de..cefd82936b 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -129,6 +129,16 @@ type DataStore: record { }; global data_store: DataStore &redef; +# The inmemory data structure for holding the barest matchable intelligence. +# This is primarily for workers to do the initial quick matches and store +# a minimal amount of data for the full match to happen on the manager. 
+type MinDataStore: record { + net_data: set[subnet]; + string_data: set[string, StrType]; +}; +global min_data_store: MinDataStore &redef; + + event bro_init() &priority=5 { Log::create_stream(LOG, [$columns=Info, $ev=log_intel]); @@ -137,12 +147,14 @@ event bro_init() &priority=5 function find(s: Seen): bool { if ( s?$host && - s$host in data_store$net_data ) + ((have_full_data && s$host in data_store$net_data) || + (s$host in min_data_store$net_data))) { return T; } else if ( s?$str && s?$str_type && - [s$str, s$str_type] in data_store$string_data ) + ((have_full_data && [s$str, s$str_type] in data_store$string_data) || + ([s$str, s$str_type] in min_data_store$string_data))) { return T; } @@ -232,7 +244,7 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool return F; } -event Intel::match(s: Seen, items: set[Item]) +event Intel::match(s: Seen, items: set[Item]) &priority=5 { local empty_set: set[string] = set(); local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set]; @@ -264,24 +276,39 @@ function insert(item: Item) if ( item?$host ) { local host = mask_addr(item$host, is_v4_addr(item$host) ? 
32 : 128); - if ( host !in data_store$net_data ) - data_store$net_data[host] = set(); - - metas = data_store$net_data[host]; + if ( have_full_data ) + { + if ( host !in data_store$net_data ) + data_store$net_data[host] = set(); + + metas = data_store$net_data[host]; + } + + add min_data_store$net_data[host]; } else if ( item?$net ) { - if ( item$net !in data_store$net_data ) - data_store$net_data[item$net] = set(); + if ( have_full_data ) + { + if ( item$net !in data_store$net_data ) + data_store$net_data[item$net] = set(); - metas = data_store$net_data[item$net]; + metas = data_store$net_data[item$net]; + } + + add min_data_store$net_data[item$net]; } else if ( item?$str ) { - if ( [item$str, item$str_type] !in data_store$string_data ) - data_store$string_data[item$str, item$str_type] = set(); + if ( have_full_data ) + { + if ( [item$str, item$str_type] !in data_store$string_data ) + data_store$string_data[item$str, item$str_type] = set(); - metas = data_store$string_data[item$str, item$str_type]; + metas = data_store$string_data[item$str, item$str_type]; + } + + add min_data_store$string_data[item$str, item$str_type]; } local updated = F; diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout deleted file mode 100644 index 9e60d6faf2..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout +++ /dev/null @@ -1,2 +0,0 @@ -cluster_new_item: 123.123.123.123 from source worker-1 (from peer: worker-1) -cluster_new_item: 4.3.2.1 from source worker-2 (from peer: worker-2) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log deleted file mode 100644 index 1af88a5929..0000000000 --- 
a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log +++ /dev/null @@ -1,10 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path intel -#open 2012-09-28-18-50-43 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where -#types time string addr port addr port addr string enum enum -1348858243.346443 - - - - - 123.123.123.123 - - Intel::IN_ANYWHERE -#close 2012-09-28-18-50-53 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout deleted file mode 100644 index 02fdcb8cd3..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout +++ /dev/null @@ -1,3 +0,0 @@ -cluster_new_item: 1.2.3.4 from source manager (from peer: manager-1) -cluster_new_item: 123.123.123.123 from source worker-1 (from peer: manager-1) -cluster_new_item: 4.3.2.1 from source worker-2 (from peer: manager-1) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout deleted file mode 100644 index 168b65a52d..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout +++ /dev/null @@ -1,4 +0,0 @@ -cluster_new_item: 1.2.3.4 from source manager (from peer: manager-1) -cluster_new_item: 123.123.123.123 from source worker-1 (from peer: manager-1) -cluster_new_item: 4.3.2.1 from source worker-2 (from peer: manager-1) -Doing a lookup diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.dns-zone-plugin/out b/testing/btest/Baseline/scripts.base.frameworks.intel.dns-zone-plugin/out deleted file mode 100644 index 1eb51e2701..0000000000 --- 
a/testing/btest/Baseline/scripts.base.frameworks.intel.dns-zone-plugin/out +++ /dev/null @@ -1,3 +0,0 @@ -It matched! -bad.com -Intel::DNS_ZONE diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/out b/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/out deleted file mode 100644 index f3e4cf8e60..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/out +++ /dev/null @@ -1 +0,0 @@ -Matched it! diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.insert-and-matcher/out b/testing/btest/Baseline/scripts.base.frameworks.intel.insert-and-matcher/out deleted file mode 100644 index 71fec4e23c..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.insert-and-matcher/out +++ /dev/null @@ -1,3 +0,0 @@ -VALID -VALID -VALID diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.item-merge/out b/testing/btest/Baseline/scripts.base.frameworks.intel.item-merge/out deleted file mode 100644 index c3220cd40c..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.item-merge/out +++ /dev/null @@ -1,3 +0,0 @@ -Number of matching intel items: 2 (should be 2) -Number of matching intel items: 2 (should still be 2) -Number of matching intel items: 3 (should be 3) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.matching/out b/testing/btest/Baseline/scripts.base.frameworks.intel.matching/out deleted file mode 100644 index 71fec4e23c..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.matching/out +++ /dev/null @@ -1,3 +0,0 @@ -VALID -VALID -VALID diff --git a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro index f50462746b..3810de5d4b 100644 --- a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro +++ b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro @@ -39,7 +39,7 @@ event 
Intel::cluster_new_item(item: Intel::Item) if ( ! is_remote_event() ) return; - print fmt("cluster_new_item: %s from source %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr); + print fmt("cluster_new_item: %s inserted by %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr); if ( ! sent_data ) { diff --git a/testing/btest/scripts/base/frameworks/intel/input-and-match.bro b/testing/btest/scripts/base/frameworks/intel/input-and-match.bro index 213520442a..f77f5c0f1d 100644 --- a/testing/btest/scripts/base/frameworks/intel/input-and-match.bro +++ b/testing/btest/scripts/base/frameworks/intel/input-and-match.bro @@ -1,36 +1,40 @@ -# @TEST-EXEC: bro %INPUT >out -# @TEST-EXEC: btest-diff out +# @TEST-SERIALIZE: comm + +# @TEST-EXEC: btest-bg-run broproc bro %INPUT +# @TEST-EXEC: btest-bg-wait -k 5 +# @TEST-EXEC: btest-diff broproc/intel.log @TEST-START-FILE intel.dat -#fields ip net str subtype meta.source meta.class meta.desc meta.url meta.tags -1.2.3.4 - - - source1 Intel::MALICIOUS this host is just plain baaad http://some-data-distributor.com/1234 foo,bar -1.2.3.4 - - - source1 Intel::MALICIOUS this host is just plain baaad http://some-data-distributor.com/1234 foo,bar -- - e@mail.com Intel::EMAIL source1 Intel::MALICIOUS Phishing email source http://some-data-distributor.com/100000 - +#fields host net str str_type meta.source meta.desc meta.url +1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 +1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 +- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 @TEST-END-FILE @load frameworks/communication/listen -redef Intel::read_files += { "intel.dat" }; +redef Intel::read_files += { "../intel.dat" }; +redef enum Intel::Where += { SOMEWHERE }; -event do_it(allowed_loops: count) +event do_it() { - if ( Intel::matcher([$str="e@mail.com", $subtype=Intel::EMAIL, 
$class=Intel::MALICIOUS]) && - Intel::matcher([$ip=1.2.3.4, $class=Intel::MALICIOUS]) ) - { - # Once the match happens a single time we print and shutdown. - print "Matched it!"; - terminate_communication(); - return; - } - - if ( allowed_loops > 0 ) - schedule 100msecs { do_it(allowed_loops-1) }; - else - terminate_communication(); + Intel::seen([$str="e@mail.com", + $str_type=Intel::EMAIL, + $where=SOMEWHERE]); + + Intel::seen([$host=1.2.3.4, + $where=SOMEWHERE]); } - -event bro_init() +global log_lines = 0; +event Intel::log_intel(rec: Intel::Info) { - event do_it(20); + ++log_lines; + if ( log_lines == 2 ) + terminate(); + } + +event bro_init() &priority=-10 + { + schedule 1sec { do_it() }; } diff --git a/testing/btest/scripts/base/frameworks/intel/item-merge.bro b/testing/btest/scripts/base/frameworks/intel/item-merge.bro deleted file mode 100644 index cf59b638de..0000000000 --- a/testing/btest/scripts/base/frameworks/intel/item-merge.bro +++ /dev/null @@ -1,23 +0,0 @@ -# @TEST-EXEC: bro %INPUT >out -# @TEST-EXEC: btest-diff out - -event bro_init() - { - Intel::insert([$ip=1.2.3.4, $meta=[$source="source1-feed1", $class=Intel::MALICIOUS, $tags=set("foo")]]); - Intel::insert([$ip=1.2.3.4, $meta=[$source="source2-special-sauce", $class=Intel::MALICIOUS, $tags=set("foo","bar")]]); - - # Lookup should return the items matching the query. - local items = Intel::lookup([$ip=1.2.3.4]); - print fmt("Number of matching intel items: %d (should be 2)", |items|); - - # This can be considered an update of a previous value since the - # data, source, and class are the matching points for determining sameness. - Intel::insert([$ip=1.2.3.4, $meta=[$source="source2-special-sauce", $class=Intel::MALICIOUS, $tags=set("foobar", "testing")]]); - items = Intel::lookup([$ip=1.2.3.4]); - print fmt("Number of matching intel items: %d (should still be 2)", |items|); - - # This is a new value. 
- Intel::insert([$ip=1.2.3.4, $meta=[$source="source3", $class=Intel::MALICIOUS]]); - items = Intel::lookup([$ip=1.2.3.4]); - print fmt("Number of matching intel items: %d (should be 3)", |items|); - } diff --git a/testing/btest/scripts/base/frameworks/intel/matching.bro b/testing/btest/scripts/base/frameworks/intel/matching.bro deleted file mode 100644 index 79bf599c96..0000000000 --- a/testing/btest/scripts/base/frameworks/intel/matching.bro +++ /dev/null @@ -1,38 +0,0 @@ -# -# @TEST-EXEC: bro %INPUT >out -# @TEST-EXEC: btest-diff out - -event bro_init() - { - Intel::insert([$ip=1.2.3.4, $meta=[$source="zeus-tracker", $class=Intel::MALICIOUS, $tags=set("example-tag1", "example-tag2")]]); - Intel::insert([$str="http://www.google.com/", $subtype=Intel::URL, $meta=[$source="source2", $class=Intel::MALICIOUS, $tags=set("infrastructure", "google")]]); - } - -event bro_done() - { - local orig_h = 1.2.3.4; - - if ( Intel::matcher([$ip=orig_h, $and_tags=set("example-tag1", "example-tag2")]) ) - print "VALID"; - - if ( Intel::matcher([$ip=orig_h, $and_tags=set("don't match")]) ) - print "INVALID"; - - if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::Item): bool { return T; } ]) ) - print "VALID"; - - if ( Intel::matcher([$ip=4.3.2.1, $pred=function(meta: Intel::Item): bool { return T; } ]) ) - print "INVALID"; - - if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::Item): bool { return F; } ]) ) - print "INVALID"; - - if ( Intel::matcher([$str="http://www.google.com/", $subtype=Intel::URL, $and_tags=set("google")]) ) - print "VALID"; - - if ( Intel::matcher([$str="http://www.google.com/", $subtype=Intel::URL, $and_tags=set("woah")]) ) - print "INVALID"; - - if ( Intel::matcher([$str="http://www.example.com", $subtype=Intel::URL]) ) - print "INVALID"; - } From 73f2fd8e3a677007c4f6c4747337befd8ccb89b6 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 3 Oct 2012 16:25:18 -0400 Subject: [PATCH 14/22] Updating some intel framework test baselines. 
--- .../manager-1..stdout | 2 ++ .../manager-1.intel.log | 10 ++++++++++ .../worker-1..stdout | 3 +++ .../worker-2..stdout | 4 ++++ .../broproc.intel.log | 11 +++++++++++ 5 files changed, 30 insertions(+) create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout new file mode 100644 index 0000000000..c57cda176e --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout @@ -0,0 +1,2 @@ +cluster_new_item: 123.123.123.123 inserted by worker-1 (from peer: worker-1) +cluster_new_item: 4.3.2.1 inserted by worker-2 (from peer: worker-2) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log new file mode 100644 index 0000000000..26efc039c4 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log @@ -0,0 +1,10 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path intel +#open 2012-10-03-20-20-39 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources +#types time string addr port addr port addr string enum enum table[string] +1349295639.424940 - - - - - 
123.123.123.123 - - Intel::IN_ANYWHERE worker-1 +#close 2012-10-03-20-20-49 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout new file mode 100644 index 0000000000..3be0ae6f70 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout @@ -0,0 +1,3 @@ +cluster_new_item: 1.2.3.4 inserted by manager (from peer: manager-1) +cluster_new_item: 123.123.123.123 inserted by worker-1 (from peer: manager-1) +cluster_new_item: 4.3.2.1 inserted by worker-2 (from peer: manager-1) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout new file mode 100644 index 0000000000..df950e68c4 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout @@ -0,0 +1,4 @@ +cluster_new_item: 1.2.3.4 inserted by manager (from peer: manager-1) +cluster_new_item: 123.123.123.123 inserted by worker-1 (from peer: manager-1) +cluster_new_item: 4.3.2.1 inserted by worker-2 (from peer: manager-1) +Doing a lookup diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log new file mode 100644 index 0000000000..d72e9efed3 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log @@ -0,0 +1,11 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path intel +#open 2012-10-03-20-18-05 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources +#types time string addr port addr port addr string enum enum table[string] +1349295485.114156 - - - - - - e@mail.com Intel::EMAIL 
SOMEWHERE source1 +1349295485.114156 - - - - - 1.2.3.4 - - SOMEWHERE source1 +#close 2012-10-03-20-18-05 From 6538f70e2cd766b89e7445b02ea148f8a63cdc8d Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Oct 2012 11:13:16 -0400 Subject: [PATCH 15/22] Fixed an issue with cluster data distribution. --- scripts/base/frameworks/intel/cluster.bro | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/base/frameworks/intel/cluster.bro b/scripts/base/frameworks/intel/cluster.bro index c210598b68..8ed17109c1 100644 --- a/scripts/base/frameworks/intel/cluster.bro +++ b/scripts/base/frameworks/intel/cluster.bro @@ -36,7 +36,7 @@ event remote_connection_handshake_done(p: event_peer) # It will be kept up to date after this by the cluster_new_item event. if ( Cluster::nodes[p$descr]$node_type == Cluster::WORKER ) { - send_id(p, "min_data_store"); + send_id(p, "Intel::min_data_store"); } } @endif From 21473b05578d463994ebdd7cf841935387db1a22 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Oct 2012 11:15:34 -0400 Subject: [PATCH 16/22] Initial API for Intel framework is complete. - More inline docs added. - Removing some debugging code. - New test for the intel framework data distribution mechanism. 
--- scripts/base/frameworks/intel/main.bro | 100 +++++++++--------- .../manager-1..stdout | 0 .../manager-1.intel.log | 13 +++ .../worker-1..stdout | 0 .../worker-2..stdout | 0 .../intel/read-file-dist-cluster.bro | 66 ++++++++++++ 6 files changed, 131 insertions(+), 48 deletions(-) create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1..stdout create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/worker-1..stdout create mode 100644 testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/worker-2..stdout create mode 100644 testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index cefd82936b..d66990e611 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -1,6 +1,6 @@ ##! The intelligence framework provides a way to store and query IP addresses, ##! and strings (with a str_type). Metadata can -##! also be associated with the intelligence like for making more informated +##! also be associated with the intelligence like for making more informed ##! decisions about matching and handling of intelligence. @load base/frameworks/notice @@ -10,11 +10,6 @@ module Intel; export { redef enum Log::ID += { LOG }; - redef enum Notice::Type += { - ## Notice type to indicate an intelligence hit. - Detection, - }; - ## String data needs to be further categoried since it could represent ## and number of types of data. type StrType: enum { @@ -24,14 +19,14 @@ export { USER_AGENT, ## Email address. EMAIL, - ## DNS domain name (DNS Zones are implemented in an intelligence plugin). + ## DNS domain name. DOMAIN, ## A user name. USER_NAME, - ## File hash which is non hash type specific. 
It's up to the user to query + ## File hash which is non-hash type specific. It's up to the user to query ## for any relevant hash types. FILE_HASH, - ## Certificate hash. Normally for X.509 certificates from the SSL analyzer. + ## Certificate SHA-1 hash. CERT_HASH, }; @@ -47,45 +42,65 @@ export { url: string &optional; }; + ## Represents a piece of intelligence. type Item: record { + ## The IP address if the intelligence is about an IP address. host: addr &optional; + ## The network if the intelligence is about a CIDR block. net: subnet &optional; + ## The string if the intelligence is about a string. str: string &optional; + ## The type of data that is in the string if the $str field is set. str_type: StrType &optional; + ## Metadata for the item. Typically represents more deeply \ + ## descriptive data for a piece of intelligence. meta: MetaData; }; ## Enum to represent where data came from when it was discovered. + ## The convention is to prefix the name with "IN_". type Where: enum { ## A catchall value to represent data of unknown provenance. IN_ANYWHERE, }; + ## The $host field and combination of $str and $str_type fields are mutually + ## exclusive. These records *must* represent either an IP address being + ## seen or a string being seen. type Seen: record { - host: addr &optional &log; - str: string &optional &log; - str_type: StrType &optional &log; + ## The IP address if the data seen is an IP address. + host: addr &log &optional; + ## The string if the data is about a string. + str: string &log &optional; + ## The type of data that is in the string if the $str field is set. + str_type: StrType &log &optional; + ## Where the data was discovered. where: Where &log; + ## If the data was discovered within a connection, the + ## connection record should go here to give context to the data. conn: connection &optional; }; + ## Record used for the logging framework representing a positive + ## hit within the intelligence framework. 
type Info: record { - ts: time &log; + ## Timestamp when the data was discovered. + ts: time &log; - uid: string &log &optional; - id: conn_id &log &optional; + ## If a connection was associated with this intelligence hit, + ## this is the uid for the connection + uid: string &log &optional; + ## If a connection was associated with this intelligence hit, + ## this is the conn_id for the connection. + id: conn_id &log &optional; - seen: Seen &log; - sources: set[string] &log; - }; - - type PolicyItem: record { - pred: function(s: Seen, item: Item): bool &optional; - - log_it: bool &default=T; + ## Where the data was seen. + seen: Seen &log; + ## Sources which supplied data that resulted in this match. + sources: set[string] &log; }; ## Intelligence data manipulation functions. @@ -95,29 +110,25 @@ export { ## it against known intelligence for matches. global seen: function(s: Seen); - ## Intelligence policy variable for handling matches. - const policy: set[PolicyItem] = { - # [$pred(s: Seen) = { return T; }, - # $action=Intel::ACTION_LOG] - } &redef; - - ## API Events that indicate when various things happen internally within the - ## intelligence framework. - global new_item: event(item: Item); - global updated_item: event(item: Item); + ## Event to represent a match in the intelligence data from data that was seen. + ## On clusters there is no assurance as to where this event will be generated + ## so do not assume that arbitrary global state beyond the given data + ## will be available. + ## + ## This is the primary mechanism where a user will take actions based on data + ## within the intelligence framework. + global match: event(s: Seen, items: set[Item]); global log_intel: event(rec: Info); } -# Event to represent a match happening in a connection. On clusters there -# is no assurance as to where this event will be generated so don't -# assume that arbitrary global state beyond the given data -# will be available. 
-global match: event(s: Seen, items: set[Item]); - -# Internal handler for conn oriented matches with no metadata based on the have_full_data setting. +# Internal handler for matches with no metadata available. global match_no_items: event(s: Seen); +# Internal events for cluster data distribution +global new_item: event(item: Item); +global updated_item: event(item: Item); + # Optionally store metadata. This is used internally depending on # if this is a cluster deployment or not. const have_full_data = T &redef; @@ -129,7 +140,7 @@ type DataStore: record { }; global data_store: DataStore &redef; -# The inmemory data structure for holding the barest matchable intelligence. +# The in memory data structure for holding the barest matchable intelligence. # This is primarily for workers to do the initial quick matches and store # a minimal amount of data for the full match to happen on the manager. type MinDataStore: record { @@ -207,15 +218,8 @@ function get_items(s: Seen): set[Item] return return_data; } -#global total_seen=0; -#event bro_done() -# { -# print fmt("total seen: %d", total_seen); -# } - function Intel::seen(s: Seen) { - #++total_seen; if ( find(s) ) { if ( have_full_data ) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1..stdout new file mode 100644 index 0000000000..e69de29bb2 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log new file mode 100644 index 0000000000..8069bad528 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log @@ -0,0 +1,13 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path intel +#open 2012-10-10-15-05-23 +#fields ts uid id.orig_h id.orig_p 
id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources +#types time string addr port addr port addr string enum enum table[string] +1349881523.548946 - - - - - 1.2.3.4 - - Intel::IN_A_TEST source1 +1349881523.548946 - - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 +1349881524.567896 - - - - - 1.2.3.4 - - Intel::IN_A_TEST source1 +1349881524.567896 - - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 +#close 2012-10-10-15-05-24 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/worker-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/worker-1..stdout new file mode 100644 index 0000000000..e69de29bb2 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/worker-2..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/worker-2..stdout new file mode 100644 index 0000000000..e69de29bb2 diff --git a/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro b/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro new file mode 100644 index 0000000000..6838736249 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro @@ -0,0 +1,66 @@ +# @TEST-SERIALIZE: comm +# +# @TEST-EXEC: btest-bg-run manager-1 BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT +# @TEST-EXEC: sleep 2 +# @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT +# @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. 
CLUSTER_NODE=worker-2 bro %INPUT +# @TEST-EXEC: btest-bg-wait -k 10 +# @TEST-EXEC: btest-diff manager-1/.stdout +# @TEST-EXEC: btest-diff manager-1/intel.log +# @TEST-EXEC: btest-diff worker-1/.stdout +# @TEST-EXEC: btest-diff worker-2/.stdout + +@TEST-START-FILE cluster-layout.bro +redef Cluster::nodes = { + ["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")], + ["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1"], + ["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1"], +}; +@TEST-END-FILE + +@TEST-START-FILE intel.dat +#fields host net str str_type meta.source meta.desc meta.url +1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 +1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 +- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 +@TEST-END-FILE + +@load base/frameworks/control +redef Log::default_rotation_interval=0sec; + +module Intel; + +@if ( Cluster::local_node_type() == Cluster::MANAGER ) +redef Intel::read_files += { "../intel.dat" }; +@endif + +redef enum Intel::Where += { + Intel::IN_A_TEST, +}; + +event do_it() + { + Intel::seen([$host=1.2.3.4, $where=Intel::IN_A_TEST]); + Intel::seen([$str="e@mail.com", $str_type=Intel::EMAIL, $where=Intel::IN_A_TEST]); + } + +event bro_init() + { + # Delay the workers searching for hits briefly to allow for the data distribution + # mechanism to distribute the data to the workers. + if ( Cluster::local_node_type() == Cluster::WORKER ) + schedule 2sec { do_it() }; + } + +global intel_hits=0; +event Intel::log_intel(rec: Intel::Info) + { + ++intel_hits; + # There should be 4 hits since each worker is "seeing" 2 things. 
+ if ( intel_hits == 4 ) + { + # We're delaying shutdown for a second here to make sure that no other + # matches happen (which would be wrong!). + schedule 1sec { Control::shutdown_request() }; + } + } From 49a28b0bf74a0c72ea6051caf80ef762ccdac662 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Oct 2012 11:16:11 -0400 Subject: [PATCH 17/22] Initial support for Bro's Intel framework with the Collective Intelligence Framework. --- scripts/policy/integration/collective-intel/README | 6 ++++++ .../policy/integration/collective-intel/__load__.bro | 1 + scripts/policy/integration/collective-intel/main.bro | 12 ++++++++++++ 3 files changed, 19 insertions(+) create mode 100644 scripts/policy/integration/collective-intel/README create mode 100644 scripts/policy/integration/collective-intel/__load__.bro create mode 100644 scripts/policy/integration/collective-intel/main.bro diff --git a/scripts/policy/integration/collective-intel/README b/scripts/policy/integration/collective-intel/README new file mode 100644 index 0000000000..550eb96962 --- /dev/null +++ b/scripts/policy/integration/collective-intel/README @@ -0,0 +1,6 @@ +Collective Intelligence Framework Integration +============================================= + +The scripts in this module are for deeper integration with the Collective Intelligence +Framework (CIF) since Bro's Intel framework doesn't natively behave the same as CIF nor +does it store and maintain the same data in all cases. 
\ No newline at end of file diff --git a/scripts/policy/integration/collective-intel/__load__.bro b/scripts/policy/integration/collective-intel/__load__.bro new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/policy/integration/collective-intel/__load__.bro @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/policy/integration/collective-intel/main.bro b/scripts/policy/integration/collective-intel/main.bro new file mode 100644 index 0000000000..9d98f3f4bc --- /dev/null +++ b/scripts/policy/integration/collective-intel/main.bro @@ -0,0 +1,12 @@ + +module Intel; + +## These are some fields to add extended compatibility between Bro and the Collective +## Intelligence Framework: http://code.google.com/p/collective-intelligence-framework/ +redef record Intel::MetaData += { + cif_impact: string &optional; + ## Maps to the Severity field in the Collective Intelligence Framework. + cif_severity: string &optional; + ## Maps to the Confidence field in the Collective Intelligence Framework. + cif_confidence: double &optional; +}; \ No newline at end of file From 9ea52fe9cd564e923a40c1e8d58e5dfebe7151c4 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Oct 2012 13:00:48 -0400 Subject: [PATCH 18/22] Only the manager tries to read files with the input framework now. --- scripts/base/frameworks/intel/__load__.bro | 4 +++- scripts/base/frameworks/intel/input.bro | 18 +++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/scripts/base/frameworks/intel/__load__.bro b/scripts/base/frameworks/intel/__load__.bro index 08d0f62222..d8c77b86e3 100644 --- a/scripts/base/frameworks/intel/__load__.bro +++ b/scripts/base/frameworks/intel/__load__.bro @@ -1,5 +1,4 @@ @load ./main -@load ./input # The cluster framework must be loaded first. @load base/frameworks/cluster @@ -7,3 +6,6 @@ @if ( Cluster::is_enabled() ) @load ./cluster @endif + +# This needs cluster support to only read on the manager. 
+@load ./input diff --git a/scripts/base/frameworks/intel/input.bro b/scripts/base/frameworks/intel/input.bro index a43a76f793..6ab7c6a674 100644 --- a/scripts/base/frameworks/intel/input.bro +++ b/scripts/base/frameworks/intel/input.bro @@ -16,14 +16,18 @@ event Intel::read_entry(desc: Input::EventDescription, tpe: Input::Event, item: event bro_init() &priority=5 { - for ( a_file in read_files ) + if ( ! Cluster::is_enabled() || + Cluster::local_node_type() == Cluster::MANAGER ) { - Input::add_event([$source=a_file, - $reader=Input::READER_ASCII, - $mode=Input::REREAD, - $name=cat("intel-", a_file), - $fields=Intel::Item, - $ev=Intel::read_entry]); + for ( a_file in read_files ) + { + Input::add_event([$source=a_file, + $reader=Input::READER_ASCII, + $mode=Input::REREAD, + $name=cat("intel-", a_file), + $fields=Intel::Item, + $ev=Intel::read_entry]); + } } } From 2952f2c326267c619913b6de94551460c51c96db Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Thu, 11 Oct 2012 15:30:56 -0400 Subject: [PATCH 19/22] Intelligence framework documentation first draft. --- doc/intel.rst | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 doc/intel.rst diff --git a/doc/intel.rst b/doc/intel.rst new file mode 100644 index 0000000000..2027f6ca54 --- /dev/null +++ b/doc/intel.rst @@ -0,0 +1,69 @@ +Intel Framework +=============== + +Intro +----- + +Intelligence data is critical to the process of monitoring for security purposes. There is always data which will be discovered through the incident response process and data which is shared through private communities. The goals of Bro's Intelligence Framework are to consume that data, make it available for matching, and provide infrastructure around improving performance, memory utilization, and generally making all of this easier. 
+ +Data in the Intelligence Framework is the atomic piece of intelligence such as an IP address or an e-mail address along with a suite of metadata about it such as a freeform source field, a freeform descriptive field and a URL which might lead to more information about the specific item. The metadata in the default scripts has been deliberately kept minimal so that the community can find the appropriate fields that need added by writing scripts which extend the base record using the normal record extension mechanism. + +Quick Start +----------- + +Load the package of scripts that sends data into the Intelligence Framework to be checked by loading this script in local.bro:: + + @load policy/frameworks/intel + +(TODO: find some good mechanism for getting setup with good data quickly) + +Refer to the "Loading Intelligence" section below to see the format for Intelligence Framework text files, then load those text files with this line in local.bro:: + + redef Intel::read_files += { "/somewhere/yourdata.txt" }; + +The data itself only needs to reside on the manager if running in a cluster. + +Architecture +------------ + +The Intelligence Framework can be thought of as containing three separate portions. The first part is how intelligence is loaded, followed by the mechanism for indicating to the intelligence framework that a piece of data which needs to be checked has been seen, and thirdly the part where a positive match has been discovered. + +Loading Intelligence +******************** + +Intelligence data can only be loaded through plain text files using the Input Framework conventions. Additionally, on clusters the manager is the only node that needs the intelligence data. The intelligence framework has distribution mechanisms which will push data out to all of the nodes that need it. + +Here is an example of the intelligence data format. 
Note that all whitespace separators are literal tabs and fields containing only a hyphen are considered to be null values.:: + + #fields host net str str_type meta.source meta.desc meta.url + 1.2.3.4 - - - source1 Sending phishing email http://source1.com/badhosts/1.2.3.4 + - 31.131.248.0/21 - - spamhaus-drop SBL154982 - - + - - a.b.com Intel::DOMAIN source2 Name used for data exfiltration - + +For more examples of built in `str_type` values, please refer to the autogenerated documentation for the intelligence framework (TODO: figure out how to do this link). + +To load the data once files are created, use the following example code to define files to load with your own file names of course:: + + redef Intel::read_files += { + "/somewhere/feed1.txt", + "/somewhere/feed2.txt", + }; + +Remember, the files only need to be present on the file system of the manager node on cluster deployments. + +Seen Data +********* + +When some bit of data is extracted (such as an email address in the "From" header in a message over SMTP), the Intelligence Framework needs to be informed that this data was discovered and its presence should be checked within the intelligence data set. This is accomplished through the Intel::seen (TODO: do a reference link) function. + +Typically users won't need to work with this function due to built in hook scripts that Bro ships with that will "see" data and send it into the intelligence framework. A user may only need to load the entire package of hook scripts as a module or pick and choose specific scripts to load. Keep in mind that as more data is sent into the intelligence framework, the CPU load consumed by Bro will increase depending on how many times the Intel::seen function is being called which is heavily traffic dependent. 
+ +The full package of hook scripts that Bro ships with for sending this "seen" data into the intelligence framework can be loaded by adding this line to local.bro:: + + @load policy/frameworks/intel + +Intelligence Matches +******************** + +Against all hopes, most networks will eventually have a hit on intelligence data which could indicate a possible compromise or other unwanted activity. The Intelligence Framework provides an event named Intel::match (TODO: make a link to inline docs) that is generated whenever a match is discovered. Due to design restrictions placed upon the intelligence framework, there is no assurance as to where this event will be generated. It could be generated on the worker where the data was seen or on the manager. When the Intel::match event is handled, only the data given as event arguments to the event can be assured since the host where the data was seen may not be where Intel::match is handled. + From 1280825f1df4b41237e800bd27e4a5ecb18bb674 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Thu, 11 Oct 2012 15:31:14 -0400 Subject: [PATCH 20/22] Small comment updates in the Intel framework CIF support. --- scripts/policy/integration/collective-intel/main.bro | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/policy/integration/collective-intel/main.bro b/scripts/policy/integration/collective-intel/main.bro index 9d98f3f4bc..f347ba8867 100644 --- a/scripts/policy/integration/collective-intel/main.bro +++ b/scripts/policy/integration/collective-intel/main.bro @@ -2,8 +2,9 @@ module Intel; ## These are some fields to add extended compatibility between Bro and the Collective -## Intelligence Framework: http://code.google.com/p/collective-intelligence-framework/ +## Intelligence Framework redef record Intel::MetaData += { + ## Maps to the Impact field in the Collective Intelligence Framework. cif_impact: string &optional; ## Maps to the Severity field in the Collective Intelligence Framework. 
cif_severity: string &optional; From 172a6345b88c8d5cfd0611a02e02ed5723208327 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 31 Oct 2012 08:46:40 -0400 Subject: [PATCH 21/22] Extracting URLs from message bodies over SMTP and sending them to Intel framework. - New utils package for URLs. - Two functions in the URLs utils. find_all_urls and find_all_urls_without_scheme. --- scripts/base/init-default.bro | 1 + scripts/base/utils/urls.bro | 25 +++++++++++++++++++ scripts/policy/frameworks/intel/__load__.bro | 3 ++- .../frameworks/intel/smtp-url-extraction.bro | 15 +++++++++++ .../frameworks/intel/where-locations.bro | 1 + 5 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 scripts/base/utils/urls.bro create mode 100644 scripts/policy/frameworks/intel/smtp-url-extraction.bro diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 91011738d1..72f6d7d619 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -14,6 +14,7 @@ @load base/utils/patterns @load base/utils/strings @load base/utils/thresholds +@load base/utils/urls # This has some deep interplay between types and BiFs so it's # loaded in base/init-bare.bro diff --git a/scripts/base/utils/urls.bro b/scripts/base/utils/urls.bro new file mode 100644 index 0000000000..d704e72013 --- /dev/null +++ b/scripts/base/utils/urls.bro @@ -0,0 +1,25 @@ +## Functions for URL handling. + +## A regular expression for matching and extracting URLs. +const url_regex = /^([a-zA-Z\-]{3,5})(:\/\/[^\/?#"'\r\n><]*)([^?#"'\r\n><]*)([^[:blank:]\r\n"'><]*|\??[^"'\r\n><]*)/ &redef; + +## Extracts URLs discovered in arbitrary text. +function find_all_urls(s: string): string_set + { + return find_all(s, url_regex); + } + +## Extracts URLs discovered in arbitrary text without +## the URL scheme included. 
+function find_all_urls_without_scheme(s: string): string_set + { + local urls = find_all_urls(s); + local return_urls: set[string] = set(); + for ( url in urls ) + { + local no_scheme = sub(url, /^([a-zA-Z\-]{3,5})(:\/\/)/, ""); + add return_urls[no_scheme]; + } + + return return_urls; + } \ No newline at end of file diff --git a/scripts/policy/frameworks/intel/__load__.bro b/scripts/policy/frameworks/intel/__load__.bro index b5bd74a06d..3ffbc35378 100644 --- a/scripts/policy/frameworks/intel/__load__.bro +++ b/scripts/policy/frameworks/intel/__load__.bro @@ -4,4 +4,5 @@ @load ./http-url @load ./http-user-agents @load ./ssl -@load ./smtp \ No newline at end of file +@load ./smtp +@load ./smtp-url-extraction \ No newline at end of file diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/smtp-url-extraction.bro new file mode 100644 index 0000000000..78c8f74bf2 --- /dev/null +++ b/scripts/policy/frameworks/intel/smtp-url-extraction.bro @@ -0,0 +1,15 @@ +@load base/frameworks/intel +@load base/utils/urls +@load ./where-locations + +event mime_segment_data(c: connection, length: count, data: string) &priority=3 + { + local urls = find_all_urls_without_scheme(data); + for ( url in urls ) + { + Intel::seen([$str=url, + $str_type=Intel::URL, + $conn=c, + $where=SMTP::IN_MESSAGE]); + } + } \ No newline at end of file diff --git a/scripts/policy/frameworks/intel/where-locations.bro b/scripts/policy/frameworks/intel/where-locations.bro index 45be5892ce..4773de9c73 100644 --- a/scripts/policy/frameworks/intel/where-locations.bro +++ b/scripts/policy/frameworks/intel/where-locations.bro @@ -16,6 +16,7 @@ export { SMTP::IN_RECEIVED_HEADER, SMTP::IN_REPLY_TO, SMTP::IN_X_ORIGINATING_IP_HEADER, + SMTP::IN_MESSAGE, SSL::IN_SERVER_CERT, SSL::IN_CLIENT_CERT, SSL::IN_SERVER_NAME, From 70339e9fed134f71a1b0f9cbb23b13605338f225 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 1 Nov 2012 09:28:59 -0700 Subject: [PATCH 22/22] Fixing 
tests after intel-framework merge. coverage.bare-mode-errors still failing. --- doc/scripts/DocSourcesList.cmake | 14 ++++++++++++- .../policy/protocols/http/detect-intel.bro | 21 ------------------- scripts/test-all-policy.bro | 13 +++++++++++- .../canonified_loaded_scripts.log | 6 ++++-- .../coverage.init-default/missing_loads | 1 + 5 files changed, 30 insertions(+), 25 deletions(-) delete mode 100644 scripts/policy/protocols/http/detect-intel.bro diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index b127e1526d..2e791739dc 100644 --- a/doc/scripts/DocSourcesList.cmake +++ b/doc/scripts/DocSourcesList.cmake @@ -36,6 +36,8 @@ rest_target(${psd} base/frameworks/input/main.bro) rest_target(${psd} base/frameworks/input/readers/ascii.bro) rest_target(${psd} base/frameworks/input/readers/benchmark.bro) rest_target(${psd} base/frameworks/input/readers/raw.bro) +rest_target(${psd} base/frameworks/intel/cluster.bro) +rest_target(${psd} base/frameworks/intel/input.bro) rest_target(${psd} base/frameworks/intel/main.bro) rest_target(${psd} base/frameworks/logging/main.bro) rest_target(${psd} base/frameworks/logging/postprocessors/scp.bro) @@ -100,11 +102,21 @@ rest_target(${psd} base/utils/patterns.bro) rest_target(${psd} base/utils/site.bro) rest_target(${psd} base/utils/strings.bro) rest_target(${psd} base/utils/thresholds.bro) +rest_target(${psd} base/utils/urls.bro) rest_target(${psd} policy/frameworks/communication/listen.bro) rest_target(${psd} policy/frameworks/control/controllee.bro) rest_target(${psd} policy/frameworks/control/controller.bro) rest_target(${psd} policy/frameworks/dpd/detect-protocols.bro) rest_target(${psd} policy/frameworks/dpd/packet-segment-logging.bro) +rest_target(${psd} policy/frameworks/intel/conn-established.bro) +rest_target(${psd} policy/frameworks/intel/dns.bro) +rest_target(${psd} policy/frameworks/intel/http-host-header.bro) +rest_target(${psd} policy/frameworks/intel/http-url.bro) +rest_target(${psd} 
policy/frameworks/intel/http-user-agents.bro) +rest_target(${psd} policy/frameworks/intel/smtp-url-extraction.bro) +rest_target(${psd} policy/frameworks/intel/smtp.bro) +rest_target(${psd} policy/frameworks/intel/ssl.bro) +rest_target(${psd} policy/frameworks/intel/where-locations.bro) rest_target(${psd} policy/frameworks/metrics/conn-example.bro) rest_target(${psd} policy/frameworks/metrics/http-example.bro) rest_target(${psd} policy/frameworks/metrics/ssl-example.bro) @@ -112,6 +124,7 @@ rest_target(${psd} policy/frameworks/software/version-changes.bro) rest_target(${psd} policy/frameworks/software/vulnerable.bro) rest_target(${psd} policy/integration/barnyard2/main.bro) rest_target(${psd} policy/integration/barnyard2/types.bro) +rest_target(${psd} policy/integration/collective-intel/main.bro) rest_target(${psd} policy/misc/analysis-groups.bro) rest_target(${psd} policy/misc/capture-loss.bro) rest_target(${psd} policy/misc/loaded-scripts.bro) @@ -126,7 +139,6 @@ rest_target(${psd} policy/protocols/dns/detect-external-names.bro) rest_target(${psd} policy/protocols/ftp/detect.bro) rest_target(${psd} policy/protocols/ftp/software.bro) rest_target(${psd} policy/protocols/http/detect-MHR.bro) -rest_target(${psd} policy/protocols/http/detect-intel.bro) rest_target(${psd} policy/protocols/http/detect-sqli.bro) rest_target(${psd} policy/protocols/http/detect-webapps.bro) rest_target(${psd} policy/protocols/http/header-names.bro) diff --git a/scripts/policy/protocols/http/detect-intel.bro b/scripts/policy/protocols/http/detect-intel.bro deleted file mode 100644 index 281d705c13..0000000000 --- a/scripts/policy/protocols/http/detect-intel.bro +++ /dev/null @@ -1,21 +0,0 @@ -##! Intelligence based HTTP detections. Not yet working! 
- -@load base/protocols/http/main -@load base/protocols/http/utils -@load base/frameworks/intel/main - -module HTTP; - -event log_http(rec: Info) - { - local url = HTTP::build_url(rec); - local query = [$str=url, $subtype="url", $or_tags=set("malicious", "malware")]; - if ( Intel::matcher(query) ) - { - local msg = fmt("%s accessed a malicious URL from the intelligence framework", rec$id$orig_h); - NOTICE([$note=Intel::Detection, - $msg=msg, - $sub=HTTP::build_url_http(rec), - $id=rec$id]); - } - } diff --git a/scripts/test-all-policy.bro b/scripts/test-all-policy.bro index a7c43b14b3..4fa8a1363d 100644 --- a/scripts/test-all-policy.bro +++ b/scripts/test-all-policy.bro @@ -14,6 +14,16 @@ # @load frameworks/control/controller.bro @load frameworks/dpd/detect-protocols.bro @load frameworks/dpd/packet-segment-logging.bro +@load frameworks/intel/__load__.bro +@load frameworks/intel/conn-established.bro +@load frameworks/intel/dns.bro +@load frameworks/intel/http-host-header.bro +@load frameworks/intel/http-url.bro +@load frameworks/intel/http-user-agents.bro +@load frameworks/intel/smtp-url-extraction.bro +@load frameworks/intel/smtp.bro +@load frameworks/intel/ssl.bro +@load frameworks/intel/where-locations.bro @load frameworks/metrics/conn-example.bro @load frameworks/metrics/http-example.bro @load frameworks/metrics/ssl-example.bro @@ -22,6 +32,8 @@ @load integration/barnyard2/__load__.bro @load integration/barnyard2/main.bro @load integration/barnyard2/types.bro +@load integration/collective-intel/__load__.bro +@load integration/collective-intel/main.bro @load misc/analysis-groups.bro @load misc/capture-loss.bro @load misc/loaded-scripts.bro @@ -35,7 +47,6 @@ @load protocols/dns/detect-external-names.bro @load protocols/ftp/detect.bro @load protocols/ftp/software.bro -@load protocols/http/detect-intel.bro @load protocols/http/detect-MHR.bro @load protocols/http/detect-sqli.bro @load protocols/http/detect-webapps.bro diff --git 
a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index c3ee64cffe..c43fe7721f 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2012-07-20-14-34-40 +#open 2012-11-01-15-37-12 #fields name #types string scripts/base/init-bare.bro @@ -40,6 +40,7 @@ scripts/base/init-default.bro scripts/base/utils/paths.bro scripts/base/utils/strings.bro scripts/base/utils/thresholds.bro + scripts/base/utils/urls.bro scripts/base/frameworks/notice/__load__.bro scripts/base/frameworks/notice/./main.bro scripts/base/frameworks/notice/./weird.bro @@ -69,6 +70,7 @@ scripts/base/init-default.bro scripts/base/frameworks/metrics/./non-cluster.bro scripts/base/frameworks/intel/__load__.bro scripts/base/frameworks/intel/./main.bro + scripts/base/frameworks/intel/./input.bro scripts/base/frameworks/reporter/__load__.bro scripts/base/frameworks/reporter/./main.bro scripts/base/frameworks/tunnels/__load__.bro @@ -112,4 +114,4 @@ scripts/base/init-default.bro scripts/base/protocols/syslog/./consts.bro scripts/base/protocols/syslog/./main.bro scripts/policy/misc/loaded-scripts.bro -#close 2012-07-20-14-34-40 +#close 2012-11-01-15-37-12 diff --git a/testing/btest/Baseline/coverage.init-default/missing_loads b/testing/btest/Baseline/coverage.init-default/missing_loads index 4497bbd185..34ba654dec 100644 --- a/testing/btest/Baseline/coverage.init-default/missing_loads +++ b/testing/btest/Baseline/coverage.init-default/missing_loads @@ -2,5 +2,6 @@ -./frameworks/cluster/nodes/proxy.bro -./frameworks/cluster/nodes/worker.bro -./frameworks/cluster/setup-connections.bro +-./frameworks/intel/cluster.bro -./frameworks/metrics/cluster.bro -./frameworks/notice/cluster.bro