diff --git a/CHANGES b/CHANGES index a12bd8da12..fa5a4cc380 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,9 @@ +2.1-XXX | 2012-11-01 08:20:00 -0700 + + * Initial version of a completely reworked intelligence framework. + See doc/intel.rst for more information. (Seth Hall) + 2.1-112 | 2012-11-05 13:58:20 -0800 * New base script for detecting cases of checksum offloading. diff --git a/doc/intel.rst b/doc/intel.rst new file mode 100644 index 0000000000..390313461a --- /dev/null +++ b/doc/intel.rst @@ -0,0 +1,125 @@ +Intel Framework +=============== + +Intro +----- + +Intelligence data is critical to the process of monitoring for +security purposes. There is always data which will be discovered +through the incident response process and data which is shared through +private communities. The goals of Bro's Intelligence Framework are to +consume that data, make it available for matching, and provide +infrastructure around improving performance, memory utilization, and +generally making all of this easier. + +Data in the Intelligence Framework is the atomic piece of intelligence +such as an IP address or an e-mail address along with a suite of +metadata about it such as a freeform source field, a freeform +descriptive field and a URL which might lead to more information about +the specific item. The metadata in the default scripts has been +deliberately kept minimal so that the community can find the +appropriate fields that need to be added by writing scripts which extend the +base record using the normal record extension mechanism. 
+ +Quick Start +----------- + +Load the package of scripts that sends data into the Intelligence +Framework to be checked by loading this script in local.bro:: + + @load policy/frameworks/intel + +(TODO: find some good mechanism for getting set up with good data +quickly) + +Refer to the "Loading Intelligence" section below to see the format +for Intelligence Framework text files, then load those text files with +this line in local.bro:: + + redef Intel::read_files += { "/somewhere/yourdata.txt" }; + +The data itself only needs to reside on the manager if running in a +cluster. + +Architecture +------------ + +The Intelligence Framework can be thought of as containing three +separate portions. The first part is how intelligence is loaded, +followed by the mechanism for indicating to the intelligence framework +that a piece of data which needs to be checked has been seen, and +thirdly the part where a positive match has been discovered. + +Loading Intelligence +******************** + +Intelligence data can only be loaded through plain text files using +the Input Framework conventions. Additionally, on clusters the +manager is the only node that needs the intelligence data. The +intelligence framework has distribution mechanisms which will push +data out to all of the nodes that need it. + +Here is an example of the intelligence data format. Note that all +whitespace separators are literal tabs and fields containing only a +hyphen are considered to be null values:: + + #fields host net str str_type meta.source meta.desc meta.url + 1.2.3.4 - - - source1 Sending phishing email http://source1.com/badhosts/1.2.3.4 + - 31.131.248.0/21 - - spamhaus-drop SBL154982 - - + - - a.b.com Intel::DOMAIN source2 Name used for data exfiltration - + +For more examples of built in `str_type` values, please refer to the +autogenerated documentation for the intelligence framework (TODO: +figure out how to do this link). 
+ +To load the data once files are created, use the following example +code to define files to load with your own file names of course:: + + redef Intel::read_files += { + "/somewhere/feed1.txt", + "/somewhere/feed2.txt", + }; + +Remember, the files only need to be present on the file system of the +manager node on cluster deployments. + +Seen Data +********* + +When some bit of data is extracted (such as an email address in the +"From" header in a message over SMTP), the Intelligence Framework +needs to be informed that this data was discovered and its presence +should be checked within the intelligence data set. This is +accomplished through the Intel::seen (TODO: do a reference link) +function. + +Typically users won't need to work with this function due to built in +hook scripts that Bro ships with that will "see" data and send it into +the intelligence framework. A user may only need to load the entire +package of hook scripts as a module or pick and choose specific +scripts to load. Keep in mind that as more data is sent into the +intelligence framework, the CPU load consumed by Bro will increase +depending on how many times the Intel::seen function is being called +which is heavily traffic dependent. + +The full package of hook scripts that Bro ships with for sending this +"seen" data into the intelligence framework can be loaded by adding +this line to local.bro:: + + @load policy/frameworks/intel + +Intelligence Matches +******************** + +Against all hopes, most networks will eventually have a hit on +intelligence data which could indicate a possible compromise or other +unwanted activity. The Intelligence Framework provides an event that +is generated whenever a match is discovered named Intel::match (TODO: +make a link to inline docs). Due to design restrictions placed upon +the intelligence framework, there is no assurance as to where this +event will be generated. It could be generated on the worker where +the data was seen or on the manager. 
When the Intel::match event is +handled, only the data given as event arguments to the event can be +assured since the host where the data was seen may not be where +Intel::match is handled. + diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index 01eeda5188..b95464b6b3 100644 --- a/doc/scripts/DocSourcesList.cmake +++ b/doc/scripts/DocSourcesList.cmake @@ -36,6 +36,8 @@ rest_target(${psd} base/frameworks/input/main.bro) rest_target(${psd} base/frameworks/input/readers/ascii.bro) rest_target(${psd} base/frameworks/input/readers/benchmark.bro) rest_target(${psd} base/frameworks/input/readers/raw.bro) +rest_target(${psd} base/frameworks/intel/cluster.bro) +rest_target(${psd} base/frameworks/intel/input.bro) rest_target(${psd} base/frameworks/intel/main.bro) rest_target(${psd} base/frameworks/logging/main.bro) rest_target(${psd} base/frameworks/logging/postprocessors/scp.bro) @@ -103,11 +105,21 @@ rest_target(${psd} base/utils/patterns.bro) rest_target(${psd} base/utils/site.bro) rest_target(${psd} base/utils/strings.bro) rest_target(${psd} base/utils/thresholds.bro) +rest_target(${psd} base/utils/urls.bro) rest_target(${psd} policy/frameworks/communication/listen.bro) rest_target(${psd} policy/frameworks/control/controllee.bro) rest_target(${psd} policy/frameworks/control/controller.bro) rest_target(${psd} policy/frameworks/dpd/detect-protocols.bro) rest_target(${psd} policy/frameworks/dpd/packet-segment-logging.bro) +rest_target(${psd} policy/frameworks/intel/conn-established.bro) +rest_target(${psd} policy/frameworks/intel/dns.bro) +rest_target(${psd} policy/frameworks/intel/http-host-header.bro) +rest_target(${psd} policy/frameworks/intel/http-url.bro) +rest_target(${psd} policy/frameworks/intel/http-user-agents.bro) +rest_target(${psd} policy/frameworks/intel/smtp-url-extraction.bro) +rest_target(${psd} policy/frameworks/intel/smtp.bro) +rest_target(${psd} policy/frameworks/intel/ssl.bro) +rest_target(${psd} 
policy/frameworks/intel/where-locations.bro) rest_target(${psd} policy/frameworks/metrics/conn-example.bro) rest_target(${psd} policy/frameworks/metrics/http-example.bro) rest_target(${psd} policy/frameworks/metrics/ssl-example.bro) @@ -115,6 +127,7 @@ rest_target(${psd} policy/frameworks/software/version-changes.bro) rest_target(${psd} policy/frameworks/software/vulnerable.bro) rest_target(${psd} policy/integration/barnyard2/main.bro) rest_target(${psd} policy/integration/barnyard2/types.bro) +rest_target(${psd} policy/integration/collective-intel/main.bro) rest_target(${psd} policy/misc/analysis-groups.bro) rest_target(${psd} policy/misc/capture-loss.bro) rest_target(${psd} policy/misc/loaded-scripts.bro) @@ -129,7 +142,6 @@ rest_target(${psd} policy/protocols/dns/detect-external-names.bro) rest_target(${psd} policy/protocols/ftp/detect.bro) rest_target(${psd} policy/protocols/ftp/software.bro) rest_target(${psd} policy/protocols/http/detect-MHR.bro) -rest_target(${psd} policy/protocols/http/detect-intel.bro) rest_target(${psd} policy/protocols/http/detect-sqli.bro) rest_target(${psd} policy/protocols/http/detect-webapps.bro) rest_target(${psd} policy/protocols/http/header-names.bro) diff --git a/scripts/base/frameworks/intel/__load__.bro b/scripts/base/frameworks/intel/__load__.bro index d551be57d3..d8c77b86e3 100644 --- a/scripts/base/frameworks/intel/__load__.bro +++ b/scripts/base/frameworks/intel/__load__.bro @@ -1 +1,11 @@ -@load ./main \ No newline at end of file +@load ./main + +# The cluster framework must be loaded first. +@load base/frameworks/cluster + +@if ( Cluster::is_enabled() ) +@load ./cluster +@endif + +# This needs cluster support to only read on the manager. +@load ./input diff --git a/scripts/base/frameworks/intel/cluster.bro b/scripts/base/frameworks/intel/cluster.bro new file mode 100644 index 0000000000..8ed17109c1 --- /dev/null +++ b/scripts/base/frameworks/intel/cluster.bro @@ -0,0 +1,61 @@ +##! 
Cluster transparency support for the intelligence framework. This is mostly oriented +##! toward distributing intelligence information across clusters. + +@load base/frameworks/cluster +@load ./input + +module Intel; + +redef record Item += { + ## This field is used internally for cluster transparency to avoid + ## re-dispatching intelligence items over and over from workers. + first_dispatch: bool &default=T; +}; + +# If this process is not a manager process, we don't want the full metadata +@if ( Cluster::local_node_type() != Cluster::MANAGER ) +redef have_full_data = F; +@endif + +global cluster_new_item: event(item: Item); + +# Primary intelligence distribution comes from manager. +redef Cluster::manager2worker_events += /^Intel::(cluster_new_item)$/; +# If a worker finds intelligence and adds it, it should share it back to the manager. +redef Cluster::worker2manager_events += /^Intel::(cluster_new_item|match_no_items)$/; + +@if ( Cluster::local_node_type() == Cluster::MANAGER ) +event Intel::match_no_items(s: Seen) &priority=5 + { + event Intel::match(s, Intel::get_items(s)); + } + +event remote_connection_handshake_done(p: event_peer) + { + # When a worker connects, send it the complete minimal data store. + # It will be kept up to date after this by the cluster_new_item event. + if ( Cluster::nodes[p$descr]$node_type == Cluster::WORKER ) + { + send_id(p, "Intel::min_data_store"); + } + } +@endif + +event Intel::cluster_new_item(item: Intel::Item) &priority=5 + { + # Ignore locally generated events to avoid event storms. + if ( is_remote_event() ) + Intel::insert(item); + } + +event Intel::new_item(item: Intel::Item) &priority=5 + { + # The cluster manager always rebroadcasts intelligence. + # Workers redistribute it if it was locally generated. 
+ if ( Cluster::local_node_type() == Cluster::MANAGER || + item$first_dispatch ) + { + item$first_dispatch=F; + event Intel::cluster_new_item(item); + } + } diff --git a/scripts/base/frameworks/intel/input.bro b/scripts/base/frameworks/intel/input.bro new file mode 100644 index 0000000000..6ab7c6a674 --- /dev/null +++ b/scripts/base/frameworks/intel/input.bro @@ -0,0 +1,33 @@ +@load ./main + +module Intel; + +export { + ## Intelligence files that will be read off disk. The files are + ## reread every time they are updated so updates must be atomic with + ## "mv" instead of writing the file in place. + const read_files: set[string] = {} &redef; +} + +event Intel::read_entry(desc: Input::EventDescription, tpe: Input::Event, item: Intel::Item) + { + Intel::insert(item); + } + +event bro_init() &priority=5 + { + if ( ! Cluster::is_enabled() || + Cluster::local_node_type() == Cluster::MANAGER ) + { + for ( a_file in read_files ) + { + Input::add_event([$source=a_file, + $reader=Input::READER_ASCII, + $mode=Input::REREAD, + $name=cat("intel-", a_file), + $fields=Intel::Item, + $ev=Intel::read_entry]); + } + } + } + diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 9ee1c75100..d66990e611 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -1,323 +1,345 @@ ##! The intelligence framework provides a way to store and query IP addresses, -##! strings (with a subtype), and numeric (with a subtype) data. Metadata -##! also be associated with the intelligence like tags which are arbitrary -##! strings, time values, and longer descriptive strings. - -# Example string subtypes: -# url -# email -# domain -# software -# user_name -# file_name -# file_md5 -# x509_md5 - -# Example tags: -# infrastructure -# malicious -# sensitive -# canary -# friend +##! and strings (with a str_type). Metadata can +##! also be associated with the intelligence for making more informed +##! 
decisions about matching and handling of intelligence. @load base/frameworks/notice module Intel; export { - ## The intel logging stream identifier. redef enum Log::ID += { LOG }; - redef enum Notice::Type += { - ## This notice should be used in all detector scripts to indicate - ## an intelligence based detection. - Detection, + ## String data needs to be further categorized since it could represent + ## any number of types of data. + type StrType: enum { + ## A complete URL without the prefix "http://". + URL, + ## User-Agent string, typically HTTP or mail message body. + USER_AGENT, + ## Email address. + EMAIL, + ## DNS domain name. + DOMAIN, + ## A user name. + USER_NAME, + ## File hash which is non-hash type specific. It's up to the user to query + ## for any relevant hash types. + FILE_HASH, + ## Certificate SHA-1 hash. + CERT_HASH, }; - ## Record type used for logging information from the intelligence framework. - ## Primarily for problems or oddities with inserting and querying data. - ## This is important since the content of the intelligence framework can - ## change quite dramatically during runtime and problems may be introduced - ## into the data. - type Info: record { - ## The current network time. - ts: time &log; - ## Represents the severity of the message. - ## This value should be one of: "info", "warn", "error" - level: string &log; - ## The message. - message: string &log; - }; - - ## Record to represent metadata associated with a single piece of - ## intelligence. + ## Data about an :bro:type:`Intel::Item` type MetaData: record { - ## A description for the data. + ## An arbitrary string value representing the data source. Typically, + ## the convention for this field will be the source name and feed name + ## separated by a hyphen. For example: "source1-c&c". + source: string; + ## A freeform description for the data. desc: string &optional; - ## A URL where more information may be found about the intelligence. 
+ ## A URL for more information about the data. url: string &optional; - ## The time at which the data was first declared to be intelligence. - first_seen: time &optional; - ## When this data was most recent inserted into the framework. - latest_seen: time &optional; - ## Arbitrary text tags for the data. - tags: set[string]; }; - ## Record to represent a singular piece of intelligence. + ## Represents a piece of intelligence. type Item: record { - ## If the data is an IP address, this hold the address. - ip: addr &optional; - ## If the data is textual, this holds the text. - str: string &optional; - ## If the data is numeric, this holds the number. - num: int &optional; - ## The subtype of the data for when either the $str or $num fields are - ## given. If one of those fields are given, this field must be present. - subtype: string &optional; + ## The IP address if the intelligence is about an IP address. + host: addr &optional; + ## The network if the intelligence is about a CIDR block. + net: subnet &optional; + ## The string if the intelligence is about a string. + str: string &optional; + ## The type of data that is in the string if the $str field is set. + str_type: StrType &optional; - ## The next five fields are temporary until a better model for - ## attaching metadata to an intelligence item is created. - desc: string &optional; - url: string &optional; - first_seen: time &optional; - latest_seen: time &optional; - tags: set[string]; - - ## These single string tags are throw away until pybroccoli supports sets. - tag1: string &optional; - tag2: string &optional; - tag3: string &optional; + ## Metadata for the item. Typically represents more deeply + ## descriptive data for a piece of intelligence. + meta: MetaData; }; - ## Record model used for constructing queries against the intelligence - ## framework. - type QueryItem: record { - ## If an IP address is being queried for, this field should be given. 
- ip: addr &optional; - ## If a string is being queried for, this field should be given. - str: string &optional; - ## If numeric data is being queried for, this field should be given. - num: int &optional; - ## If either a string or number is being queried for, this field should - ## indicate the subtype of the data. - subtype: string &optional; - - ## A set of tags where if a single metadata record attached to an item - ## has any one of the tags defined in this field, it will match. - or_tags: set[string] &optional; - ## A set of tags where a single metadata record attached to an item - ## must have all of the tags defined in this field. - and_tags: set[string] &optional; - - ## The predicate can be given when searching for a match. It will - ## be tested against every :bro:type:`Intel::MetaData` item associated - ## with the data being matched on. If it returns T a single time, the - ## matcher will consider that the item has matched. This field can - ## be used for constructing arbitrarily complex queries that may not - ## be possible with the $or_tags or $and_tags fields. - pred: function(meta: Intel::MetaData): bool &optional; + ## Enum to represent where data came from when it was discovered. + ## The convention is to prefix the name with "IN_". + type Where: enum { + ## A catchall value to represent data of unknown provenance. + IN_ANYWHERE, }; - - ## Function to insert data into the intelligence framework. - ## - ## item: The data item. + + ## The $host field and combination of $str and $str_type fields are mutually + ## exclusive. These records *must* represent either an IP address being + ## seen or a string being seen. + type Seen: record { + ## The IP address if the data seen is an IP address. + host: addr &log &optional; + ## The string if the data is about a string. + str: string &log &optional; + ## The type of data that is in the string if the $str field is set. + str_type: StrType &log &optional; + + ## Where the data was discovered. 
+ where: Where &log; + + ## If the data was discovered within a connection, the + ## connection record should go here to give context to the data. + conn: connection &optional; + }; + + ## Record used for the logging framework representing a positive + ## hit within the intelligence framework. + type Info: record { + ## Timestamp when the data was discovered. + ts: time &log; + + ## If a connection was associated with this intelligence hit, + ## this is the uid for the connection. + uid: string &log &optional; + ## If a connection was associated with this intelligence hit, + ## this is the conn_id for the connection. + id: conn_id &log &optional; + + ## Where the data was seen. + seen: Seen &log; + ## Sources which supplied data that resulted in this match. + sources: set[string] &log; + }; + + ## Intelligence data manipulation functions. + global insert: function(item: Item); + + ## Function to declare discovery of a piece of data in order to check + ## it against known intelligence for matches. + global seen: function(s: Seen); + + ## Event to represent a match in the intelligence data from data that was seen. + ## On clusters there is no assurance as to where this event will be generated + ## so do not assume that arbitrary global state beyond the given data + ## will be available. ## - ## Returns: T if the data was successfully inserted into the framework, - ## otherwise it returns F. - global insert: function(item: Item): bool; - - ## A wrapper for the :bro:id:`Intel::insert` function. This is primarily - ## used as the external API for inserting data into the intelligence - ## using Broccoli. - global insert_event: event(item: Item); - - ## Function for matching data within the intelligence framework. - global matcher: function(item: QueryItem): bool; + ## This is the primary mechanism where a user will take actions based on data + ## within the intelligence framework. 
+ global match: event(s: Seen, items: set[Item]); + + global log_intel: event(rec: Info); } -type MetaDataStore: table[count] of MetaData; +# Internal handler for matches with no metadata available. +global match_no_items: event(s: Seen); + +# Internal events for cluster data distribution +global new_item: event(item: Item); +global updated_item: event(item: Item); + +# Optionally store metadata. This is used internally depending on +# if this is a cluster deployment or not. +const have_full_data = T &redef; + +# The in memory data structure for holding intelligence. type DataStore: record { - ip_data: table[addr] of MetaDataStore; - # The first string is the actual value and the second string is the subtype. - string_data: table[string, string] of MetaDataStore; - int_data: table[int, string] of MetaDataStore; + net_data: table[subnet] of set[MetaData]; + string_data: table[string, StrType] of set[MetaData]; }; -global data_store: DataStore; +global data_store: DataStore &redef; -event bro_init() +# The in memory data structure for holding the barest matchable intelligence. +# This is primarily for workers to do the initial quick matches and store +# a minimal amount of data for the full match to happen on the manager. +type MinDataStore: record { + net_data: set[subnet]; + string_data: set[string, StrType]; +}; +global min_data_store: MinDataStore &redef; + + +event bro_init() &priority=5 { - Log::create_stream(Intel::LOG, [$columns=Info]); + Log::create_stream(LOG, [$columns=Info, $ev=log_intel]); } - -function insert(item: Item): bool +function find(s: Seen): bool { - local err_msg = ""; - if ( (item?$str || item?$num) && ! item?$subtype ) - err_msg = "You must provide a subtype to insert_sync or this item doesn't make sense."; - - if ( err_msg == "" ) + if ( s?$host && + ((have_full_data && s$host in data_store$net_data) || + (s$host in min_data_store$net_data))) { - # Create and fill out the meta data item. 
- local meta: MetaData; - if ( item?$first_seen ) - meta$first_seen = item$first_seen; - if ( item?$latest_seen ) - meta$latest_seen = item$latest_seen; - if ( item?$tags ) - meta$tags = item$tags; - if ( item?$desc ) - meta$desc = item$desc; - if ( item?$url ) - meta$url = item$url; - - - # This is hopefully only temporary until pybroccoli supports sets. - if ( item?$tag1 ) - add item$tags[item$tag1]; - if ( item?$tag2 ) - add item$tags[item$tag2]; - if ( item?$tag3 ) - add item$tags[item$tag3]; - - if ( item?$ip ) - { - if ( item$ip !in data_store$ip_data ) - data_store$ip_data[item$ip] = table(); - data_store$ip_data[item$ip][|data_store$ip_data[item$ip]|] = meta; - return T; - } - else if ( item?$str ) - { - if ( [item$str, item$subtype] !in data_store$string_data ) - data_store$string_data[item$str, item$subtype] = table(); - - data_store$string_data[item$str, item$subtype][|data_store$string_data[item$str, item$subtype]|] = meta; - return T; - } - else if ( item?$num ) - { - if ( [item$num, item$subtype] !in data_store$int_data ) - data_store$int_data[item$num, item$subtype] = table(); + return T; + } + else if ( s?$str && s?$str_type && + ((have_full_data && [s$str, s$str_type] in data_store$string_data) || + ([s$str, s$str_type] in min_data_store$string_data))) + { + return T; + } + else + { + return F; + } + } - data_store$int_data[item$num, item$subtype][|data_store$int_data[item$num, item$subtype]|] = meta; - return T; +function get_items(s: Seen): set[Item] + { + local item: Item; + local return_data: set[Item] = set(); + + if ( ! have_full_data ) + { + # A reporter warning should be generated here because this function + # should never be called from a host that doesn't have the full data. + # TODO: do a reporter warning. 
+ return return_data; + } + + if ( s?$host ) + { + # See if the host is known about and it has meta values + if ( s$host in data_store$net_data ) + { + for ( m in data_store$net_data[s$host] ) + { + # TODO: the lookup should be finding all and not just most specific + # and $host/$net should have the correct value. + item = [$host=s$host, $meta=m]; + add return_data[item]; + } + } + } + else if ( s?$str && s?$str_type ) + { + # See if the string is known about and it has meta values + if ( [s$str, s$str_type] in data_store$string_data ) + { + for ( m in data_store$string_data[s$str, s$str_type] ) + { + item = [$str=s$str, $str_type=s$str_type, $meta=m]; + add return_data[item]; + } + } + } + + return return_data; + } + +function Intel::seen(s: Seen) + { + if ( find(s) ) + { + if ( have_full_data ) + { + local items = get_items(s); + event Intel::match(s, items); } else - err_msg = "Failed to insert intelligence item for some unknown reason."; - } - - if ( err_msg != "" ) - Log::write(Intel::LOG, [$ts=network_time(), $level="warn", $message=fmt(err_msg)]); - return F; - } - -event insert_event(item: Item) - { - insert(item); - } - -function match_item_with_metadata(item: QueryItem, meta: MetaData): bool - { - if ( item?$and_tags ) - { - local matched = T; - # Every tag given has to match in a single MetaData entry. - for ( tag in item$and_tags ) { - if ( tag !in meta$tags ) - matched = F; + event Intel::match_no_items(s); } - if ( matched ) + } + } + + +function has_meta(check: MetaData, metas: set[MetaData]): bool + { + local check_hash = md5_hash(check); + for ( m in metas ) + { + if ( check_hash == md5_hash(m) ) return T; } - else if ( item?$or_tags ) - { - # For OR tags, only a single tag has to match. - for ( tag in item$or_tags ) - { - if ( tag in meta$tags ) - return T; - } - } - else if ( item?$pred ) - return item$pred(meta); - # This indicates some sort of failure in the query + # The records must not be equivalent if we made it this far. 
return F; } - -function matcher(item: QueryItem): bool + +event Intel::match(s: Seen, items: set[Item]) &priority=5 { - local err_msg = ""; - if ( ! (item?$ip || item?$str || item?$num) ) - err_msg = "You must supply one of the $ip, $str, or $num fields to search on"; - else if ( (item?$or_tags || item?$and_tags) && item?$pred ) - err_msg = "You can't match with both tags and a predicate."; - else if ( item?$or_tags && item?$and_tags ) - err_msg = "You can't match with both OR'd together tags and AND'd together tags"; - else if ( (item?$str || item?$num) && ! item?$subtype ) - err_msg = "You must provide a subtype to matcher or this item doesn't make sense."; - else if ( item?$str && item?$num ) - err_msg = "You must only provide $str or $num, not both."; - - local meta: MetaData; + local empty_set: set[string] = set(); + local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set]; - if ( err_msg == "" ) + if ( s?$conn ) { - if ( item?$ip ) - { - if ( item$ip in data_store$ip_data ) - { - if ( ! item?$and_tags && ! item?$or_tags && ! item?$pred ) - return T; - - for ( i in data_store$ip_data[item$ip] ) - { - meta = data_store$ip_data[item$ip][i]; - if ( match_item_with_metadata(item, meta) ) - return T; - } - } - } - - else if ( item?$str ) - { - if ( [item$str, item$subtype] in data_store$string_data ) - { - if ( ! item?$and_tags && ! item?$or_tags && ! item?$pred ) - return T; - - for ( i in data_store$string_data[item$str, item$subtype] ) - { - meta = data_store$string_data[item$str, item$subtype][i]; - if ( match_item_with_metadata(item, meta) ) - return T; - } - } - } - - else if ( item?$num ) - { - if ( [item$num, item$subtype] in data_store$int_data ) - { - if ( ! item?$and_tags && ! item?$or_tags && ! 
item?$pred ) - return T; - - for ( i in data_store$int_data[item$num, item$subtype] ) - { - meta = data_store$int_data[item$num, item$subtype][i]; - if ( match_item_with_metadata(item, meta) ) - return T; - } - } - } - else - err_msg = "Failed to query intelligence data for some unknown reason."; + info$uid = s$conn$uid; + info$id = s$conn$id; } - - if ( err_msg != "" ) - Log::write(Intel::LOG, [$ts=network_time(), $level="error", $message=fmt(err_msg)]); - return F; + + for ( item in items ) + add info$sources[item$meta$source]; + + Log::write(Intel::LOG, info); } + +function insert(item: Item) + { + if ( item?$str && !item?$str_type ) + { + event reporter_warning(network_time(), fmt("You must provide a str_type for strings or this item doesn't make sense. Item: %s", item), ""); + return; + } + + # Create and fill out the meta data item. + local meta = item$meta; + local metas: set[MetaData]; + + if ( item?$host ) + { + local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128); + if ( have_full_data ) + { + if ( host !in data_store$net_data ) + data_store$net_data[host] = set(); + + metas = data_store$net_data[host]; + } + + add min_data_store$net_data[host]; + } + else if ( item?$net ) + { + if ( have_full_data ) + { + if ( item$net !in data_store$net_data ) + data_store$net_data[item$net] = set(); + + metas = data_store$net_data[item$net]; + } + + add min_data_store$net_data[item$net]; + } + else if ( item?$str ) + { + if ( have_full_data ) + { + if ( [item$str, item$str_type] !in data_store$string_data ) + data_store$string_data[item$str, item$str_type] = set(); + + metas = data_store$string_data[item$str, item$str_type]; + } + + add min_data_store$string_data[item$str, item$str_type]; + } + + local updated = F; + if ( have_full_data ) + { + for ( m in metas ) + { + if ( meta$source == m$source ) + { + if ( has_meta(meta, metas) ) + { + # It's the same item being inserted again. 
+ return; + } + else + { + # Same source, different metadata means updated item. + updated = T; + } + } + } + add metas[item$meta]; + } + + if ( updated ) + event Intel::updated_item(item); + else + event Intel::new_item(item); + } + \ No newline at end of file diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 0ae4ed77ea..8b36899f10 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -14,6 +14,7 @@ @load base/utils/patterns @load base/utils/strings @load base/utils/thresholds +@load base/utils/urls # This has some deep interplay between types and BiFs so it's # loaded in base/init-bare.bro diff --git a/scripts/base/utils/urls.bro b/scripts/base/utils/urls.bro new file mode 100644 index 0000000000..d704e72013 --- /dev/null +++ b/scripts/base/utils/urls.bro @@ -0,0 +1,25 @@ +## Functions for URL handling. + +## A regular expression for matching and extracting URLs. +const url_regex = /^([a-zA-Z\-]{3,5})(:\/\/[^\/?#"'\r\n><]*)([^?#"'\r\n><]*)([^[:blank:]\r\n"'><]*|\??[^"'\r\n><]*)/ &redef; + +## Extracts URLs discovered in arbitrary text. +function find_all_urls(s: string): string_set + { + return find_all(s, url_regex); + } + +## Extracts URLs discovered in arbitrary text without +## the URL scheme included. 
+function find_all_urls_without_scheme(s: string): string_set + { + local urls = find_all_urls(s); + local return_urls: set[string] = set(); + for ( url in urls ) + { + local no_scheme = sub(url, /^([a-zA-Z\-]{3,5})(:\/\/)/, ""); + add return_urls[no_scheme]; + } + + return return_urls; + } \ No newline at end of file diff --git a/scripts/policy/frameworks/intel/__load__.bro b/scripts/policy/frameworks/intel/__load__.bro new file mode 100644 index 0000000000..3ffbc35378 --- /dev/null +++ b/scripts/policy/frameworks/intel/__load__.bro @@ -0,0 +1,8 @@ +@load ./conn-established +@load ./dns +@load ./http-host-header +@load ./http-url +@load ./http-user-agents +@load ./ssl +@load ./smtp +@load ./smtp-url-extraction \ No newline at end of file diff --git a/scripts/policy/frameworks/intel/conn-established.bro b/scripts/policy/frameworks/intel/conn-established.bro new file mode 100644 index 0000000000..a2e67b292b --- /dev/null +++ b/scripts/policy/frameworks/intel/conn-established.bro @@ -0,0 +1,8 @@ +@load base/frameworks/intel +@load ./where-locations + +event connection_established(c: connection) + { + Intel::seen([$host=c$id$orig_h, $conn=c, $where=Conn::IN_ORIG]); + Intel::seen([$host=c$id$resp_h, $conn=c, $where=Conn::IN_RESP]); + } diff --git a/scripts/policy/frameworks/intel/dns.bro b/scripts/policy/frameworks/intel/dns.bro new file mode 100644 index 0000000000..a0dee47acf --- /dev/null +++ b/scripts/policy/frameworks/intel/dns.bro @@ -0,0 +1,10 @@ +@load base/frameworks/intel +@load ./where-locations + +event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qclass: count) + { + Intel::seen([$str=query, + $str_type=Intel::DOMAIN, + $conn=c, + $where=DNS::IN_REQUEST]); + } diff --git a/scripts/policy/frameworks/intel/http-host-header.bro b/scripts/policy/frameworks/intel/http-host-header.bro new file mode 100644 index 0000000000..f16b1628aa --- /dev/null +++ b/scripts/policy/frameworks/intel/http-host-header.bro @@ -0,0 +1,11 @@ +@load 
base/frameworks/intel +@load ./where-locations + +event http_header(c: connection, is_orig: bool, name: string, value: string) + { + if ( is_orig && name == "HOST" ) # only Host headers sent by the originator + Intel::seen([$str=value, + $str_type=Intel::DOMAIN, + $conn=c, + $where=HTTP::IN_HOST_HEADER]); + } diff --git a/scripts/policy/frameworks/intel/http-url.bro b/scripts/policy/frameworks/intel/http-url.bro new file mode 100644 index 0000000000..7c4086a7e6 --- /dev/null +++ b/scripts/policy/frameworks/intel/http-url.bro @@ -0,0 +1,11 @@ +@load base/frameworks/intel +@load ./where-locations + +event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) + { + if ( is_orig && c?$http ) # originator side only, and only when c$http exists for HTTP::build_url + Intel::seen([$str=HTTP::build_url(c$http), + $str_type=Intel::URL, + $conn=c, + $where=HTTP::IN_URL]); + } diff --git a/scripts/policy/frameworks/intel/http-user-agents.bro b/scripts/policy/frameworks/intel/http-user-agents.bro new file mode 100644 index 0000000000..93445c1e43 --- /dev/null +++ b/scripts/policy/frameworks/intel/http-user-agents.bro @@ -0,0 +1,12 @@ +@load base/frameworks/intel +@load ./where-locations + +event http_header(c: connection, is_orig: bool, name: string, value: string) + { + if ( is_orig && name == "USER-AGENT" ) # only User-Agent headers sent by the originator + Intel::seen([$str=value, + $str_type=Intel::USER_AGENT, + $conn=c, + $where=HTTP::IN_USER_AGENT_HEADER]); + } + diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/smtp-url-extraction.bro new file mode 100644 index 0000000000..78c8f74bf2 --- /dev/null +++ b/scripts/policy/frameworks/intel/smtp-url-extraction.bro @@ -0,0 +1,15 @@ +@load base/frameworks/intel +@load base/utils/urls +@load ./where-locations + +event mime_segment_data(c: connection, length: count, data: string) &priority=3 # NOTE(review): no c?$smtp check here, unlike smtp.bro; confirm this event is only raised for SMTP traffic + { + local urls = find_all_urls_without_scheme(data); # URLs in this segment with the scheme stripped + for ( url in urls ) + { + Intel::seen([$str=url, + $str_type=Intel::URL, + $conn=c, + $where=SMTP::IN_MESSAGE]); + } + } \ No newline at end of file diff --git
a/scripts/policy/frameworks/intel/smtp.bro b/scripts/policy/frameworks/intel/smtp.bro new file mode 100644 index 0000000000..f44f0575dc --- /dev/null +++ b/scripts/policy/frameworks/intel/smtp.bro @@ -0,0 +1,70 @@ +@load base/frameworks/intel +@load ./where-locations + +event mime_end_entity(c: connection) + { + if ( c?$smtp ) # nothing is reported unless SMTP state is attached to the connection + { + if ( c$smtp?$path ) + { + local path = c$smtp$path; + for ( i in path ) # report every address stored in c$smtp$path + { + Intel::seen([$host=path[i], + $conn=c, + $where=SMTP::IN_RECEIVED_HEADER]); + } + } + + if ( c$smtp?$user_agent ) + Intel::seen([$str=c$smtp$user_agent, + $str_type=Intel::USER_AGENT, + $conn=c, + $where=SMTP::IN_HEADER]); + + if ( c$smtp?$x_originating_ip ) + Intel::seen([$host=c$smtp$x_originating_ip, + $conn=c, + $where=SMTP::IN_X_ORIGINATING_IP_HEADER]); + + if ( c$smtp?$mailfrom ) + Intel::seen([$str=c$smtp$mailfrom, + $str_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_MAIL_FROM]); + + if ( c$smtp?$rcptto ) + { + for ( rcptto in c$smtp$rcptto ) + { + Intel::seen([$str=rcptto, + $str_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_RCPT_TO]); + } + } + + if ( c$smtp?$from ) + Intel::seen([$str=c$smtp$from, + $str_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_FROM]); + + if ( c$smtp?$to ) + { + for ( email_to in c$smtp$to ) + { + Intel::seen([$str=email_to, + $str_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_TO]); + } + } + + if ( c$smtp?$reply_to ) + Intel::seen([$str=c$smtp$reply_to, + $str_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_REPLY_TO]); + } + } diff --git a/scripts/policy/frameworks/intel/ssl.bro b/scripts/policy/frameworks/intel/ssl.bro new file mode 100644 index 0000000000..394df63020 --- /dev/null +++ b/scripts/policy/frameworks/intel/ssl.bro @@ -0,0 +1,33 @@ +@load base/frameworks/intel +@load ./where-locations + +event x509_certificate(c: connection, is_orig: bool, cert: X509, chain_idx: count, chain_len: count, der_cert: string) + { + if ( chain_idx == 0 ) # only the certificate at chain index 0 is examined + { + if ( /emailAddress=/ in cert$subject ) + { + local email = 
sub(cert$subject, /^.*emailAddress=/, ""); # drop everything up to and including "emailAddress=" + email = sub(email, /,.*$/, ""); # drop everything from the first comma onward + Intel::seen([$str=email, + $str_type=Intel::EMAIL, + $conn=c, + $where=(is_orig ? SSL::IN_CLIENT_CERT : SSL::IN_SERVER_CERT)]); + } + + Intel::seen([$str=sha1_hash(der_cert), # SHA1 of the der_cert string is reported as Intel::CERT_HASH + $str_type=Intel::CERT_HASH, + $conn=c, + $where=(is_orig ? SSL::IN_CLIENT_CERT : SSL::IN_SERVER_CERT)]); + } + } + +event ssl_extension(c: connection, is_orig: bool, code: count, val: string) # reports c$ssl$server_name when the originator sends a server_name extension + { + if ( is_orig && SSL::extensions[code] == "server_name" && + c?$ssl && c$ssl?$server_name ) + Intel::seen([$str=c$ssl$server_name, + $str_type=Intel::DOMAIN, + $conn=c, + $where=SSL::IN_SERVER_NAME]); + } diff --git a/scripts/policy/frameworks/intel/where-locations.bro b/scripts/policy/frameworks/intel/where-locations.bro new file mode 100644 index 0000000000..4773de9c73 --- /dev/null +++ b/scripts/policy/frameworks/intel/where-locations.bro @@ -0,0 +1,25 @@ +@load base/frameworks/intel + +export { + redef enum Intel::Where += { # location values for the scripts in policy/frameworks/intel + Conn::IN_ORIG, + Conn::IN_RESP, + DNS::IN_REQUEST, + DNS::IN_RESPONSE, + HTTP::IN_HOST_HEADER, + HTTP::IN_USER_AGENT_HEADER, + HTTP::IN_URL, + SMTP::IN_MAIL_FROM, + SMTP::IN_RCPT_TO, + SMTP::IN_FROM, + SMTP::IN_TO, + SMTP::IN_RECEIVED_HEADER, + SMTP::IN_REPLY_TO, + SMTP::IN_X_ORIGINATING_IP_HEADER, + SMTP::IN_MESSAGE, + SSL::IN_SERVER_CERT, + SSL::IN_CLIENT_CERT, + SSL::IN_SERVER_NAME, + SMTP::IN_HEADER, + }; +} diff --git a/scripts/policy/integration/collective-intel/README b/scripts/policy/integration/collective-intel/README new file mode 100644 index 0000000000..550eb96962 --- /dev/null +++ b/scripts/policy/integration/collective-intel/README @@ -0,0 +1,6 @@ +Collective Intelligence Framework Integration +============================================= + +The scripts in this module are for deeper integration with the Collective Intelligence +Framework (CIF) since Bro's Intel framework doesn't natively behave the same as CIF nor +does it store and maintain the same data in all cases.
\ No newline at end of file diff --git a/scripts/policy/integration/collective-intel/__load__.bro b/scripts/policy/integration/collective-intel/__load__.bro new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/policy/integration/collective-intel/__load__.bro @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/policy/integration/collective-intel/main.bro b/scripts/policy/integration/collective-intel/main.bro new file mode 100644 index 0000000000..f347ba8867 --- /dev/null +++ b/scripts/policy/integration/collective-intel/main.bro @@ -0,0 +1,13 @@ + +module Intel; + +## These fields extend Intel::MetaData for compatibility between Bro and the Collective +## Intelligence Framework (CIF). Every field is optional. +redef record Intel::MetaData += { + ## Maps to the Impact field in the Collective Intelligence Framework. + cif_impact: string &optional; + ## Maps to the Severity field in the Collective Intelligence Framework. + cif_severity: string &optional; + ## Maps to the Confidence field in the Collective Intelligence Framework. + cif_confidence: double &optional; +}; \ No newline at end of file diff --git a/scripts/policy/protocols/http/detect-intel.bro b/scripts/policy/protocols/http/detect-intel.bro deleted file mode 100644 index 281d705c13..0000000000 --- a/scripts/policy/protocols/http/detect-intel.bro +++ /dev/null @@ -1,21 +0,0 @@ -##! Intelligence based HTTP detections. Not yet working!
- -@load base/protocols/http/main -@load base/protocols/http/utils -@load base/frameworks/intel/main - -module HTTP; - -event log_http(rec: Info) - { - local url = HTTP::build_url(rec); - local query = [$str=url, $subtype="url", $or_tags=set("malicious", "malware")]; - if ( Intel::matcher(query) ) - { - local msg = fmt("%s accessed a malicious URL from the intelligence framework", rec$id$orig_h); - NOTICE([$note=Intel::Detection, - $msg=msg, - $sub=HTTP::build_url_http(rec), - $id=rec$id]); - } - } diff --git a/scripts/test-all-policy.bro b/scripts/test-all-policy.bro index 94c9c04a69..9358ffd06f 100644 --- a/scripts/test-all-policy.bro +++ b/scripts/test-all-policy.bro @@ -14,6 +14,16 @@ # @load frameworks/control/controller.bro @load frameworks/dpd/detect-protocols.bro @load frameworks/dpd/packet-segment-logging.bro +@load frameworks/intel/__load__.bro +@load frameworks/intel/conn-established.bro +@load frameworks/intel/dns.bro +@load frameworks/intel/http-host-header.bro +@load frameworks/intel/http-url.bro +@load frameworks/intel/http-user-agents.bro +@load frameworks/intel/smtp-url-extraction.bro +@load frameworks/intel/smtp.bro +@load frameworks/intel/ssl.bro +@load frameworks/intel/where-locations.bro @load frameworks/metrics/conn-example.bro @load frameworks/metrics/http-example.bro @load frameworks/metrics/ssl-example.bro @@ -22,6 +32,8 @@ @load integration/barnyard2/__load__.bro @load integration/barnyard2/main.bro @load integration/barnyard2/types.bro +@load integration/collective-intel/__load__.bro +@load integration/collective-intel/main.bro @load misc/analysis-groups.bro @load misc/capture-loss.bro @load misc/loaded-scripts.bro @@ -35,7 +47,6 @@ @load protocols/dns/detect-external-names.bro @load protocols/ftp/detect.bro @load protocols/ftp/software.bro -@load protocols/http/detect-intel.bro @load protocols/http/detect-MHR.bro @load protocols/http/detect-sqli.bro @load protocols/http/detect-webapps.bro diff --git 
a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 939d85fdbe..7fd3a1bdc8 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -40,6 +40,7 @@ scripts/base/init-default.bro scripts/base/utils/paths.bro scripts/base/utils/strings.bro scripts/base/utils/thresholds.bro + scripts/base/utils/urls.bro scripts/base/frameworks/notice/__load__.bro scripts/base/frameworks/notice/./main.bro scripts/base/frameworks/notice/./weird.bro @@ -69,6 +70,7 @@ scripts/base/init-default.bro scripts/base/frameworks/metrics/./non-cluster.bro scripts/base/frameworks/intel/__load__.bro scripts/base/frameworks/intel/./main.bro + scripts/base/frameworks/intel/./input.bro scripts/base/frameworks/reporter/__load__.bro scripts/base/frameworks/reporter/./main.bro scripts/base/frameworks/tunnels/__load__.bro diff --git a/testing/btest/Baseline/coverage.init-default/missing_loads b/testing/btest/Baseline/coverage.init-default/missing_loads index 4497bbd185..34ba654dec 100644 --- a/testing/btest/Baseline/coverage.init-default/missing_loads +++ b/testing/btest/Baseline/coverage.init-default/missing_loads @@ -2,5 +2,6 @@ -./frameworks/cluster/nodes/proxy.bro -./frameworks/cluster/nodes/worker.bro -./frameworks/cluster/setup-connections.bro +-./frameworks/intel/cluster.bro -./frameworks/metrics/cluster.bro -./frameworks/notice/cluster.bro diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout new file mode 100644 index 0000000000..c57cda176e --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1..stdout @@ -0,0 +1,2 @@ +cluster_new_item: 123.123.123.123 
inserted by worker-1 (from peer: worker-1) +cluster_new_item: 4.3.2.1 inserted by worker-2 (from peer: worker-2) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log new file mode 100644 index 0000000000..26efc039c4 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log @@ -0,0 +1,10 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path intel +#open 2012-10-03-20-20-39 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources +#types time string addr port addr port addr string enum enum table[string] +1349295639.424940 - - - - - 123.123.123.123 - - Intel::IN_ANYWHERE worker-1 +#close 2012-10-03-20-20-49 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout new file mode 100644 index 0000000000..3be0ae6f70 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-1..stdout @@ -0,0 +1,3 @@ +cluster_new_item: 1.2.3.4 inserted by manager (from peer: manager-1) +cluster_new_item: 123.123.123.123 inserted by worker-1 (from peer: manager-1) +cluster_new_item: 4.3.2.1 inserted by worker-2 (from peer: manager-1) diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout new file mode 100644 index 0000000000..df950e68c4 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/worker-2..stdout @@ -0,0 +1,4 @@ +cluster_new_item: 1.2.3.4 inserted by manager (from peer: manager-1) +cluster_new_item: 123.123.123.123 inserted by worker-1 (from 
peer: manager-1) +cluster_new_item: 4.3.2.1 inserted by worker-2 (from peer: manager-1) +Doing a lookup diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log new file mode 100644 index 0000000000..d72e9efed3 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log @@ -0,0 +1,11 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path intel +#open 2012-10-03-20-18-05 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources +#types time string addr port addr port addr string enum enum table[string] +1349295485.114156 - - - - - - e@mail.com Intel::EMAIL SOMEWHERE source1 +1349295485.114156 - - - - - 1.2.3.4 - - SOMEWHERE source1 +#close 2012-10-03-20-18-05 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.insert-and-matcher/out b/testing/btest/Baseline/scripts.base.frameworks.intel.insert-and-matcher/out deleted file mode 100644 index 71fec4e23c..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.insert-and-matcher/out +++ /dev/null @@ -1,3 +0,0 @@ -VALID -VALID -VALID diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1..stdout new file mode 100644 index 0000000000..e69de29bb2 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log new file mode 100644 index 0000000000..8069bad528 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log @@ -0,0 +1,13 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - 
+#path intel +#open 2012-10-10-15-05-23 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources +#types time string addr port addr port addr string enum enum table[string] +1349881523.548946 - - - - - 1.2.3.4 - - Intel::IN_A_TEST source1 +1349881523.548946 - - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 +1349881524.567896 - - - - - 1.2.3.4 - - Intel::IN_A_TEST source1 +1349881524.567896 - - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 +#close 2012-10-10-15-05-24 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/worker-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/worker-1..stdout new file mode 100644 index 0000000000..e69de29bb2 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/worker-2..stdout b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/worker-2..stdout new file mode 100644 index 0000000000..e69de29bb2 diff --git a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro new file mode 100644 index 0000000000..3810de5d4b --- /dev/null +++ b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro @@ -0,0 +1,80 @@ +# @TEST-SERIALIZE: comm +# +# @TEST-EXEC: btest-bg-run manager-1 BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT +# @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT +# @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. 
CLUSTER_NODE=worker-2 bro %INPUT +# @TEST-EXEC: btest-bg-wait -k 10 +# @TEST-EXEC: btest-diff manager-1/.stdout +# @TEST-EXEC: btest-diff manager-1/intel.log +# @TEST-EXEC: btest-diff worker-1/.stdout +# @TEST-EXEC: btest-diff worker-2/.stdout + +@TEST-START-FILE cluster-layout.bro +redef Cluster::nodes = { + ["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")], + ["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1"], + ["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1"], +}; +@TEST-END-FILE + +@load base/frameworks/control + +module Intel; + +redef Log::default_rotation_interval=0sec; + +event remote_connection_handshake_done(p: event_peer) + { + # Insert the data once both workers are connected. + if ( Cluster::local_node_type() == Cluster::MANAGER && Cluster::worker_count == 2 ) + { + Intel::insert([$host=1.2.3.4,$meta=[$source="manager"]]); + } + } + +global worker2_data = 0; +global sent_data = F; +event Intel::cluster_new_item(item: Intel::Item) + { + if ( ! is_remote_event() ) # skip locally raised events; only items arriving from a peer are handled below + return; + + print fmt("cluster_new_item: %s inserted by %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr); + + if ( ! sent_data ) + { + # We wait to insert data here because we can now be sure the + # full cluster is constructed. + sent_data = T; + if ( Cluster::node == "worker-1" ) + Intel::insert([$host=123.123.123.123,$meta=[$source="worker-1"]]); + if ( Cluster::node == "worker-2" ) + Intel::insert([$host=4.3.2.1,$meta=[$source="worker-2"]]); + } + + # We're forcing worker-2 to do a lookup when it has three intelligence items + # which were distributed over the cluster (data inserted locally is resent). + if ( Cluster::node == "worker-2" ) + { + ++worker2_data; + if ( worker2_data == 3 ) + { + # Now that everything is inserted, see if we can match on the data inserted + # by worker-1.
+ print "Doing a lookup"; + Intel::seen([$host=123.123.123.123, $where=Intel::IN_ANYWHERE]); + } + } + } + +event Intel::log_intel(rec: Intel::Info) # the first logged match requests shutdown + { + event Control::shutdown_request(); + } + +event remote_connection_closed(p: event_peer) + { + # Cascading termination + #print fmt("disconnected from: %s", p); + terminate_communication(); + } \ No newline at end of file diff --git a/testing/btest/scripts/base/frameworks/intel/input-and-match.bro b/testing/btest/scripts/base/frameworks/intel/input-and-match.bro new file mode 100644 index 0000000000..f77f5c0f1d --- /dev/null +++ b/testing/btest/scripts/base/frameworks/intel/input-and-match.bro @@ -0,0 +1,40 @@ +# @TEST-SERIALIZE: comm + +# @TEST-EXEC: btest-bg-run broproc bro %INPUT +# @TEST-EXEC: btest-bg-wait -k 5 +# @TEST-EXEC: btest-diff broproc/intel.log + +@TEST-START-FILE intel.dat +#fields host net str str_type meta.source meta.desc meta.url +1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 +1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 +- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 +@TEST-END-FILE + +@load frameworks/communication/listen + +redef Intel::read_files += { "../intel.dat" }; +redef enum Intel::Where += { SOMEWHERE }; + +event do_it() + { + Intel::seen([$str="e@mail.com", + $str_type=Intel::EMAIL, + $where=SOMEWHERE]); + + Intel::seen([$host=1.2.3.4, + $where=SOMEWHERE]); + } + +global log_lines = 0; +event Intel::log_intel(rec: Intel::Info) + { + ++log_lines; + if ( log_lines == 2 ) # do_it() makes two Intel::seen calls; terminate once two log entries exist + terminate(); + } + +event bro_init() &priority=-10 + { + schedule 1sec { do_it() }; + } diff --git a/testing/btest/scripts/base/frameworks/intel/insert-and-matcher.bro b/testing/btest/scripts/base/frameworks/intel/insert-and-matcher.bro deleted file mode 100644 index 67e539c176..0000000000 --- a/testing/btest/scripts/base/frameworks/intel/insert-and-matcher.bro +++ /dev/null @@
-1,34 +0,0 @@ -# -# @TEST-EXEC: bro %INPUT >out -# @TEST-EXEC: btest-diff out - -event bro_init() - { - Intel::insert([$ip=1.2.3.4, $tags=set("zeustracker.abuse.ch", "malicious")]); - Intel::insert([$str="http://www.google.com/", $subtype="url", $tags=set("infrastructure", "google")]); - Intel::insert([$str="Ab439G32F...", $subtype="x509_cert", $tags=set("bad")]); - Intel::insert([$str="Ab439G32F...", $tags=set("bad")]); - } - -event bro_done() - { - local orig_h = 1.2.3.4; - - if ( Intel::matcher([$ip=orig_h, $and_tags=set("malicious")]) ) - print "VALID"; - - if ( Intel::matcher([$ip=orig_h, $and_tags=set("don't match")]) ) - print "INVALID"; - - if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::MetaData): bool { return T; } ]) ) - print "VALID"; - - if ( Intel::matcher([$ip=orig_h, $pred=function(meta: Intel::MetaData): bool { return F; } ]) ) - print "INVALID"; - - if ( Intel::matcher([$str="http://www.google.com/", $subtype="url", $tags=set("google")]) ) - print "VALID"; - - if ( Intel::matcher([$str="http://www.example.com", $subtype="url"]) ) - print "INVALID"; - } diff --git a/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro b/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro new file mode 100644 index 0000000000..6838736249 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro @@ -0,0 +1,66 @@ +# @TEST-SERIALIZE: comm +# +# @TEST-EXEC: btest-bg-run manager-1 BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT +# @TEST-EXEC: sleep 2 +# @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT +# @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. 
CLUSTER_NODE=worker-2 bro %INPUT +# @TEST-EXEC: btest-bg-wait -k 10 +# @TEST-EXEC: btest-diff manager-1/.stdout +# @TEST-EXEC: btest-diff manager-1/intel.log +# @TEST-EXEC: btest-diff worker-1/.stdout +# @TEST-EXEC: btest-diff worker-2/.stdout + +@TEST-START-FILE cluster-layout.bro +redef Cluster::nodes = { + ["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")], + ["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1"], + ["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1"], +}; +@TEST-END-FILE + +@TEST-START-FILE intel.dat +#fields host net str str_type meta.source meta.desc meta.url +1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 +1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 +- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 +@TEST-END-FILE + +@load base/frameworks/control +redef Log::default_rotation_interval=0sec; + +module Intel; + +@if ( Cluster::local_node_type() == Cluster::MANAGER ) +redef Intel::read_files += { "../intel.dat" }; # only the manager reads the file (guarded by the @if above) +@endif + +redef enum Intel::Where += { + Intel::IN_A_TEST, +}; + +event do_it() + { + Intel::seen([$host=1.2.3.4, $where=Intel::IN_A_TEST]); + Intel::seen([$str="e@mail.com", $str_type=Intel::EMAIL, $where=Intel::IN_A_TEST]); + } + +event bro_init() + { + # Delay the workers searching for hits briefly to allow for the data distribution + # mechanism to distribute the data to the workers. + if ( Cluster::local_node_type() == Cluster::WORKER ) + schedule 2sec { do_it() }; + } + +global intel_hits=0; +event Intel::log_intel(rec: Intel::Info) + { + ++intel_hits; + # There should be 4 hits since each worker is "seeing" 2 things.
+ if ( intel_hits == 4 ) + { + # We're delaying shutdown for a second here to make sure that no other + # matches happen (which would be wrong!). + schedule 1sec { Control::shutdown_request() }; + } + }