Merge remote-tracking branch 'origin/master' into topic/bernhard/hyperloglog

Bernhard Amann 2013-07-30 14:31:09 -07:00
commit 5b9d80e50d
247 changed files with 2729 additions and 5372 deletions

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,38 @@
@load base/frameworks/files
@load base/utils/paths
module FileExtract;
export {
## The prefix where files are extracted to.
const prefix = "./extract_files/" &redef;
redef record Files::Info += {
## The local filename of the extracted file.
extracted: string &optional &log;
};
redef record Files::AnalyzerArgs += {
## The local filename to which to write an extracted file.
## This field is used in the core by the extraction plugin
## to know where to write the file to. It's also optional;
## if left unset, a default name derived from the file's
## source and id is assigned.
extract_filename: string &optional;
};
}
function on_add(f: fa_file, args: Files::AnalyzerArgs)
{
if ( ! args?$extract_filename )
args$extract_filename = cat("extract-", f$source, "-", f$id);
f$info$extracted = args$extract_filename;
args$extract_filename = build_path_compressed(prefix, args$extract_filename);
}
event bro_init() &priority=10
{
Files::register_analyzer_add_callback(Files::ANALYZER_EXTRACT, on_add);
# Create the extraction directory.
mkdir(prefix);
}
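A minimal usage sketch: with this script loaded, attaching the extraction analyzer to every new file lets the on_add() callback above choose the default filename under FileExtract::prefix.

event file_new(f: fa_file)
	{
	# Attach the extraction analyzer; on_add() then assigns a default
	# extract_filename of the form "extract-<source>-<id>".
	Files::add_analyzer(f, Files::ANALYZER_EXTRACT);
	}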

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,32 @@
@load base/frameworks/files
module FileHash;
export {
redef record Files::Info += {
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
};
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
{
switch ( kind ) {
case "md5":
f$info$md5 = hash;
break;
case "sha1":
f$info$sha1 = hash;
break;
case "sha256":
f$info$sha256 = hash;
break;
}
}
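The digest fields above are only populated once a matching hash analyzer has been attached to the file. A hedged sketch, assuming the hash analyzers are exposed under tags such as Files::ANALYZER_MD5:

event file_new(f: fa_file)
	{
	# Request an MD5 digest; the file_hash handler above copies the
	# resulting hash into f$info$md5 for logging.
	Files::add_analyzer(f, Files::ANALYZER_MD5);
	}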

View file

@ -81,6 +81,13 @@ export {
## Returns: The analyzer name corresponding to the tag.
global name: function(tag: Analyzer::Tag) : string;
## Translates an analyzer's name to a tag enum value.
##
## name: The analyzer name.
##
## Returns: The analyzer tag corresponding to the name.
global get_tag: function(name: string): Analyzer::Tag;
## Schedules an analyzer for a future connection originating from a given IP
## address and port.
##
@ -187,6 +194,11 @@ function name(atype: Analyzer::Tag) : string
return __name(atype);
}
function get_tag(name: string): Analyzer::Tag
{
return __tag(name);
}
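A short sketch of the new reverse lookup, assuming "HTTP" is the name of a registered analyzer:

event bro_init()
	{
	local t = Analyzer::get_tag("HTTP");
	# name() is the inverse mapping, so this prints "HTTP" again.
	print Analyzer::name(t);
	}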
function schedule_analyzer(orig: addr, resp: addr, resp_p: port,
analyzer: Analyzer::Tag, tout: interval) : bool
{

View file

@ -1,261 +0,0 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/bif/file_analysis.bif
@load base/frameworks/logging
module FileAnalysis;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## The type of analysis.
tag: FileAnalysis::Tag;
## The local filename to which to write an extracted file. Must be
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional;
## An event which will be generated for all new file contents,
## chunk-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## An identifier associated with a single file.
id: string &log;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &log &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &log &optional;
## The time at which the last activity for the file was seen.
last_active: time &log;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of bytes in the file stream that were delivered to
## file analyzers out of sequence due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## The amount of time between receiving new data for this file that
## the analysis engine will wait before giving up on it.
timeout_interval: interval &log &optional;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field.
bof_buffer_size: count &log &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Connection UIDS over which the file was transferred.
conn_uids: set[string] &log;
## A set of analysis types done during the file analysis.
analyzers: set[FileAnalysis::Tag];
## Local filenames of extracted files.
extracted_files: set[string] &log;
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[Analyzer::Tag] of bool = table() &redef;
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_file_analysis: event(rec: Info);
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them into a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor into the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## args: the analyzer type to add along with any arguments it takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
}
redef record fa_file += {
info: Info &optional;
};
function set_info(f: fa_file)
{
if ( ! f?$info )
{
local tmp: Info;
f$info = tmp;
}
f$info$id = f$id;
if ( f?$parent_id ) f$info$parent_id = f$parent_id;
if ( f?$source ) f$info$source = f$source;
if ( f?$is_orig ) f$info$is_orig = f$is_orig;
f$info$last_active = f$last_active;
f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes;
f$info$missing_bytes = f$missing_bytes;
f$info$overflow_bytes = f$overflow_bytes;
f$info$timeout_interval = f$timeout_interval;
f$info$bof_buffer_size = f$bof_buffer_size;
if ( f?$mime_type ) f$info$mime_type = f$mime_type;
if ( f?$conns )
for ( cid in f$conns )
add f$info$conn_uids[f$conns[cid]$uid];
}
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
if ( ! __add_analyzer(f$id, args) ) return F;
set_info(f);
add f$info$analyzers[args$tag];
if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
add f$info$extracted_files[args$extract_filename];
return T;
}
function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
return __remove_analyzer(f$id, args);
}
function stop(f: fa_file): bool
{
return __stop(f$id);
}
event bro_init() &priority=5
{
Log::create_stream(FileAnalysis::LOG,
[$columns=Info, $ev=log_file_analysis]);
}
event file_timeout(f: fa_file) &priority=5
{
set_info(f);
f$info$timedout = T;
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
{
set_info(f);
switch ( kind ) {
case "md5":
f$info$md5 = hash;
break;
case "sha1":
f$info$sha1 = hash;
break;
case "sha256":
f$info$sha256 = hash;
break;
}
}
event file_state_remove(f: fa_file) &priority=5
{
set_info(f);
}
event file_state_remove(f: fa_file) &priority=-5
{
Log::write(FileAnalysis::LOG, f$info);
}

View file

@ -0,0 +1,371 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/bif/file_analysis.bif
@load base/frameworks/analyzer
@load base/frameworks/logging
@load base/utils/site
module Files;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## An event which will be generated for all new file contents,
## chunk-wise. Used when *tag* is
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise. Used when *tag* is
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## The time when the file was first seen.
ts: time &log;
## An identifier associated with a single file.
fuid: string &log;
## If this file was transferred over a network
## connection, this should show the host or hosts that
## the data was sourced from.
tx_hosts: set[addr] &log;
## If this file was transferred over a network
## connection, this should show the host or hosts that
## the data traveled to.
rx_hosts: set[addr] &log;
## Connection UIDS over which the file was transferred.
conn_uids: set[string] &log;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## A value to represent the depth of this file in relation
## to its source. In SMTP, it is the depth of the MIME
## attachment on the message. In HTTP, it is the depth of the
## request within the TCP connection.
depth: count &default=0 &log;
## A set of analysis types done during the file analysis.
analyzers: set[string] &log;
## A mime type provided by libmagic against the *bof_buffer*, or,
## in cases where no buffering of the beginning of the file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## A filename for the file if one is available from the source
## of the file. These will frequently come from
## "Content-Disposition" headers in network protocols.
filename: string &log &optional;
## The duration the file was analyzed for.
duration: interval &log &default=0secs;
## If the source of this file is a network connection, this field
## indicates if the data originated from the local network or not, as
## determined by the configured :bro:see:`Site::local_nets`.
local_orig: bool &log &optional;
## If the source of this file is a network connection, this field
## indicates if the file is being sent by the originator of the connection
## or the responder.
is_orig: bool &log &optional;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of bytes in the file stream that were delivered to
## file analyzers out of sequence due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_fuid: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[Files::Tag] of bool = table() &redef;
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them into a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor into the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## tag: the analyzer type.
##
## args: any parameters the analyzer takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file,
tag: Files::Tag,
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file,
tag: Files::Tag,
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
## Translates a file analyzer enum value to a string with the analyzer's name.
##
## tag: The analyzer tag.
##
## Returns: The analyzer name corresponding to the tag.
global analyzer_name: function(tag: Files::Tag): string;
## Provides a text description regarding metadata of the file.
## For example, with HTTP it would return a URL.
##
## f: The file to be described.
##
## Returns: A text description regarding metadata of the file.
global describe: function(f: fa_file): string;
type ProtoRegistration: record {
## A callback to generate a file handle on demand when
## one is needed by the core.
get_file_handle: function(c: connection, is_orig: bool): string;
## A callback to "describe" a file. In the case of an HTTP
## transfer the most obvious description would be the URL.
## It's like an extremely compressed version of the normal log.
describe: function(f: fa_file): string
&default=function(f: fa_file): string { return ""; };
};
## Register callbacks for protocols that work with the Files framework.
## The callbacks must uniquely identify a file and each protocol can
## only have a single callback registered for it.
##
## tag: Tag for the protocol analyzer having a callback being registered.
##
## reg: A :bro:see:`ProtoRegistration` record.
##
## Returns: true if the protocol being registered was not previously registered.
global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool;
## Register a callback for file analyzers to use if they need to do some
## manipulation when they are being added to a file, before the core code
## takes over. This is unlikely to be interesting for users and should
## only be called by file analyzer authors, but it is *not required*.
##
## tag: Tag for the file analyzer.
##
## callback: Function to execute when the given file analyzer is being added.
global register_analyzer_add_callback: function(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs));
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_files: event(rec: Info);
}
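As a hedged sketch of the describe() entry point documented above, a handler can print a one-line description of each finished file, assuming the carrying protocol registered a describe callback via register_protocol() (as FTP and HTTP do elsewhere in this commit):

event file_state_remove(f: fa_file)
	{
	# Falls back to an empty string when no protocol callback matches.
	print fmt("%s: %s", f$id, Files::describe(f));
	}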
redef record fa_file += {
info: Info &optional;
};
redef record AnalyzerArgs += {
# This is used internally by the core file analyzer API.
tag: Files::Tag &optional;
};
# Store the callbacks for protocol analyzers that have files.
global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table();
global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table();
event bro_init() &priority=5
{
Log::create_stream(Files::LOG, [$columns=Info, $ev=log_files]);
}
function set_info(f: fa_file)
{
if ( ! f?$info )
{
local tmp: Info = Info($ts=f$last_active,
$fuid=f$id);
f$info = tmp;
}
if ( f?$parent_id )
f$info$parent_fuid = f$parent_id;
if ( f?$source )
f$info$source = f$source;
f$info$duration = f$last_active - f$info$ts;
f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes )
f$info$total_bytes = f$total_bytes;
f$info$missing_bytes = f$missing_bytes;
f$info$overflow_bytes = f$overflow_bytes;
if ( f?$is_orig )
f$info$is_orig = f$is_orig;
if ( f?$mime_type )
f$info$mime_type = f$mime_type;
}
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
{
# This is to construct the correct args for the core API.
args$tag = tag;
add f$info$analyzers[Files::analyzer_name(tag)];
if ( tag in analyzer_add_callbacks )
analyzer_add_callbacks[tag](f, args);
if ( ! __add_analyzer(f$id, args) )
{
Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id));
return F;
}
return T;
}
function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs))
{
analyzer_add_callbacks[tag] = callback;
}
function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
{
args$tag = tag;
return __remove_analyzer(f$id, args);
}
function stop(f: fa_file): bool
{
return __stop(f$id);
}
function analyzer_name(tag: Files::Tag): string
{
return __analyzer_name(tag);
}
event file_new(f: fa_file) &priority=10
{
set_info(f);
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10
{
set_info(f);
add f$info$conn_uids[c$uid];
local cid = c$id;
add f$info$tx_hosts[f$is_orig ? cid$orig_h : cid$resp_h];
if ( |Site::local_nets| > 0 )
f$info$local_orig = Site::is_local_addr(f$is_orig ? cid$orig_h : cid$resp_h);
add f$info$rx_hosts[f$is_orig ? cid$resp_h : cid$orig_h];
}
event file_timeout(f: fa_file) &priority=10
{
set_info(f);
f$info$timedout = T;
}
event file_state_remove(f: fa_file) &priority=10
{
set_info(f);
}
event file_state_remove(f: fa_file) &priority=-10
{
Log::write(Files::LOG, f$info);
}
function register_protocol(tag: Analyzer::Tag, reg: ProtoRegistration): bool
{
local result = (tag !in registered_protocols);
registered_protocols[tag] = reg;
return result;
}
function describe(f: fa_file): string
{
local tag = Analyzer::get_tag(f$source);
if ( tag !in registered_protocols )
return "";
local handler = registered_protocols[tag];
return handler$describe(f);
}
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool) &priority=5
{
if ( tag !in registered_protocols )
return;
local handler = registered_protocols[tag];
set_file_handle(handler$get_file_handle(c, is_orig));
}
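Beyond extraction and hashing, the AnalyzerArgs events documented above let file content be delivered straight to script land. A hedged sketch, assuming the data-event analyzer referenced in the documentation is available as Files::ANALYZER_DATA_EVENT; the my_file_stream handler name is purely illustrative:

event my_file_stream(f: fa_file, data: string)
	{
	# Receives the reassembled file stream piece by piece.
	print fmt("%s delivered %d bytes", f$id, |data|);
	}

event file_new(f: fa_file)
	{
	Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT,
	                    [$stream_event=my_file_stream]);
	}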

View file

@ -10,13 +10,14 @@ module Intel;
export {
redef enum Log::ID += { LOG };
## String data needs to be further categorized since it could represent
## any number of types of data.
type StrType: enum {
## Enum type to represent various types of intelligence data.
type Type: enum {
## An IP address.
ADDR,
## A complete URL without the prefix "http://".
URL,
## User-Agent string, typically HTTP or mail message body.
USER_AGENT,
## Software name.
SOFTWARE,
## Email address.
EMAIL,
## DNS domain name.
@ -44,18 +45,15 @@ export {
## Represents a piece of intelligence.
type Item: record {
## The IP address if the intelligence is about an IP address.
host: addr &optional;
## The network if the intelligence is about a CIDR block.
net: subnet &optional;
## The string if the intelligence is about a string.
str: string &optional;
## The type of data that is in the string if the $str field is set.
str_type: StrType &optional;
## The intelligence indicator.
indicator: string;
## The type of data that the indicator field represents.
indicator_type: Type;
## Metadata for the item. Typically represents more deeply \
## Metadata for the item. Typically represents more deeply
## descriptive data for a piece of intelligence.
meta: MetaData;
meta: MetaData;
};
## Enum to represent where data came from when it was discovered.
@ -65,23 +63,23 @@ export {
IN_ANYWHERE,
};
## The $host field and combination of $str and $str_type fields are mutually
## exclusive. These records *must* represent either an IP address being
## seen or a string being seen.
type Seen: record {
## The IP address if the data seen is an IP address.
host: addr &log &optional;
## The string if the data is about a string.
str: string &log &optional;
## The type of data that is in the string if the $str field is set.
str_type: StrType &log &optional;
indicator: string &log &optional;
## The type of data that the indicator represents.
indicator_type: Type &log &optional;
## If the indicator type was :bro:enum:`Intel::ADDR`, then this
## field will be present.
host: addr &optional;
## Where the data was discovered.
where: Where &log;
where: Where &log;
## If the data was discovered within a connection, the
## connection record should be provided to give context to the data.
conn: connection &optional;
conn: connection &optional;
};
## Record used for the logging framework representing a positive
@ -100,7 +98,7 @@ export {
## Where the data was seen.
seen: Seen &log;
## Sources which supplied data that resulted in this match.
sources: set[string] &log;
sources: set[string] &log &default=string_set();
};
## Intelligence data manipulation functions.
@ -135,8 +133,8 @@ const have_full_data = T &redef;
# The in memory data structure for holding intelligence.
type DataStore: record {
net_data: table[subnet] of set[MetaData];
string_data: table[string, StrType] of set[MetaData];
host_data: table[addr] of set[MetaData];
string_data: table[string, Type] of set[MetaData];
};
global data_store: DataStore &redef;
@ -144,8 +142,8 @@ global data_store: DataStore &redef;
# This is primarily for workers to do the initial quick matches and store
# a minimal amount of data for the full match to happen on the manager.
type MinDataStore: record {
net_data: set[subnet];
string_data: set[string, StrType];
host_data: set[addr];
string_data: set[string, Type];
};
global min_data_store: MinDataStore &redef;
@ -157,15 +155,13 @@ event bro_init() &priority=5
function find(s: Seen): bool
{
if ( s?$host &&
((have_full_data && s$host in data_store$net_data) ||
(s$host in min_data_store$net_data)))
if ( s?$host )
{
return T;
return ((s$host in min_data_store$host_data) ||
(have_full_data && s$host in data_store$host_data));
}
else if ( s?$str && s?$str_type &&
((have_full_data && [s$str, s$str_type] in data_store$string_data) ||
([s$str, s$str_type] in min_data_store$string_data)))
else if ( ([to_lower(s$indicator), s$indicator_type] in min_data_store$string_data) ||
(have_full_data && [to_lower(s$indicator), s$indicator_type] in data_store$string_data) )
{
return T;
}
@ -177,8 +173,7 @@ function find(s: Seen): bool
function get_items(s: Seen): set[Item]
{
local item: Item;
local return_data: set[Item] = set();
local return_data: set[Item];
if ( ! have_full_data )
{
@ -191,26 +186,23 @@ function get_items(s: Seen): set[Item]
if ( s?$host )
{
# See if the host is known about and it has meta values
if ( s$host in data_store$net_data )
if ( s$host in data_store$host_data )
{
for ( m in data_store$net_data[s$host] )
for ( m in data_store$host_data[s$host] )
{
# TODO: the lookup should be finding all and not just most specific
# and $host/$net should have the correct value.
item = [$host=s$host, $meta=m];
add return_data[item];
add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=m)];
}
}
}
else if ( s?$str && s?$str_type )
else
{
local lower_indicator = to_lower(s$indicator);
# See if the string is known about and it has meta values
if ( [s$str, s$str_type] in data_store$string_data )
if ( [lower_indicator, s$indicator_type] in data_store$string_data )
{
for ( m in data_store$string_data[s$str, s$str_type] )
for ( m in data_store$string_data[lower_indicator, s$indicator_type] )
{
item = [$str=s$str, $str_type=s$str_type, $meta=m];
add return_data[item];
add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=m)];
}
}
}
@ -222,6 +214,12 @@ function Intel::seen(s: Seen)
{
if ( find(s) )
{
if ( s?$host )
{
s$indicator = cat(s$host);
s$indicator_type = Intel::ADDR;
}
if ( have_full_data )
{
local items = get_items(s);
@ -250,8 +248,7 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool
event Intel::match(s: Seen, items: set[Item]) &priority=5
{
local empty_set: set[string] = set();
local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set];
local info: Info = [$ts=network_time(), $seen=s];
if ( s?$conn )
{
@ -267,52 +264,37 @@ event Intel::match(s: Seen, items: set[Item]) &priority=5
function insert(item: Item)
{
if ( item?$str && !item?$str_type )
{
event reporter_warning(network_time(), fmt("You must provide a str_type for strings or this item doesn't make sense. Item: %s", item), "");
return;
}
# Create and fill out the meta data item.
local meta = item$meta;
local metas: set[MetaData];
if ( item?$host )
# All intelligence is case insensitive at the moment.
local lower_indicator = to_lower(item$indicator);
if ( item$indicator_type == ADDR )
{
local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128);
local host = to_addr(item$indicator);
if ( have_full_data )
{
if ( host !in data_store$net_data )
data_store$net_data[host] = set();
if ( host !in data_store$host_data )
data_store$host_data[host] = set();
metas = data_store$net_data[host];
metas = data_store$host_data[host];
}
add min_data_store$net_data[host];
add min_data_store$host_data[host];
}
else if ( item?$net )
else
{
if ( have_full_data )
{
if ( item$net !in data_store$net_data )
data_store$net_data[item$net] = set();
if ( [lower_indicator, item$indicator_type] !in data_store$string_data )
data_store$string_data[lower_indicator, item$indicator_type] = set();
metas = data_store$net_data[item$net];
metas = data_store$string_data[lower_indicator, item$indicator_type];
}
add min_data_store$net_data[item$net];
}
else if ( item?$str )
{
if ( have_full_data )
{
if ( [item$str, item$str_type] !in data_store$string_data )
data_store$string_data[item$str, item$str_type] = set();
metas = data_store$string_data[item$str, item$str_type];
}
add min_data_store$string_data[item$str, item$str_type];
add min_data_store$string_data[lower_indicator, item$indicator_type];
}
local updated = F;
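To exercise the reworked indicator model, a hedged sketch of loading one item follows; Intel::insert and the MetaData $source field are assumed from the framework's data-manipulation API, which this hunk truncates:

event bro_init()
	{
	# Indicator strings are matched case-insensitively by insert()/find().
	Intel::insert(Intel::Item($indicator="198.51.100.1",
	                          $indicator_type=Intel::ADDR,
	                          $meta=Intel::MetaData($source="example-feed")));
	}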

View file

@ -68,6 +68,25 @@ export {
## the notice policy.
iconn: icmp_conn &optional;
## A file record if the notice is related to a file. The
## reference to the actual fa_file record will be deleted after applying
## the notice policy.
f: fa_file &optional;
## A file unique ID if this notice is related to a file. If the $f
## field is provided, this will be automatically filled out.
fuid: string &log &optional;
## A mime type if the notice is related to a file. If the $f field
## is provided, this will be automatically filled out.
file_mime_type: string &log &optional;
## Frequently files can be "described" to give a bit more context.
## This field will typically be automatically filled out from an
## fa_file record. For example, if a notice was related to a
## file over HTTP, the URL of the request would be shown.
file_desc: string &log &optional;
## The transport protocol. Filled automatically when either conn, iconn
## or p is specified.
proto: transport_proto &log &optional;
@ -460,10 +479,28 @@ function apply_policy(n: Notice::Info)
if ( ! n?$ts )
n$ts = network_time();
if ( n?$f )
{
if ( ! n?$fuid )
n$fuid = n$f$id;
if ( ! n?$file_mime_type && n$f?$mime_type )
n$file_mime_type = n$f$mime_type;
n$file_desc = Files::describe(n$f);
if ( n$f?$conns && |n$f$conns| == 1 )
{
for ( id in n$f$conns )
n$conn = n$f$conns[id];
}
}
if ( n?$conn )
{
if ( ! n?$id )
n$id = n$conn$id;
if ( ! n?$uid )
n$uid = n$conn$uid;
}
@ -513,13 +550,15 @@ function apply_policy(n: Notice::Info)
if ( ! n?$suppress_for )
n$suppress_for = default_suppression_interval;
# Delete the connection record if it's there so we aren't sending that
# to remote machines. It can cause problems due to the size of the
# connection record.
# Delete the connection and file records if they're there so we
# aren't sending that to remote machines. It can cause problems
# due to the size of those records.
if ( n?$conn )
delete n$conn;
if ( n?$iconn )
delete n$iconn;
if ( n?$f )
delete n$f;
}
function internal_NOTICE(n: Notice::Info)
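With the new $f field, a notice can be raised directly from a file event; apply_policy() above derives fuid, file_mime_type, file_desc, and (when exactly one connection is attached) conn from it before the fa_file reference is deleted. A minimal sketch with a hypothetical notice type:

redef enum Notice::Type += {
	## Hypothetical notice type used only for this example.
	Example_File_Seen
};

event file_new(f: fa_file)
	{
	NOTICE([$note=Example_File_Seen, $msg="example file notice", $f=f]);
	}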

View file

@ -328,7 +328,7 @@ type fa_file: record {
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &optional;
source: string;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
@ -3049,6 +3049,6 @@ const snaplen = 8192 &redef;
@load base/frameworks/logging
@load base/frameworks/input
@load base/frameworks/analyzer
@load base/frameworks/file-analysis
@load base/frameworks/files
@load base/bif

View file

@ -5,9 +5,12 @@
##! you actually want.
@load base/utils/site
@load base/utils/active-http
@load base/utils/addrs
@load base/utils/conn-ids
@load base/utils/dir
@load base/utils/directions-and-hosts
@load base/utils/exec
@load base/utils/files
@load base/utils/numbers
@load base/utils/paths
@ -49,4 +52,7 @@
@load base/protocols/syslog
@load base/protocols/tunnels
@load base/files/hash
@load base/files/extract
@load base/misc/find-checksum-offloading

View file

@ -1,7 +1,7 @@
@load ./utils-commands
@load ./main
@load ./file-analysis
@load ./file-extract
@load ./utils
@load ./files
@load ./gridftp
@load-sigs ./dpd.sig
@load-sigs ./dpd.sig

View file

@ -1,48 +0,0 @@
@load ./main
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module FTP;
export {
## Default file handle provider for FTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_handle_string(c: connection): string
{
return cat(Analyzer::ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id));
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return "";
local info: FTP::Info = ftp_data_expected[c$id$resp_h, c$id$resp_p];
if ( info$passive )
# FTP client initiates data channel.
if ( is_orig )
# Don't care about FTP client data.
return "";
else
# Do care about FTP server data.
return get_handle_string(c);
else
# FTP server initiates data channel.
if ( is_orig )
# Do care about FTP server data.
return get_handle_string(c);
else
# Don't care about FTP client data.
return "";
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_FTP_DATA ) return;
set_file_handle(FTP::get_file_handle(c, is_orig));
}

View file

@ -1,90 +0,0 @@
##! File extraction support for FTP.
@load ./main
@load base/utils/files
module FTP;
export {
## Pattern of file mime types to extract from FTP transfers.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from FTP-data transfers.
const extraction_prefix = "ftp-item" &redef;
}
redef record Info += {
## On disk file where it was extracted to.
extraction_file: string &log &optional;
## Indicates if the current command/response pair should attempt to
## extract the file if a file was transferred.
extract_file: bool &default=F;
};
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in ftp_data_expected ) next;
local s = ftp_data_expected[cid$resp_h, cid$resp_p];
if ( ! s$extract_file ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( ! f?$info ) return;
for ( filename in f$info$extracted_files )
{
local s: FTP::Info;
s$ts = network_time();
s$tags = set();
s$user = "<ftp-data>";
s$extraction_file = filename;
if ( f?$conns )
for ( cid in f$conns )
{
s$uid = f$conns[cid]$uid;
s$id = cid;
}
Log::write(FTP::LOG, s);
}
}
event log_ftp(rec: Info) &priority=-10
{
delete rec$extraction_file;
delete rec$extract_file;
}

View file

@ -0,0 +1,60 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/files
module FTP;
export {
redef record Info += {
## File unique ID.
fuid: string &optional &log;
};
## Default file handle provider for FTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
## Describe the file being transferred.
global describe_file: function(f: fa_file): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected )
return "";
return cat(Analyzer::ANALYZER_FTP_DATA, c$start_time, c$id, is_orig);
}
function describe_file(f: fa_file): string
{
# This shouldn't be needed, but just in case...
if ( f$source != "FTP" )
return "";
for ( cid in f$conns )
{
if ( f$conns[cid]?$ftp )
return FTP::describe(f$conns[cid]$ftp);
}
return "";
}
event bro_init() &priority=5
{
Files::register_protocol(Analyzer::ANALYZER_FTP_DATA,
[$get_file_handle = FTP::get_file_handle,
$describe = FTP::describe_file]);
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected )
return;
local ftp = ftp_data_expected[c$id$resp_h, c$id$resp_p];
ftp$fuid = f$id;
if ( f?$mime_type )
ftp$mime_type = f$mime_type;
}

View file

@ -63,8 +63,6 @@ export {
reply_code: count &log &optional;
## Reply message from the server in response to the command.
reply_msg: string &log &optional;
## Arbitrary tags that may indicate a particular attribute of this command.
tags: set[string] &log;
## Expected FTP data channel.
data_channel: ExpectedDataChannel &log &optional;
@ -104,6 +102,8 @@ export {
global log_ftp: event(rec: Info);
}
@load ./utils
# Add the state tracking information variable to the connection record
redef record connection += {
ftp: Info &optional;
@ -171,37 +171,26 @@ function set_ftp_session(c: connection)
function ftp_message(s: Info)
{
# If it either has a tag associated with it (something detected)
# or it's a deliberately logged command.
if ( |s$tags| > 0 || (s?$cmdarg && s$cmdarg$cmd in logged_commands) )
s$ts=s$cmdarg$ts;
s$command=s$cmdarg$cmd;
s$arg = s$cmdarg$arg;
if ( s$cmdarg$cmd in file_cmds )
s$arg = build_url_ftp(s);
if ( s$arg == "" )
delete s$arg;
if ( s?$password &&
! s$capture_password &&
to_lower(s$user) !in guest_ids )
{
if ( s?$password &&
! s$capture_password &&
to_lower(s$user) !in guest_ids )
{
s$password = "<hidden>";
}
local arg = s$cmdarg$arg;
if ( s$cmdarg$cmd in file_cmds )
{
local comp_path = build_path_compressed(s$cwd, arg);
if ( comp_path[0] != "/" )
comp_path = cat("/", comp_path);
arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), comp_path);
}
s$ts=s$cmdarg$ts;
s$command=s$cmdarg$cmd;
if ( arg == "" )
delete s$arg;
else
s$arg=arg;
Log::write(FTP::LOG, s);
s$password = "<hidden>";
}
if ( s?$cmdarg && s$command in logged_commands)
Log::write(FTP::LOG, s);
# The MIME and file_size fields are specific to file transfer commands
# and may not be used in all commands, so they need to be reset to "blank"
# values after logging.
@ -209,8 +198,6 @@ function ftp_message(s: Info)
delete s$file_size;
# Same with data channel.
delete s$data_channel;
# Tags are cleared every time too.
s$tags = set();
}
function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
@ -218,8 +205,9 @@ function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
s$passive = chan$passive;
s$data_channel = chan;
ftp_data_expected[chan$resp_h, chan$resp_p] = s;
Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p, Analyzer::ANALYZER_FTP_DATA,
5mins);
Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p,
Analyzer::ANALYZER_FTP_DATA,
5mins);
}
event ftp_request(c: connection, command: string, arg: string) &priority=5

View file

@ -0,0 +1,47 @@
##! Utilities specific for FTP processing.
@load ./main
@load base/utils/addrs
module FTP;
export {
## Creates a URL from an :bro:type:`FTP::Info` record.
##
## rec: An :bro:type:`FTP::Info` record.
##
## Returns: A URL, not prefixed by "ftp://".
global build_url: function(rec: Info): string;
## Creates a URL from an :bro:type:`FTP::Info` record.
##
## rec: An :bro:type:`FTP::Info` record.
##
## Returns: A URL prefixed with "ftp://".
global build_url_ftp: function(rec: Info): string;
## Create an extremely shortened representation of a log line.
global describe: function(rec: Info): string;
}
function build_url(rec: Info): string
{
if ( !rec?$arg )
return "";
local comp_path = build_path_compressed(rec$cwd, rec$arg);
if ( comp_path[0] != "/" )
comp_path = cat("/", comp_path);
return fmt("%s%s", addr_to_uri(rec$id$resp_h), comp_path);
}
function build_url_ftp(rec: Info): string
{
return fmt("ftp://%s", build_url(rec));
}
function describe(rec: Info): string
{
return build_url_ftp(rec);
}

View file

@ -1,8 +1,6 @@
@load ./main
@load ./entities
@load ./utils
@load ./file-analysis
@load ./file-ident
@load ./file-hash
@load ./file-extract
@load ./files
@load-sigs ./dpd.sig

View file

@ -0,0 +1,109 @@
##! Analysis and logging for MIME entities found in HTTP sessions.
@load base/frameworks/files
@load base/utils/strings
@load base/utils/files
@load ./main
module HTTP;
export {
type Entity: record {
## Filename for the entity if discovered from a header.
filename: string &optional;
};
redef record Info += {
## An ordered vector of file unique IDs.
orig_fuids: vector of string &log &optional;
## An ordered vector of mime types.
orig_mime_types: vector of string &log &optional;
## An ordered vector of file unique IDs.
resp_fuids: vector of string &log &optional;
## An ordered vector of mime types.
resp_mime_types: vector of string &log &optional;
## The current entity.
current_entity: Entity &optional;
## Current number of MIME entities in the HTTP request message body.
orig_mime_depth: count &default=0;
## Current number of MIME entities in the HTTP response message body.
resp_mime_depth: count &default=0;
};
}
event http_begin_entity(c: connection, is_orig: bool) &priority=10
{
set_state(c, F, is_orig);
if ( is_orig )
++c$http$orig_mime_depth;
else
++c$http$resp_mime_depth;
c$http$current_entity = Entity();
}
event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=3
{
if ( name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in value )
{
c$http$current_entity$filename = extract_filename_from_content_disposition(value);
}
else if ( name == "CONTENT-TYPE" &&
/[nN][aA][mM][eE][:blank:]*=/ in value )
{
c$http$current_entity$filename = extract_filename_from_content_disposition(value);
}
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( f$source == "HTTP" && c?$http )
{
if ( c$http?$current_entity && c$http$current_entity?$filename )
f$info$filename = c$http$current_entity$filename;
if ( f$is_orig )
{
if ( ! c$http?$orig_fuids )
c$http$orig_fuids = string_vec(f$id);
else
c$http$orig_fuids[|c$http$orig_fuids|] = f$id;
if ( f?$mime_type )
{
if ( ! c$http?$orig_mime_types )
c$http$orig_mime_types = string_vec(f$mime_type);
else
c$http$orig_mime_types[|c$http$orig_mime_types|] = f$mime_type;
}
}
else
{
if ( ! c$http?$resp_fuids )
c$http$resp_fuids = string_vec(f$id);
else
c$http$resp_fuids[|c$http$resp_fuids|] = f$id;
if ( f?$mime_type )
{
if ( ! c$http?$resp_mime_types )
c$http$resp_mime_types = string_vec(f$mime_type);
else
c$http$resp_mime_types[|c$http$resp_mime_types|] = f$mime_type;
}
}
}
}
event http_end_entity(c: connection, is_orig: bool) &priority=5
{
if ( c?$http && c$http?$current_entity )
delete c$http$current_entity;
}

View file

@ -1,54 +0,0 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module HTTP;
export {
redef record HTTP::Info += {
## Number of MIME entities in the HTTP request message body so far.
request_mime_level: count &default=0;
## Number of MIME entities in the HTTP response message body so far.
response_mime_level: count &default=0;
};
## Default file handle provider for HTTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
event http_begin_entity(c: connection, is_orig: bool) &priority=5
{
if ( ! c?$http )
return;
if ( is_orig )
++c$http$request_mime_level;
else
++c$http$response_mime_level;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$http ) return "";
local mime_level: count =
is_orig ? c$http$request_mime_level : c$http$response_mime_level;
local mime_level_str: string = mime_level > 1 ? cat(mime_level) : "";
if ( c$http$range_request )
return cat(Analyzer::ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
build_url(c$http));
return cat(Analyzer::ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
c$http$trans_depth, mime_level_str, " ", id_string(c$id));
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_HTTP ) return;
set_file_handle(HTTP::get_file_handle(c, is_orig));
}

View file

@ -1,100 +0,0 @@
##! Extracts the items from HTTP traffic, one per file. At this time only
##! the message body from the server can be extracted with this script.
@load ./main
@load ./file-analysis
module HTTP;
export {
## Pattern of file mime types to extract from HTTP response entity bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from HTTP entity bodies.
const extraction_prefix = "http-item" &redef;
redef record Info += {
## On-disk location where files in request body were extracted.
extracted_request_files: vector of string &log &optional;
## On-disk location where files in response body were extracted.
extracted_response_files: vector of string &log &optional;
## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:see:`file_new` for the file content.
extract_file: bool &default=F;
};
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
function add_extraction_file(c: connection, is_orig: bool, fn: string)
{
if ( is_orig )
{
if ( ! c$http?$extracted_request_files )
c$http$extracted_request_files = vector();
c$http$extracted_request_files[|c$http$extracted_request_files|] = fn;
}
else
{
if ( ! c$http?$extracted_response_files )
c$http$extracted_response_files = vector();
c$http$extracted_response_files[|c$http$extracted_response_files|] = fn;
}
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local c: connection;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
return;
}
local extracting: bool = F;
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
break;
}
if ( extracting )
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
}

View file

@ -1,68 +0,0 @@
##! Calculate hashes for HTTP body transfers.
@load ./main
@load ./file-analysis
module HTTP;
export {
redef record Info += {
## MD5 sum for a file transferred over HTTP calculated from the
## response body.
md5: string &log &optional;
## This value can be set per-transfer to determine per request
## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F;
};
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( f?$mime_type && generate_md5 in f$mime_type )
{
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$calc_md5 ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
if ( ! f?$info ) return;
if ( ! f$info?$md5 ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$md5 = f$info$md5;
}
}

View file

@ -1,105 +0,0 @@
##! Identification of file types in HTTP response bodies with file content sniffing.
@load base/frameworks/notice
@load ./main
@load ./utils
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates when the file extension doesn't seem to match the file
## contents.
Incorrect_File_Type,
};
redef record Info += {
## Mime type of response body identified by content sniffing.
mime_type: string &log &optional;
};
## Mapping between mime type strings (without character set) and
## regular expressions for URLs.
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the
## pattern doesn't match the mime type that was discovered.
const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef;
## A pattern for filtering out :bro:enum:`HTTP::Incorrect_File_Type` urls
## that are not noteworthy before a notice is created. Each
## pattern added should match the complete URL (the matched URLs include
## "http://" at the beginning).
const ignored_incorrect_file_type_urls = /^$/ &redef;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = f$mime_type;
local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;
if ( mime_types_extensions[mime_str] in c$http$uri ) next;
local url = build_url_http(c$http);
if ( url == ignored_incorrect_file_type_urls ) next;
local message = fmt("%s %s %s", mime_str, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c]);
}
}
event file_over_new_connection(f: fa_file, c: connection) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! c?$http ) return;
# Spread the mime around (e.g. for partial content, file_type event only
# happens once for the first connection, but if there are subsequent
# connections to transfer the same file, they'll be lacking the mime_type
# field if we don't do this).
c$http$mime_type = f$mime_type;
}
# Tracks byte-range request / partial content response mime types, indexed
# by [connection, uri] pairs. This is needed because a person can pipeline
# byte-range requests over multiple connections to the same uri. Without
# the tracking, only the first request in the pipeline for each connection
# would get a mime_type field assigned to it (by the FileAnalysis policy hooks).
global partial_types: table[conn_id, string] of string &read_expire=5mins;
# Priority 4 so that it runs before the handler that will write to http.log.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
&priority=4
{
if ( ! c$http$range_request ) return;
if ( ! c$http?$uri ) return;
if ( c$http?$mime_type )
{
partial_types[c$id, c$http$uri] = c$http$mime_type;
return;
}
if ( [c$id, c$http$uri] in partial_types )
c$http$mime_type = partial_types[c$id, c$http$uri];
}

View file

@ -0,0 +1,56 @@
@load ./main
@load ./entities
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/files
module HTTP;
export {
## Default file handle provider for HTTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
## Default file describer for HTTP.
global describe_file: function(f: fa_file): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$http )
return "";
if ( c$http$range_request && ! is_orig )
{
# Any multipart responses from the server are pieces of the same file
# that correspond to range requests, so don't use mime depth to
# identify the file.
return cat(Analyzer::ANALYZER_HTTP, is_orig, c$id$orig_h, build_url(c$http));
}
else
{
local mime_depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth;
return cat(Analyzer::ANALYZER_HTTP, c$start_time, is_orig,
c$http$trans_depth, mime_depth, id_string(c$id));
}
}
function describe_file(f: fa_file): string
{
# This shouldn't be needed, but just in case...
if ( f$source != "HTTP" )
return "";
for ( cid in f$conns )
{
if ( f$conns[cid]?$http )
return build_url_http(f$conns[cid]$http);
}
return "";
}
event bro_init() &priority=5
{
Files::register_protocol(Analyzer::ANALYZER_HTTP,
[$get_file_handle = HTTP::get_file_handle,
$describe = HTTP::describe_file]);
}

View file

@ -1,5 +1,5 @@
##! Implements base functionality for HTTP analysis. The logging model is
##! to log request/response pairs and all relevant metadata together in
##! Implements base functionality for HTTP analysis. The logging model is
##! to log request/response pairs and all relevant metadata together in
##! a single record.
@load base/utils/numbers
@ -15,10 +15,10 @@ export {
## Placeholder.
EMPTY
};
## This setting changes if passwords used in Basic-Auth are captured or not.
const default_capture_password = F &redef;
type Info: record {
## Timestamp for when the request happened.
ts: time &log;
@ -26,7 +26,7 @@ export {
uid: string &log;
## The connection's 4-tuple of endpoint addresses/ports.
id: conn_id &log;
## Represents the pipelined depth into the connection of this
## Represents the pipelined depth into the connection of this
## request/response transaction.
trans_depth: count &log;
## Verb used in the HTTP request (GET, POST, HEAD, etc.).
@ -60,24 +60,24 @@ export {
## A set of indicators of various attributes discovered and
## related to a particular request/response pair.
tags: set[Tags] &log;
## Username if basic-auth is performed for the request.
username: string &log &optional;
## Password if basic-auth is performed for the request.
password: string &log &optional;
## Determines if the password will be captured for this request.
capture_password: bool &default=default_capture_password;
## All of the headers that may indicate if the request was proxied.
proxied: set[string] &log &optional;
## Indicates if this request can assume 206 partial content in
## response.
range_request: bool &default=F;
range_request: bool &default=F;
};
## Structure to maintain state for an HTTP connection with multiple
## Structure to maintain state for an HTTP connection with multiple
## requests and responses.
type State: record {
## Pending requests.
@ -87,7 +87,7 @@ export {
## Current response in the pending queue.
current_response: count &default=0;
};
## A list of HTTP headers typically used to indicate proxied requests.
const proxy_headers: set[string] = {
"FORWARDED",
@ -100,8 +100,8 @@ export {
} &redef;
## A list of HTTP methods. Other methods will generate a weird. Note
## that the HTTP analyzer will only accept methods consisting solely
## of letters ``[A-Za-z]``.
## that the HTTP analyzer will only accept methods consisting solely
## of letters ``[A-Za-z]``.
const http_methods: set[string] = {
"GET", "POST", "HEAD", "OPTIONS",
"PUT", "DELETE", "TRACE", "CONNECT",
@ -111,8 +111,8 @@ export {
"POLL", "REPORT", "SUBSCRIBE", "BMOVE",
"SEARCH"
} &redef;
## Event that can be handled to access the HTTP record as it is sent on
## Event that can be handled to access the HTTP record as it is sent on
## to the logging framework.
global log_http: event(rec: Info);
}
@ -147,12 +147,12 @@ function new_http_session(c: connection): Info
tmp$ts=network_time();
tmp$uid=c$uid;
tmp$id=c$id;
# $current_request is set prior to the Info record creation so we
# $current_request is set prior to the Info record creation so we
# can use the value directly here.
tmp$trans_depth = c$http_state$current_request;
return tmp;
}
function set_state(c: connection, request: bool, is_orig: bool)
{
if ( ! c?$http_state )
@ -160,19 +160,19 @@ function set_state(c: connection, request: bool, is_orig: bool)
local s: State;
c$http_state = s;
}
# These deal with new requests and responses.
if ( request || c$http_state$current_request !in c$http_state$pending )
c$http_state$pending[c$http_state$current_request] = new_http_session(c);
if ( ! is_orig && c$http_state$current_response !in c$http_state$pending )
c$http_state$pending[c$http_state$current_response] = new_http_session(c);
if ( is_orig )
c$http = c$http_state$pending[c$http_state$current_request];
else
c$http = c$http_state$pending[c$http_state$current_response];
}
event http_request(c: connection, method: string, original_URI: string,
unescaped_URI: string, version: string) &priority=5
{
@ -181,17 +181,17 @@ event http_request(c: connection, method: string, original_URI: string,
local s: State;
c$http_state = s;
}
++c$http_state$current_request;
set_state(c, T, T);
c$http$method = method;
c$http$uri = unescaped_URI;
if ( method !in http_methods )
event conn_weird("unknown_HTTP_method", c, method);
}
event http_reply(c: connection, version: string, code: count, reason: string) &priority=5
{
if ( ! c?$http_state )
@ -199,7 +199,7 @@ event http_reply(c: connection, version: string, code: count, reason: string) &p
local s: State;
c$http_state = s;
}
# If the last response was an informational 1xx, we're still expecting
# the real response to the request, so don't create a new Info record yet.
if ( c$http_state$current_response !in c$http_state$pending ||
@ -207,7 +207,7 @@ event http_reply(c: connection, version: string, code: count, reason: string) &p
! code_in_range(c$http_state$pending[c$http_state$current_response]$status_code, 100, 199)) )
++c$http_state$current_response;
set_state(c, F, F);
c$http$status_code = code;
c$http$status_msg = reason;
if ( code_in_range(code, 100, 199) )
@ -216,33 +216,33 @@ event http_reply(c: connection, version: string, code: count, reason: string) &p
c$http$info_msg = reason;
}
}
event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=5
{
set_state(c, F, is_orig);
if ( is_orig ) # client headers
{
if ( name == "REFERER" )
c$http$referrer = value;
else if ( name == "HOST" )
# The split is done to remove the occasional port value that shows up here.
c$http$host = split1(value, /:/)[1];
else if ( name == "RANGE" )
c$http$range_request = T;
else if ( name == "USER-AGENT" )
c$http$user_agent = value;
else if ( name in proxy_headers )
{
if ( ! c$http?$proxied )
c$http$proxied = set();
add c$http$proxied[fmt("%s -> %s", name, value)];
}
else if ( name == "AUTHORIZATION" )
{
if ( /^[bB][aA][sS][iI][cC] / in value )
@ -264,25 +264,19 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr
}
}
}
else # server headers
{
if ( name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in value )
c$http$filename = extract_filename_from_content_disposition(value);
}
}
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = 5
{
set_state(c, F, is_orig);
if ( is_orig )
c$http$request_body_len = stat$body_length;
else
c$http$response_body_len = stat$body_length;
}
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = -5
{
# The reply body is done so we're ready to log.
@ -311,4 +305,4 @@ event connection_state_remove(c: connection) &priority=-5
}
}
}
View file
@ -32,6 +32,9 @@ export {
##
## Returns: A URL prefixed with "http://".
global build_url_http: function(rec: Info): string;
## Create an extremely shortened representation of a log line.
global describe: function(rec: Info): string;
}
@ -62,3 +65,8 @@ function build_url_http(rec: Info): string
{
return fmt("http://%s", build_url(rec));
}
function describe(rec: Info): string
{
return build_url_http(rec);
}
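# A usage sketch (illustrative only, not part of this change): describe() is
# intended for anything that wants a one-line summary of a request, e.g. a
# handler for the log_http event. The handler below is only an example.
event HTTP::log_http(rec: HTTP::Info)
	{
	print HTTP::describe(rec);
	}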
View file
@ -1,5 +1,5 @@
@load ./main
@load ./dcc-send
@load ./file-analysis
@load ./files
@load-sigs ./dpd.sig
View file
@ -2,7 +2,7 @@
##!
##! There is a major problem with this script in the cluster context because
##! we might see A send B a message that a DCC connection is to be expected,
##! but that connection will actually be between B and C which could be
##! analyzed on a different worker.
##!
@ -15,12 +15,6 @@
module IRC;
export {
## Pattern of file mime types to extract from IRC DCC file transfers.
const extract_file_types = /NO_DEFAULT/ &redef;
## On-disk prefix for files to be extracted from IRC DCC file transfers.
const extraction_prefix = "irc-dcc-item" &redef;
redef record Info += {
## DCC filename requested.
dcc_file_name: string &log &optional;
@ -28,101 +22,10 @@ export {
dcc_file_size: count &log &optional;
## Sniffed mime type of the file.
dcc_mime_type: string &log &optional;
## The file handle for the file to be extracted
extraction_file: string &log &optional;
## A boolean to indicate if the current file transfer should be extracted.
extract_file: bool &default=F;
};
}
global dcc_expected_transfers: table[addr, port] of Info &read_expire=5mins;
function set_dcc_mime(f: fa_file)
{
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
s$dcc_mime_type = f$mime_type;
}
}
function set_dcc_extraction_file(f: fa_file, filename: string)
{
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
s$extraction_file = filename;
}
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
# This handler sets the IRC::Info mime type.
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
if ( ! f?$mime_type ) return;
set_dcc_mime(f);
}
# This handler checks whether file extraction is desired.
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
local fname: string;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
set_dcc_extraction_file(f, fname);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
if ( ! s$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
s$extraction_file = fname;
return;
}
}
global dcc_expected_transfers: table[addr, port] of Info &synchronized &read_expire=5mins;
function log_dcc(f: fa_file)
{
@ -141,24 +44,21 @@ function log_dcc(f: fa_file)
Log::write(IRC::LOG, irc);
irc$command = tmp;
# Delete these values in case another DCC transfer
# Delete these values in case another DCC transfer
delete irc$extract_file;
delete irc$extraction_file;
delete irc$dcc_file_name;
delete irc$dcc_file_size;
delete irc$dcc_mime_type;
delete dcc_expected_transfers[cid$resp_h, cid$resp_p];
return;
}
}
event file_new(f: fa_file) &priority=-5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
log_dcc(f);
if ( f$source == "IRC_DATA" )
log_dcc(f);
}
event irc_dcc_message(c: connection, is_orig: bool,
View file
@ -1,25 +0,0 @@
@load ./dcc-send.bro
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module IRC;
export {
## Default file handle provider for IRC.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( is_orig ) return "";
return cat(Analyzer::ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id));
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_IRC_DATA ) return;
set_file_handle(IRC::get_file_handle(c, is_orig));
}
View file
@ -0,0 +1,39 @@
@load ./dcc-send
@load base/utils/conn-ids
@load base/frameworks/files
module IRC;
export {
redef record Info += {
## File unique ID.
fuid: string &log &optional;
};
## Default file handle provider for IRC.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
return cat(Analyzer::ANALYZER_IRC_DATA, c$start_time, c$id, is_orig);
}
event bro_init() &priority=5
{
Files::register_protocol(Analyzer::ANALYZER_IRC_DATA,
[$get_file_handle = IRC::get_file_handle]);
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers )
return;
local irc = dcc_expected_transfers[c$id$resp_h, c$id$resp_p];
irc$fuid = f$id;
if ( irc?$dcc_file_name )
f$info$filename = irc$dcc_file_name;
if ( f?$mime_type )
irc$dcc_mime_type = f$mime_type;
}
View file
@ -1,6 +1,5 @@
@load ./main
@load ./entities
@load ./entities-excerpt
@load ./file-analysis
@load ./files
@load-sigs ./dpd.sig
View file
@ -1,37 +0,0 @@
##! This script is for optionally adding a body excerpt to the SMTP
##! entities log.
@load ./entities
module SMTP;
export {
redef record SMTP::EntityInfo += {
## The entity body excerpt.
excerpt: string &log &default="";
};
## This is the default value for how much of the entity body should be
## included for all MIME entities. The lesser of this value and
## :bro:see:`default_file_bof_buffer_size` will be used.
const default_entity_excerpt_len = 0 &redef;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$bof_buffer ) return;
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( default_entity_excerpt_len > 0 )
c$smtp$current_entity$excerpt =
f$bof_buffer[0:default_entity_excerpt_len];
}
}
View file
@ -1,5 +1,6 @@
##! Analysis and logging for MIME entities found in SMTP sessions.
@load base/frameworks/files
@load base/utils/strings
@load base/utils/files
@load ./main
@ -7,217 +8,55 @@
module SMTP;
export {
redef enum Log::ID += { ENTITIES_LOG };
type EntityInfo: record {
## This is the timestamp of when the MIME content transfer began.
ts: time &log;
uid: string &log;
id: conn_id &log;
## A count to represent the depth of this message transaction in a
## single connection where multiple messages were transferred.
trans_depth: count &log;
## The filename seen in the Content-Disposition header.
filename: string &log &optional;
## Track how many bytes of the MIME encoded file have been seen.
content_len: count &log &default=0;
## The mime type of the entity discovered through magic bytes identification.
mime_type: string &log &optional;
## The calculated MD5 sum for the MIME entity.
md5: string &log &optional;
## Optionally calculate the file's MD5 sum. Must be set prior to the
## first data chunk being seen in an event.
calc_md5: bool &default=F;
## Optionally write the file to disk. Must be set prior to first
## data chunk being seen in an event.
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: string &log &optional;
type Entity: record {
## Filename for the entity if discovered from a header.
filename: string &optional;
};
redef record Info += {
## The in-progress entity information.
current_entity: EntityInfo &optional;
## The current entity being seen.
entity: Entity &optional;
};
redef record State += {
## Track the number of MIME encoded files transferred during a session.
mime_level: count &default=0;
## Track the number of MIME encoded files transferred
## during a session.
mime_depth: count &default=0;
};
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
## Pattern of file mime types to extract from MIME bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from MIME entity bodies.
const extraction_prefix = "smtp-entity" &redef;
## If set, never generate MD5s. This is mainly for testing purposes to create
## reproducible output when the decision whether to create
## checksums depends on environment specifics.
const never_calc_md5 = F &redef;
global log_mime: event(rec: EntityInfo);
}
event bro_init() &priority=5
{
Log::create_stream(SMTP::ENTITIES_LOG, [$columns=EntityInfo, $ev=log_mime]);
}
function set_session(c: connection, new_entity: bool)
{
if ( ! c$smtp?$current_entity || new_entity )
{
local info: EntityInfo;
info$ts=network_time();
info$uid=c$uid;
info$id=c$id;
info$trans_depth=c$smtp$trans_depth;
c$smtp$current_entity = info;
++c$smtp_state$mime_level;
}
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
event mime_begin_entity(c: connection) &priority=10
{
if ( ! c?$smtp ) return;
set_session(c, T);
c$smtp$entity = Entity();
++c$smtp_state$mime_depth;
}
event file_new(f: fa_file) &priority=5
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local extracting: bool = F;
for ( cid in f$conns )
if ( f$source == "SMTP" && c?$smtp )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
if ( c$smtp$current_entity$extract_file )
{
if ( ! extracting )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f,
[$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
}
c$smtp$current_entity$extraction_file = fname;
}
if ( c$smtp$current_entity$calc_md5 )
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
if ( c$smtp?$entity && c$smtp$entity?$filename )
f$info$filename = c$smtp$entity$filename;
f$info$depth = c$smtp_state$mime_depth;
}
}
function check_extract_by_type(f: fa_file)
event mime_one_header(c: connection, h: mime_header_rec) &priority=5
{
if ( extract_file_types !in f$mime_type ) return;
if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers )
if ( ! c?$smtp )
return;
local fname: string = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
c$smtp$current_entity$extraction_file = fname;
}
}
function check_md5_by_type(f: fa_file)
{
if ( never_calc_md5 ) return;
if ( generate_md5 !in f$mime_type ) return;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$mime_type ) return;
if ( f?$conns )
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
c$smtp$current_entity$mime_type = f$mime_type;
}
check_extract_by_type(f);
check_md5_by_type(f);
}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
# Only log if there was some content.
if ( f$seen_bytes == 0 ) next;
if ( f?$info && f$info?$md5 )
c$smtp$current_entity$md5 = f$info$md5;
c$smtp$current_entity$content_len = f$seen_bytes;
Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
delete c$smtp$current_entity;
return;
}
}
event mime_one_header(c: connection, h: mime_header_rec)
{
if ( ! c?$smtp ) return;
if ( h$name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
c$smtp$entity$filename = extract_filename_from_content_disposition(h$value);
if ( h$name == "CONTENT-TYPE" &&
/[nN][aA][mM][eE][:blank:]*=/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
c$smtp$entity$filename = extract_filename_from_content_disposition(h$value);
}
event mime_end_entity(c: connection) &priority=5
{
if ( c?$smtp && c$smtp?$entity )
delete c$smtp$entity;
}
View file
@ -1,27 +0,0 @@
@load ./main
@load ./entities
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module SMTP;
export {
## Default file handle provider for SMTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$smtp ) return "";
return cat(Analyzer::ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ",
c$smtp_state$mime_level);
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_SMTP ) return;
set_file_handle(SMTP::get_file_handle(c, is_orig));
}
View file
@ -0,0 +1,53 @@
@load ./main
@load ./entities
@load base/utils/conn-ids
@load base/frameworks/files
module SMTP;
export {
redef record Info += {
## An ordered vector of file unique IDs seen attached to
## the message.
fuids: vector of string &log &default=string_vec();
};
## Default file handle provider for SMTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
## Default file describer for SMTP.
global describe_file: function(f: fa_file): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
return cat(Analyzer::ANALYZER_SMTP, c$start_time, c$smtp$trans_depth,
c$smtp_state$mime_depth);
}
function describe_file(f: fa_file): string
{
# This shouldn't be needed, but just in case...
if ( f$source != "SMTP" )
return "";
for ( cid in f$conns )
{
local c = f$conns[cid];
return SMTP::describe(c$smtp);
}
return "";
}
event bro_init() &priority=5
{
Files::register_protocol(Analyzer::ANALYZER_SMTP,
[$get_file_handle = SMTP::get_file_handle,
$describe = SMTP::describe_file]);
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( c?$smtp )
c$smtp$fuids[|c$smtp$fuids|] = f$id;
}
View file
@ -72,7 +72,10 @@ export {
## ALL_HOSTS - always capture the entire path.
## NO_HOSTS - never capture the path.
const mail_path_capture = ALL_HOSTS &redef;
## Create an extremely shortened representation of a log line.
global describe: function(rec: Info): string;
global log_smtp: event(rec: Info);
}
@ -223,7 +226,10 @@ event mime_one_header(c: connection, h: mime_header_rec) &priority=5
{
if ( ! c$smtp?$to )
c$smtp$to = set();
add c$smtp$to[h$value];
local to_parts = split(h$value, /[[:blank:]]*,[[:blank:]]*/);
for ( i in to_parts )
add c$smtp$to[to_parts[i]];
}
else if ( h$name == "X-ORIGINATING-IP" )
@ -268,3 +274,29 @@ event connection_state_remove(c: connection) &priority=-5
if ( c?$smtp )
smtp_message(c);
}
function describe(rec: Info): string
{
if ( rec?$mailfrom && rec?$rcptto )
{
local one_to = "";
for ( to in rec$rcptto )
{
one_to = to;
break;
}
local abbrev_subject = "";
if ( rec?$subject )
{
if ( |rec$subject| > 20 )
{
abbrev_subject = rec$subject[0:20] + "...";
}
}
return fmt("%s -> %s%s%s", rec$mailfrom, one_to,
(|rec$rcptto|>1 ? fmt(" (plus %d others)", |rec$rcptto|-1) : ""),
(abbrev_subject != "" ? fmt(": %s", abbrev_subject) : ""));
}
return "";
}
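# For illustration (not part of this change): for a message from
# "alice@example.com" to three recipients, the first being "bob@example.com",
# with subject "Quarterly results for review", describe() returns roughly
# "alice@example.com -> bob@example.com (plus 2 others): Quarterly results fo..."
# (addresses made up; subjects longer than 20 characters are truncated).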
View file
@ -0,0 +1,123 @@
##! A module for performing active HTTP requests and
##! getting the reply at runtime.
@load ./exec
module ActiveHTTP;
export {
## The default timeout for HTTP requests.
const default_max_time = 1min &redef;
## The default HTTP method/verb to use for requests.
const default_method = "GET" &redef;
type Response: record {
## Numeric response code from the server.
code: count;
## String response message from the server.
msg: string;
## Full body of the response.
body: string &optional;
## All headers returned by the server.
headers: table[string] of string &optional;
};
type Request: record {
## The URL being requested.
url: string;
## The HTTP method/verb to use for the request.
method: string &default=default_method;
## Data to send to the server in the client body. Keep in
## mind that you will probably need to set the *method* field
## to "POST" or "PUT".
client_data: string &optional;
## Arbitrary headers to pass to the server. Some headers
## will be included by libCurl.
#custom_headers: table[string] of string &optional;
## Timeout for the request.
max_time: interval &default=default_max_time;
## Additional curl command line arguments. Be very careful
## with this option, since shell injection can occur if
## untrusted data is not handled carefully.
addl_curl_args: string &optional;
};
## Perform an HTTP request according to the :bro:type:`Request` record.
## This is an asynchronous function and must be called within a "when"
## statement.
##
## req: A record instance representing all options for an HTTP request.
##
## Returns: A record with the full response message.
global request: function(req: ActiveHTTP::Request): ActiveHTTP::Response;
}
function request2curl(r: Request, bodyfile: string, headersfile: string): string
{
local cmd = fmt("curl -s -g -o \"%s\" -D \"%s\" -X \"%s\"",
str_shell_escape(bodyfile),
str_shell_escape(headersfile),
str_shell_escape(r$method));
cmd = fmt("%s -m %.0f", cmd, r$max_time);
if ( r?$client_data )
cmd = fmt("%s -d -", cmd);
if ( r?$addl_curl_args )
cmd = fmt("%s %s", cmd, r$addl_curl_args);
cmd = fmt("%s \"%s\"", cmd, str_shell_escape(r$url));
return cmd;
}
function request(req: Request): ActiveHTTP::Response
{
local tmpfile = "/tmp/bro-activehttp-" + unique_id("");
local bodyfile = fmt("%s_body", tmpfile);
local headersfile = fmt("%s_headers", tmpfile);
local cmd = request2curl(req, bodyfile, headersfile);
local stdin_data = req?$client_data ? req$client_data : "";
local resp: Response;
resp$code = 0;
resp$msg = "";
resp$body = "";
resp$headers = table();
return when ( local result = Exec::run([$cmd=cmd, $stdin=stdin_data, $read_files=set(bodyfile, headersfile)]) )
{
# If there is no response line then nothing else will work either.
if ( ! (result?$files && headersfile in result$files) )
{
Reporter::error(fmt("There was a failure when requesting \"%s\" with ActiveHTTP.", req$url));
return resp;
}
local headers = result$files[headersfile];
for ( i in headers )
{
# The reply is the first line.
if ( i == 0 )
{
local response_line = split_n(headers[0], /[[:blank:]]+/, F, 2);
if ( |response_line| != 3 )
return resp;
resp$code = to_count(response_line[2]);
resp$msg = response_line[3];
resp$body = join_string_vec(result$files[bodyfile], "");
}
else
{
local line = headers[i];
local h = split1(line, /:/);
if ( |h| != 2 )
next;
resp$headers[h[1]] = sub_bytes(h[2], 0, |h[2]|-1);
}
}
return resp;
}
}
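# A minimal usage sketch (illustrative only, not part of this change):
# ActiveHTTP::request is asynchronous, so it must be called inside a "when"
# statement. The URL below is just a placeholder.
event bro_init()
	{
	when ( local resp = ActiveHTTP::request([$url="http://example.com/"]) )
		{
		print fmt("%d %s", resp$code, resp$msg);
		}
	}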
View file
@ -0,0 +1,66 @@
@load base/utils/exec
@load base/frameworks/reporter
@load base/utils/paths
module Dir;
export {
## The default interval at which this module checks for new files in
## directories monitored via the :bro:see:`Dir::monitor` function.
const polling_interval = 30sec &redef;
## Register a directory to monitor with a callback that is called
## every time a previously unseen file appears. If a file is deleted
## and later reappears, it will be reported again.
##
## dir: The directory to monitor for files.
##
## callback: Callback that gets executed with each file name
## that is found. Filenames are provided with the full path.
##
## poll_interval: An interval at which to check for new files.
global monitor: function(dir: string, callback: function(fname: string),
poll_interval: interval &default=polling_interval);
}
event Dir::monitor_ev(dir: string, last_files: set[string],
callback: function(fname: string),
poll_interval: interval)
{
when ( local result = Exec::run([$cmd=fmt("ls -i \"%s/\"", str_shell_escape(dir))]) )
{
if ( result$exit_code != 0 )
{
Reporter::warning(fmt("Requested monitoring of non-existent directory (%s).", dir));
return;
}
local current_files: set[string] = set();
local files: vector of string = vector();
if ( result?$stdout )
files = result$stdout;
for ( i in files )
{
local parts = split1(files[i], / /);
if ( parts[1] !in last_files )
callback(build_path_compressed(dir, parts[2]));
add current_files[parts[1]];
}
schedule poll_interval
{
Dir::monitor_ev(dir, current_files, callback, poll_interval)
};
}
}
function monitor(dir: string, callback: function(fname: string),
poll_interval: interval &default=polling_interval)
{
event Dir::monitor_ev(dir, set(), callback, poll_interval);
}
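# A minimal usage sketch (illustrative only, not part of this change): the
# directory and the callback body are placeholders.
event bro_init()
	{
	Dir::monitor("/tmp/watched", function(fname: string)
		{
		print fmt("new file: %s", fname);
		}, 10sec);
	}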
scripts/base/utils/exec.bro Normal file
View file
@ -0,0 +1,185 @@
##! A module for executing external command line programs.
@load base/frameworks/input
module Exec;
export {
type Command: record {
## The command line to execute. Use care to avoid injection attacks:
## if the command uses untrusted or variable data, sanitize it
## with str_shell_escape().
cmd: string;
## Provide standard in to the program as a string.
stdin: string &default="";
## If additional files need to be read in as part of the command's
## output, they can be defined here.
read_files: set[string] &optional;
# The unique id for tracking executors.
uid: string &default=unique_id("");
};
type Result: record {
## Exit code from the program.
exit_code: count &default=0;
## True if the command was terminated with a signal.
signal_exit: bool &default=F;
## Each line of standard out.
stdout: vector of string &optional;
## Each line of standard error.
stderr: vector of string &optional;
## If additional files were requested to be read in,
## their contents will be available here.
files: table[string] of string_vec &optional;
};
## Function for running command line programs and getting
## output. This is an asynchronous function which is meant
## to be run with the `when` statement.
##
## cmd: The command to run. Use care to avoid injection attacks!
##
## Returns: A record representing the full results from the
## external program execution.
global run: function(cmd: Command): Result;
## The system directory for temp files.
const tmp_dir = "/tmp" &redef;
}
# Indexed by command uid.
global results: table[string] of Result;
global pending_commands: set[string];
global pending_files: table[string] of set[string];
type OneLine: record {
s: string;
is_stderr: bool;
};
type FileLine: record {
s: string;
};
event Exec::line(description: Input::EventDescription, tpe: Input::Event, s: string, is_stderr: bool)
{
local result = results[description$name];
if ( is_stderr )
{
if ( ! result?$stderr )
result$stderr = vector(s);
else
result$stderr[|result$stderr|] = s;
}
else
{
if ( ! result?$stdout )
result$stdout = vector(s);
else
result$stdout[|result$stdout|] = s;
}
}
event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s: string)
{
local parts = split1(description$name, /_/);
local name = parts[1];
local track_file = parts[2];
local result = results[name];
if ( ! result?$files )
result$files = table();
if ( track_file !in result$files )
result$files[track_file] = vector(s);
else
result$files[track_file][|result$files[track_file]|] = s;
}
event Input::end_of_data(name: string, source:string)
{
local parts = split1(name, /_/);
name = parts[1];
if ( name !in pending_commands || |parts| < 2 )
return;
local track_file = parts[2];
Input::remove(name);
if ( name !in pending_files )
delete pending_commands[name];
else
{
delete pending_files[name][track_file];
if ( |pending_files[name]| == 0 )
delete pending_commands[name];
system(fmt("rm \"%s\"", str_shell_escape(track_file)));
}
}
event InputRaw::process_finished(name: string, source:string, exit_code:count, signal_exit:bool)
{
if ( name !in pending_commands )
return;
Input::remove(name);
results[name]$exit_code = exit_code;
results[name]$signal_exit = signal_exit;
if ( name !in pending_files || |pending_files[name]| == 0 )
# No extra files to read, command is done.
delete pending_commands[name];
else
for ( read_file in pending_files[name] )
Input::add_event([$source=fmt("%s", read_file),
$name=fmt("%s_%s", name, read_file),
$reader=Input::READER_RAW,
$want_record=F,
$fields=FileLine,
$ev=Exec::file_line]);
}
function run(cmd: Command): Result
{
add pending_commands[cmd$uid];
results[cmd$uid] = [];
if ( cmd?$read_files )
{
for ( read_file in cmd$read_files )
{
if ( cmd$uid !in pending_files )
pending_files[cmd$uid] = set();
add pending_files[cmd$uid][read_file];
}
}
local config_strings: table[string] of string = {
["stdin"] = cmd$stdin,
["read_stderr"] = "1",
};
Input::add_event([$name=cmd$uid,
$source=fmt("%s |", cmd$cmd),
$reader=Input::READER_RAW,
$fields=Exec::OneLine,
$ev=Exec::line,
$want_record=F,
$config=config_strings]);
return when ( cmd$uid !in pending_commands )
{
local result = results[cmd$uid];
delete results[cmd$uid];
return result;
}
}
event bro_done()
{
# We are punting here and just deleting any unprocessed files.
for ( uid in pending_files )
for ( fname in pending_files[uid] )
system(fmt("rm \"%s\"", str_shell_escape(fname)));
}
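# A minimal usage sketch (illustrative only, not part of this change): like
# ActiveHTTP::request, Exec::run only yields its result inside a "when"
# statement. The command below is just an example.
event bro_init()
	{
	when ( local res = Exec::run([$cmd="ls /tmp"]) )
		{
		if ( res?$stdout )
			print res$stdout;
		}
	}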
View file
@ -6,22 +6,28 @@ function generate_extraction_filename(prefix: string, c: connection, suffix: str
{
local conn_info = fmt("%s:%d-%s:%d", addr_to_uri(c$id$orig_h), c$id$orig_p,
addr_to_uri(c$id$resp_h), c$id$resp_p);
if ( prefix != "" )
conn_info = fmt("%s_%s", prefix, conn_info);
if ( suffix != "" )
conn_info = fmt("%s_%s", conn_info, suffix);
return conn_info;
}
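# For illustration (not part of this change): with prefix "http-item", suffix
# "dat", and a connection from 10.0.0.1:49152 to 192.168.1.1:80 (addresses
# made up), this would produce "http-item_10.0.0.1:49152-192.168.1.1:80_dat".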
## For CONTENT-DISPOSITION headers, this function can be used to extract
## the filename.
function extract_filename_from_content_disposition(data: string): string
{
local filename = sub(data, /^.*[nN][aA][mM][eE][[:blank:]]*=[[:blank:]]*/, "");
local filename = sub(data, /^.*[nN][aA][mM][eE][[:blank:]]*\*?=[[:blank:]]*/, "");
# Remove quotes around the filename if they are there.
if ( /^\"/ in filename )
filename = split_n(filename, /\"/, F, 2)[2];
return filename;
filename = split_n(filename, /\"/, F, 2)[2];
# Remove the language and encoding if it's there.
if ( /^[a-zA-Z0-9\!#$%&+-^_`{}~]+'[a-zA-Z0-9\!#$%&+-^_`{}~]*'/ in filename )
filename = sub(filename, /^.+'.*'/, "");
return unescape_URI(filename);
}
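# For illustration (not part of this change): with the changes above, a header
# value such as
#   attachment; filename*=UTF-8''report%20final.pdf
# now yields "report final.pdf" (the encoding/language prefix is stripped and
# URI escapes are decoded), while a plain
#   attachment; filename="report.pdf"
# still yields "report.pdf".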