Merge remote-tracking branch 'origin/topic/jsiwek/smtp-refactor'

- While updating, I did some further work on the branch.

- New function in the base/utils/files for extracting filenames
  from content-dispositions.

- New script for entity excerpt extraction if you aren't interested
  in full extraction.  The data goes a log field too.

- Some renaming and reorganization of types.

- Updated tests to work with new code.

* origin/topic/jsiwek/smtp-refactor:
  Make the doc.coverage test happy.
  SMTP script refactor. (addresses #509)

Conflicts:
	doc/scripts/DocSourcesList.cmake
	policy/protocols/smtp/__load__.bro
	policy/protocols/smtp/base/__load__.bro
This commit is contained in:
Seth Hall 2011-08-10 13:34:31 -04:00
commit adc486c673
17 changed files with 720 additions and 141 deletions

View file

@ -16,11 +16,13 @@ export {
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: file &optional;
extraction_file: file &log &optional;
};
redef record State += {
## Store a count of the number of files that have been transferred in
## this conversation to create unique file names on disk.
num_extracted_files: count &optional;
num_extracted_files: count &default=0;
};
}
@ -34,7 +36,7 @@ event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( c$mime$extract_file && c$mime$content_len == 0 )
{
local suffix = fmt("%d.dat", ++c$mime$num_extracted_files);
local suffix = fmt("%d.dat", ++c$mime_state$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$mime$extraction_file = open(fname);
enable_raw_output(c$mime$extraction_file);
@ -57,4 +59,4 @@ event mime_end_entity(c: connection) &priority=-3
if ( c$mime?$extraction_file )
close(c$mime$extraction_file);
}

View file

@ -1 +1,3 @@
@load ./main
@load ./entities
@load ./entities-excerpt

View file

@ -0,0 +1,52 @@
##! This script is for optionally adding a body excerpt to the SMTP
##! entities log.
@load ./entities
module SMTP;
export {
redef record SMTP::EntityInfo += {
## The entity body excerpt.
excerpt: string &log &default="";
## Internal tracking to know how much of the body should be included
## in the excerpt.
excerpt_len: count &optional;
};
## This is the default value for how much of the entity body should be
## included for all MIME entities.
const default_entity_excerpt_len = 0 &redef;
## This table defines how much of various entity bodies should be
## included in excerpts.
const entity_excerpt_len: table[string] of count = {}
&redef
&default = default_entity_excerpt_len;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-1
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$excerpt_len = entity_excerpt_len[c$smtp$current_entity$mime_type];
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-2
{
if ( ! c?$smtp ) return;
local ent = c$smtp$current_entity;
if ( ent$content_len < ent$excerpt_len )
{
if ( ent$content_len + length < ent$excerpt_len )
ent$excerpt = cat(ent$excerpt, data);
else
{
local x_bytes = ent$excerpt_len - ent$content_len;
ent$excerpt = cat(ent$excerpt, sub_bytes(data, 1, x_bytes));
}
}
}

View file

@ -0,0 +1,234 @@
##! Analysis and logging for MIME entities found in SMTP sessions.
@load base/utils/strings
@load base/utils/files
@load ./main
module SMTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for a MIME message.
MD5,
};
redef enum Log::ID += { SMTP_ENTITIES };
type EntityInfo: record {
## This is the timestamp of when the MIME content transfer began.
ts: time &log;
uid: string &log;
id: conn_id &log;
## Internally generated "message id" that ties back to the particular
## message in the SMTP log where this entity was seen.
mid: string &log;
## The filename seen in the Content-Disposition header.
filename: string &log &optional;
## Track how many bytes of the MIME encoded file have been seen.
content_len: count &log &default=0;
## The mime type of the entity discovered through magic bytes identification.
mime_type: string &log &optional;
## The calculated MD5 sum for the MIME entity.
md5: string &log &optional;
## Optionally calculate the file's MD5 sum. Must be set prior to the
## first data chunk being see in an event.
calc_md5: bool &default=F;
## This boolean value indicates if an MD5 sum is being calculated
## for the current file transfer.
calculating_md5: bool &default=F;
## Optionally write the file to disk. Must be set prior to first
## data chunk being seen in an event.
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: file &log &optional;
};
redef record Info += {
## The in-progress entity information.
current_entity: EntityInfo &optional;
};
redef record State += {
## Store a count of the number of files that have been transferred in
## a conversation to create unique file names on disk.
num_extracted_files: count &default=0;
## Track the number of MIME encoded files transferred during a session.
mime_level: count &default=0;
};
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
## Pattern of file mime types to extract from MIME bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from MIME entity bodies.
const extraction_prefix = "smtp-entity" &redef;
global log_mime: event(rec: EntityInfo);
}
event bro_init() &priority=5
{
Log::create_stream(SMTP_ENTITIES, [$columns=EntityInfo, $ev=log_mime]);
}
function set_session(c: connection, new_entity: bool)
{
if ( ! c$smtp?$current_entity || new_entity )
{
local info: EntityInfo;
info$ts=network_time();
info$uid=c$uid;
info$id=c$id;
info$mid=c$smtp$mid;
c$smtp$current_entity = info;
++c$smtp_state$mime_level;
}
}
event mime_begin_entity(c: connection) &priority=10
{
if ( ! c?$smtp ) return;
set_session(c, T);
}
# This has priority -10 because other handlers need to know the current
# content_len before it's updated by this handler.
event mime_segment_data(c: connection, length: count, data: string) &priority=-10
{
if ( ! c?$smtp ) return;
c$smtp$current_entity$content_len = c$smtp$current_entity$content_len + length;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=7
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$mime_type = split1(identify_data(data, T), /;/)[1];
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
{
if ( generate_md5 in c$smtp$current_entity$mime_type )
c$smtp$current_entity$calc_md5 = T;
if ( c$smtp$current_entity$calc_md5 )
{
c$smtp$current_entity$calculating_md5 = T;
md5_hash_init(c$id);
}
}
if ( c$smtp$current_entity$calculating_md5 )
md5_hash_update(c$id, data);
}
## In the event of a content gap during the MIME transfer, detect the state for
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$smtp || ! c$smtp?$current_entity ) return;
if ( c$smtp$current_entity$calculating_md5 )
{
c$smtp$current_entity$calculating_md5 = F;
md5_hash_finish(c$id);
}
}
event mime_end_entity(c: connection) &priority=-3
{
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c?$smtp || ! c$smtp?$current_entity )
return;
if ( c$smtp$current_entity$calculating_md5 )
{
c$smtp$current_entity$md5 = md5_hash_finish(c$id);
NOTICE([$note=MD5, $msg=fmt("Calculated a hash for a MIME entity from %s", c$id$orig_h),
$sub=c$smtp$current_entity$md5, $conn=c]);
}
}
event mime_one_header(c: connection, h: mime_header_rec)
{
if ( ! c?$smtp ) return;
if ( h$name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
}
event mime_end_entity(c: connection) &priority=-5
{
if ( ! c?$smtp ) return;
# This check and the delete below are just to cope with a bug where
# mime_end_entity can be generated multiple times for the same event.
if ( ! c$smtp?$current_entity )
return;
# Only log is there was some content.
if ( c$smtp$current_entity$content_len > 0 )
Log::write(SMTP_ENTITIES, c$smtp$current_entity);
delete c$smtp$current_entity;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=5
{
if ( ! c?$smtp ) return;
if ( extract_file_types in c$smtp$current_entity$mime_type )
c$smtp$current_entity$extract_file = T;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file &&
c$smtp$current_entity$content_len == 0 )
{
local suffix = fmt("%d.dat", ++c$smtp_state$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$smtp$current_entity$extraction_file = open(fname);
enable_raw_output(c$smtp$current_entity$extraction_file);
}
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file && c$smtp$current_entity?$extraction_file )
print c$smtp$current_entity$extraction_file, data;
}
event mime_end_entity(c: connection) &priority=-3
{
if ( ! c?$smtp ) return;
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c$smtp?$current_entity )
return;
if ( c$smtp$current_entity?$extraction_file )
close(c$smtp$current_entity$extraction_file);
}

View file

@ -15,6 +15,10 @@ export {
ts: time &log;
uid: string &log;
id: conn_id &log;
## This is an internally generated "message id" that can be used to
## map between SMTP messages and MIME entities in the SMTP entities
## log.
mid: string &log;
helo: string &log &optional;
mailfrom: string &log &optional;
rcptto: set[string] &log &optional;
@ -30,19 +34,13 @@ export {
second_received: string &log &optional;
## The last message the server sent to the client.
last_reply: string &log &optional;
files: set[string] &log &optional;
path: vector of addr &log &optional;
user_agent: string &log &optional;
## Indicate if this session is currently transmitting SMTP message
## envelope headers.
in_headers: bool &default=F;
## Indicate if the "Received: from" headers should still be processed.
process_received_from: bool &default=T;
## Maintain the current header for cases where there is header wrapping.
current_header: string &default="";
## Indicate when the message is logged and no longer applicable.
done: bool &default=F;
## Indicates if client activity has been seen, but not yet logged
has_client_activity: bool &default=F;
};
type State: record {
@ -121,6 +119,7 @@ function new_smtp_log(c: connection): Info
l$ts=network_time();
l$uid=c$uid;
l$id=c$id;
l$mid=unique_id("@");
if ( c?$smtp_state && c$smtp_state?$helo )
l$helo = c$smtp_state$helo;
@ -136,26 +135,23 @@ function set_smtp_session(c: connection)
if ( ! c?$smtp_state )
c$smtp_state = [];
if ( ! c?$smtp || c$smtp$done )
{
if ( ! c?$smtp )
c$smtp = new_smtp_log(c);
}
}
function smtp_message(c: connection)
{
Log::write(SMTP, c$smtp);
c$smtp$done = T;
# Track the number of messages seen in this session.
++c$smtp_state$messages_transferred;
if ( c$smtp$has_client_activity )
Log::write(SMTP, c$smtp);
}
event smtp_request(c: connection, is_orig: bool, command: string, arg: string) &priority=5
{
set_smtp_session(c);
local upper_command = to_upper(command);
if ( upper_command != "QUIT" )
c$smtp$has_client_activity = T;
if ( upper_command == "HELO" || upper_command == "EHLO" )
{
@ -172,26 +168,11 @@ event smtp_request(c: connection, is_orig: bool, command: string, arg: string) &
else if ( upper_command == "MAIL" && /^[fF][rR][oO][mM]:/ in arg )
{
# In case this is not the first message in a session we want to
# essentially write out a log, clear the session tracking, and begin
# new session tracking.
if ( c$smtp_state$messages_transferred > 0 )
{
smtp_message(c);
set_smtp_session(c);
}
local partially_done = split1(arg, /:[[:blank:]]*/)[2];
c$smtp$mailfrom = split1(partially_done, /[[:blank:]]?/)[1];
}
else if ( upper_command == "DATA" )
{
c$smtp$in_headers = T;
}
}
event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string,
msg: string, cont_resp: bool) &priority=5
{
@ -199,10 +180,10 @@ event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string,
# This continually overwrites, but we want the last reply,
# so this actually works fine.
c$smtp$last_reply = fmt("%d %s", code, msg);
if ( code != 421 && code >= 400 )
{
c$smtp$last_reply = fmt("%d %s", code, msg);
# Raise a notice when an SMTP error about a block list is discovered.
if ( bl_error_messages in msg )
{
@ -223,145 +204,95 @@ event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string,
}
}
event smtp_data(c: connection, is_orig: bool, data: string) &priority=5
event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string,
msg: string, cont_resp: bool) &priority=-5
{
# Is there something we should be handling from the server?
if ( ! is_orig ) return;
set_smtp_session(c);
if ( ! c$smtp$in_headers )
if ( cmd == "." )
{
if ( /^[cC][oO][nN][tT][eE][nN][tT]-[dD][iI][sS].*[fF][iI][lL][eE][nN][aA][mM][eE]/ in data )
{
if ( ! c$smtp?$files )
c$smtp$files = set();
data = sub(data, /^.*[fF][iI][lL][eE][nN][aA][mM][eE]=/, "");
add c$smtp$files[data];
}
return;
# Track the number of messages seen in this session.
++c$smtp_state$messages_transferred;
smtp_message(c);
c$smtp = new_smtp_log(c);
}
}
if ( /^[[:blank:]]*$/ in data )
c$smtp$in_headers = F;
event mime_one_header(c: connection, h: mime_header_rec) &priority=5
{
if ( ! c?$smtp ) return;
c$smtp$has_client_activity = T;
# This is to reconstruct headers that tend to wrap around.
if ( /^[[:blank:]]/ in data )
{
# Remove all but a single space at the beginning (this seems to follow
# the most common behavior).
data = sub(data, /^[[:blank:]]*/, " ");
if ( c$smtp$current_header == "MESSAGE-ID" )
c$smtp$msg_id += data;
else if ( c$smtp$current_header == "RECEIVED" )
c$smtp$first_received += data;
else if ( c$smtp$current_header == "IN-REPLY-TO" )
c$smtp$in_reply_to += data;
else if ( c$smtp$current_header == "SUBJECCT" )
c$smtp$subject += data;
else if ( c$smtp$current_header == "FROM" )
c$smtp$from += data;
else if ( c$smtp$current_header == "REPLY-TO" )
c$smtp$reply_to += data;
else if ( c$smtp$current_header == "USER-AGENT" )
c$smtp$user_agent += data;
return;
}
# Once there isn't a line starting with a blank, we're not continuing a
# header anymore.
c$smtp$current_header = "";
local header_parts = split1(data, /:[[:blank:]]*/);
# TODO: do something in this case? This would definitely be odd.
# Header wrapping needs to be handled more elegantly. This will happen
# if the header value is wrapped immediately after the header key.
if ( |header_parts| != 2 )
return;
local header_key = to_upper(header_parts[1]);
c$smtp$current_header = header_key;
local header_val = header_parts[2];
if ( header_key == "MESSAGE-ID" )
c$smtp$msg_id = header_val;
else if ( header_key == "RECEIVED" )
if ( h$name == "MESSAGE-ID" )
c$smtp$msg_id = h$value;
else if ( h$name == "RECEIVED" )
{
if ( c$smtp?$first_received )
c$smtp$second_received = c$smtp$first_received;
c$smtp$first_received = header_val;
c$smtp$first_received = h$value;
}
else if ( header_key == "IN-REPLY-TO" )
c$smtp$in_reply_to = header_val;
else if ( header_key == "DATE" )
c$smtp$date = header_val;
else if ( header_key == "FROM" )
c$smtp$from = header_val;
else if ( header_key == "TO" )
else if ( h$name == "IN-REPLY-TO" )
c$smtp$in_reply_to = h$value;
else if ( h$name == "SUBJECT" )
c$smtp$subject = h$value;
else if ( h$name == "FROM" )
c$smtp$from = h$value;
else if ( h$name == "REPLY-TO" )
c$smtp$reply_to = h$value;
else if ( h$name == "DATE" )
c$smtp$date = h$value;
else if ( h$name == "TO" )
{
if ( ! c$smtp?$to )
c$smtp$to = set();
add c$smtp$to[header_val];
add c$smtp$to[h$value];
}
else if ( header_key == "REPLY-TO" )
c$smtp$reply_to = header_val;
else if ( header_key == "SUBJECT" )
c$smtp$subject = header_val;
else if ( header_key == "X-ORIGINATING-IP" )
else if ( h$name == "X-ORIGINATING-IP" )
{
local addresses = find_ip_addresses(header_val);
local addresses = find_ip_addresses(h$name);
if ( 1 in addresses )
c$smtp$x_originating_ip = to_addr(addresses[1]);
}
else if ( header_key == "X-MAILER" ||
header_key == "USER-AGENT" ||
header_key == "X-USER-AGENT" )
{
c$smtp$user_agent = header_val;
# Explicitly set the current header here because there are several
# headers bulked under this same key.
c$smtp$current_header = "USER-AGENT";
}
else if ( h$name == "X-MAILER" ||
h$name == "USER-AGENT" ||
h$name == "X-USER-AGENT" )
c$smtp$user_agent = h$value;
}
# This event handler builds the "Received From" path by reading the
# headers in the mail
event smtp_data(c: connection, is_orig: bool, data: string) &priority=3
event mime_one_header(c: connection, h: mime_header_rec) &priority=3
{
# If we've decided that we're done watching the received headers for
# whatever reason, we're done. Could be due to only watching until
# local addresses are seen in the received from headers.
if ( c$smtp$current_header != "RECEIVED" ||
! c$smtp$process_received_from )
if ( ! c?$smtp || h$name != "RECEIVED" || ! c$smtp$process_received_from)
return;
local text_ip = find_address_in_smtp_header(data);
local text_ip = find_address_in_smtp_header(h$value);
if ( text_ip == "" )
return;
local ip = to_addr(text_ip);
if ( ! addr_matches_host(ip, mail_path_capture) &&
! Site::is_private_addr(ip) )
{
c$smtp$process_received_from = F;
}
if ( c$smtp$path[|c$smtp$path|-1] != ip )
c$smtp$path[|c$smtp$path|] = ip;
}
event connection_state_remove(c: connection) &priority=-5
{
if ( c?$smtp && ! c$smtp$done )
if ( c?$smtp )
smtp_message(c);
}

View file

@ -12,4 +12,15 @@ function generate_extraction_filename(prefix: string, c: connection, suffix: str
conn_info = fmt("%s_%s", conn_info, suffix);
return conn_info;
}
}
## For CONTENT-DISPOSITION headers, this function can be used to extract
## the filename.
function extract_filename_from_content_disposition(data: string): string
{
local filename = sub(data, /^.*[fF][iI][lL][eE][nN][aA][mM][eE]=/, "");
# Remove quotes around the filename if they are there.
if ( /^\"/ in filename )
filename = split_n(filename, /\"/, F, 2)[2];
return filename;
}

View file

@ -43,10 +43,10 @@ export {
| /ZimbraWebClient/ &redef;
}
event smtp_data(c: connection, is_orig: bool, data: string) &priority=4
event mime_one_header(c: connection, h: mime_header_rec) &priority=4
{
if ( c$smtp$current_header == "USER-AGENT" &&
webmail_user_agents in c$smtp$user_agent )
if ( ! c?$smtp ) return;
if ( h$name == "USER-AGENT" && webmail_user_agents in c$smtp$user_agent )
c$smtp$is_webmail = T;
}

View file

@ -43,4 +43,12 @@ redef signature_files += "frameworks/signatures/detect-windows-shells.sig";
@load protocols/ssl/known-certs
# Load the script to enable SSL/TLS certificate validation.
@load protocols/ssl/validate-certs
@load protocols/ssl/validate-certs
# Uncomment this redef if you want to extract SMTP MIME entities for
# some file types. The numbers given indicate how many bytes to extract for
# the various mime types.
redef SMTP::entity_excerpt_len += {
# ["text/plain"] = 1024,
# ["text/html"] = 1024,
};