FileAnalysis: replace script-layer SMTP file analysis.

Notable differences:

- Removed SMTP::MD5 notice.

- Removed ability to specify mime entity excerpt length per mime-type.
This commit is contained in:
Jon Siwek 2013-03-26 15:48:52 -05:00
parent 84a0c2fdac
commit 497496ec83
14 changed files with 224 additions and 206 deletions

View file

@ -85,5 +85,5 @@ hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
if ( ! info?$source ) return;
if ( info$source != "HTTP" ) return;
FileAnalysis::add_action(info$file_id, [$act=FileAnalysis::ACTION_MD5]);
FileAnalysis::remove_action(info$file_id, [$act=FileAnalysis::ACTION_MD5]);
}

View file

@ -9,44 +9,41 @@ export {
redef record SMTP::EntityInfo += {
## The entity body excerpt.
excerpt: string &log &default="";
## Internal tracking to know how much of the body should be included
## in the excerpt.
excerpt_len: count &optional;
};
## This is the default value for how much of the entity body should be
## included for all MIME entities.
const default_entity_excerpt_len = 0 &redef;
## This table defines how much of various entity bodies should be
## included in excerpts.
const entity_excerpt_len: table[string] of count = {}
&redef
&default = default_entity_excerpt_len;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-1
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
&priority=5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$excerpt_len = entity_excerpt_len[c$smtp$current_entity$mime_type];
if ( trig != FileAnalysis::TRIGGER_NEW ) return;
if ( ! info?$source ) return;
if ( info$source != "SMTP" ) return;
if ( default_entity_excerpt_len > info$bof_buffer_size )
info$bof_buffer_size = default_entity_excerpt_len;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-2
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
&priority=5
{
if ( ! c?$smtp ) return;
local ent = c$smtp$current_entity;
if ( ent$content_len < ent$excerpt_len )
if ( trig != FileAnalysis::TRIGGER_BOF_BUFFER ) return;
if ( ! info?$bof_buffer ) return;
if ( ! info?$source ) return;
if ( info$source != "SMTP" ) return;
if ( ! info?$conns ) return;
for ( cid in info$conns )
{
if ( ent$content_len + length < ent$excerpt_len )
ent$excerpt = cat(ent$excerpt, data);
else
{
local x_bytes = ent$excerpt_len - ent$content_len;
ent$excerpt = cat(ent$excerpt, sub_bytes(data, 1, x_bytes));
}
local c: connection = info$conns[cid];
if ( ! c?$smtp ) next;
if ( default_entity_excerpt_len > 0 )
c$smtp$current_entity$excerpt =
info$bof_buffer[0:default_entity_excerpt_len];
}
}

View file

@ -7,11 +7,6 @@
module SMTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for a MIME message.
MD5,
};
redef enum Log::ID += { ENTITIES_LOG };
type EntityInfo: record {
@ -34,15 +29,12 @@ export {
## Optionally calculate the file's MD5 sum. Must be set prior to the
## first data chunk being seen in an event.
calc_md5: bool &default=F;
## Opaque MD5 state handle; it is set while an MD5 sum is being
## calculated for the current file transfer.
md5_handle: opaque of md5 &optional;
## Optionally write the file to disk. Must be set prior to first
## data chunk being seen in an event.
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: file &log &optional;
extraction_file: string &log &optional;
};
redef record Info += {
@ -77,6 +69,8 @@ export {
global log_mime: event(rec: EntityInfo);
}
global extract_count: count = 0;
event bro_init() &priority=5
{
Log::create_stream(SMTP::ENTITIES_LOG, [$columns=EntityInfo, $ev=log_mime]);
@ -100,74 +94,155 @@ function set_session(c: connection, new_entity: bool)
event mime_begin_entity(c: connection) &priority=10
{
if ( ! c?$smtp ) return;
set_session(c, T);
}
# This has priority -10 because other handlers need to know the current
# content_len before it's updated by this handler.
event mime_segment_data(c: connection, length: count, data: string) &priority=-10
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
&priority=5
{
if ( ! c?$smtp ) return;
c$smtp$current_entity$content_len = c$smtp$current_entity$content_len + length;
}
if ( trig != FileAnalysis::TRIGGER_NEW ) return;
if ( ! info?$source ) return;
if ( info$source != "SMTP" ) return;
if ( ! info?$conns ) return;
event mime_segment_data(c: connection, length: count, data: string) &priority=7
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$mime_type = split1(identify_data(data, T), /;/)[1];
}
local fname: string = fmt("%s-%s-%d.dat", extraction_prefix, info$file_id,
extract_count);
local extracting: bool = F;
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
for ( cid in info$conns )
{
local entity = c$smtp$current_entity;
if ( generate_md5 in entity$mime_type && ! never_calc_md5 )
entity$calc_md5 = T;
local c: connection = info$conns[cid];
if ( entity$calc_md5 )
entity$md5_handle = md5_hash_init();
}
if ( ! c?$smtp ) next;
if ( c$smtp$current_entity?$md5_handle )
md5_hash_update(entity$md5_handle, data);
}
if ( c$smtp$current_entity$extract_file )
{
if ( ! extracting )
{
FileAnalysis::add_action(info$file_id,
[$act=FileAnalysis::ACTION_EXTRACT,
$extract_filename=fname]);
extracting = T;
++extract_count;
}
## In the event of a content gap during the MIME transfer, detect the state for
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$smtp || ! c$smtp?$current_entity ) return;
c$smtp$current_entity$extraction_file = fname;
}
local entity = c$smtp$current_entity;
if ( entity?$md5_handle )
{
md5_hash_finish(entity$md5_handle);
delete entity$md5_handle;
if ( c$smtp$current_entity$calc_md5 )
FileAnalysis::add_action(info$file_id,
[$act=FileAnalysis::ACTION_MD5]);
}
}
event mime_end_entity(c: connection) &priority=-3
{
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c?$smtp || ! c$smtp?$current_entity )
# Requests extraction of a file whose MIME type matches extract_file_types,
# unless an extraction action is already attached to it, and records the
# chosen output filename on the current entity of each associated SMTP
# connection.
#
# info: the file analysis record. info$mime_type is read unconditionally,
#       so the caller has to make sure it is set.
function check_extract_by_type(info: FileAnalysis::Info)
	{
	if ( extract_file_types !in info$mime_type ) return;

	# Never attach a second extraction action to the same file.
	for ( act in info$actions )
		if ( act$act == FileAnalysis::ACTION_EXTRACT ) return;

	local fname: string = fmt("%s-%s-%d.dat", extraction_prefix, info$file_id,
	                          extract_count);
	++extract_count;
	FileAnalysis::add_action(info$file_id, [$act=FileAnalysis::ACTION_EXTRACT,
	                                        $extract_filename=fname]);

	if ( ! info?$conns ) return;

	for ( cid in info$conns )
		{
		local c: connection = info$conns[cid];

		if ( ! c?$smtp ) next;

		# The entity record is deleted once it has been logged, so it may
		# be unset here; the other SMTP policy hooks guard the same way.
		if ( ! c$smtp?$current_entity ) next;

		c$smtp$current_entity$extraction_file = fname;
		}
	}
# Attaches an MD5 action to the file when MD5 calculation has not been
# globally disabled (never_calc_md5) and the file's MIME type matches the
# generate_md5 pattern.
#
# info: the file analysis record; info$mime_type is read without a
#       presence check.
function check_md5_by_type(info: FileAnalysis::Info)
	{
	if ( ! never_calc_md5 && generate_md5 in info$mime_type )
		FileAnalysis::add_action(info$file_id, [$act=FileAnalysis::ACTION_MD5]);
	}
# Reacts to the file analysis framework having determined the MIME type of
# an SMTP-sourced file: copies the type onto the current entity of every
# associated SMTP connection, then decides by type whether the file should
# be extracted and/or hashed.
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
	&priority=5
	{
	if ( trig != FileAnalysis::TRIGGER_TYPE ) return;
	if ( ! info?$mime_type ) return;
	if ( ! info?$source ) return;
	if ( info$source != "SMTP" ) return;

	if ( info?$conns )
		for ( cid in info$conns )
			{
			local c: connection = info$conns[cid];

			if ( ! c?$smtp ) next;

			# The entity record is deleted once it has been logged, so it
			# may be unset here. Skipping it keeps the type-based checks
			# below running instead of failing on the missing field.
			if ( ! c$smtp?$current_entity ) next;

			c$smtp$current_entity$mime_type = info$mime_type;
			}

	check_extract_by_type(info);
	check_md5_by_type(info);
	}
# On a gap in an SMTP-sourced file, removes the MD5 action from the file
# once for each associated connection that still has a current SMTP entity.
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
	&priority=5
	{
	if ( trig != FileAnalysis::TRIGGER_GAP ) return;
	if ( ! info?$source || info$source != "SMTP" ) return;
	if ( ! info?$conns ) return;

	for ( conn_id in info$conns )
		{
		local sess: connection = info$conns[conn_id];

		if ( sess?$smtp && sess$smtp?$current_entity )
			FileAnalysis::remove_action(info$file_id,
			                            [$act=FileAnalysis::ACTION_MD5]);
		}
	}
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
&priority=5
{
if ( trig != FileAnalysis::TRIGGER_EOF &&
trig != FileAnalysis::TRIGGER_DONE ) return;
if ( ! info?$source ) return;
if ( info$source != "SMTP" ) return;
if ( ! info?$conns ) return;
for ( cid in info$conns )
{
local c: connection = info$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
# Only log if there was some content.
if ( info$seen_bytes == 0 ) next;
local act: FileAnalysis::ActionArgs = [$act=FileAnalysis::ACTION_MD5];
if ( act in info$actions )
{
local result = info$actions[act];
if ( result?$md5 )
c$smtp$current_entity$md5 = result$md5;
}
c$smtp$current_entity$content_len = info$seen_bytes;
Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
delete c$smtp$current_entity;
return;
local entity = c$smtp$current_entity;
if ( entity?$md5_handle )
{
entity$md5 = md5_hash_finish(entity$md5_handle);
delete entity$md5_handle;
NOTICE([$note=MD5, $msg=fmt("Calculated a hash for a MIME entity from %s", c$id$orig_h),
$sub=entity$md5, $conn=c]);
}
}
@ -179,66 +254,7 @@ event mime_one_header(c: connection, h: mime_header_rec)
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
if ( h$name == "CONTENT-TYPE" &&
if ( h$name == "CONTENT-TYPE" &&
/[nN][aA][mM][eE][:blank:]*=/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
}
event mime_end_entity(c: connection) &priority=-5
{
if ( ! c?$smtp ) return;
# This check and the delete below are just to cope with a bug where
# mime_end_entity can be generated multiple times for the same event.
if ( ! c$smtp?$current_entity )
return;
# Only log if there was some content.
if ( c$smtp$current_entity$content_len > 0 )
Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
delete c$smtp$current_entity;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=5
{
if ( ! c?$smtp ) return;
if ( extract_file_types in c$smtp$current_entity$mime_type )
c$smtp$current_entity$extract_file = T;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file &&
c$smtp$current_entity$content_len == 0 )
{
local suffix = fmt("%d.dat", ++c$smtp_state$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$smtp$current_entity$extraction_file = open(fname);
enable_raw_output(c$smtp$current_entity$extraction_file);
}
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file && c$smtp$current_entity?$extraction_file )
print c$smtp$current_entity$extraction_file, data;
}
event mime_end_entity(c: connection) &priority=-3
{
if ( ! c?$smtp ) return;
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c$smtp?$current_entity )
return;
if ( c$smtp$current_entity?$extraction_file )
close(c$smtp$current_entity$extraction_file);
}

View file

@ -1,15 +1,34 @@
@load base/frameworks/intel
@load base/protocols/smtp/file-analysis
@load base/utils/urls
@load ./where-locations
event mime_segment_data(c: connection, length: count, data: string) &priority=3
event intel_mime_data(info: FileAnalysis::Info, data: string)
{
local urls = find_all_urls_without_scheme(data);
for ( url in urls )
if ( ! info?$conns ) return;
for ( cid in info$conns )
{
Intel::seen([$str=url,
$str_type=Intel::URL,
$conn=c,
$where=SMTP::IN_MESSAGE]);
local c: connection = info$conns[cid];
local urls = find_all_urls_without_scheme(data);
for ( url in urls )
{
Intel::seen([$str=url,
$str_type=Intel::URL,
$conn=c,
$where=SMTP::IN_MESSAGE]);
}
}
}
}
# For every new SMTP-sourced file, attaches a data-event action so that
# the file's content is delivered to the intel_mime_data event.
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
	&priority=5
	{
	if ( trig == FileAnalysis::TRIGGER_NEW &&
	     info?$source && info$source == "SMTP" )
		{
		local args: FileAnalysis::ActionArgs =
			[$act=FileAnalysis::ACTION_DATA_EVENT,
			 $stream_event=intel_mime_data];

		FileAnalysis::add_action(info$file_id, args);
		}
	}

View file

@ -1033,6 +1033,7 @@ MIME_Mail::~MIME_Mail()
void MIME_Mail::BeginEntity(MIME_Entity* /* entity */)
{
cur_entity_len = 0;
if ( mime_begin_entity )
{
val_list* vl = new val_list;
@ -1130,6 +1131,7 @@ void MIME_Mail::SubmitData(int len, const char* buf)
// is_orig param not available, doesn't matter as long as it's consistent
file_mgr->DataIn(reinterpret_cast<const u_char*>(buf), len,
analyzer->GetTag(), analyzer->Conn(), false);
cur_entity_len += len;
buffer_start = (buf + len) - (char*)data_buffer->Bytes();
}
@ -1202,6 +1204,12 @@ void MIME_Mail::SubmitEvent(int event_type, const char* detail)
}
}
// Reports a gap of `len` bytes to the file manager, positioned at the
// number of bytes counted so far for the current entity (cur_entity_len).
void MIME_Mail::Undelivered(int len)
	{
	// The is_orig param is not available here; that doesn't matter as
	// long as the value passed is consistent.
	const bool is_orig = false;

	file_mgr->Gap(cur_entity_len, len, analyzer->GetTag(), analyzer->Conn(),
	              is_orig);
	}
int strcasecmp_n(data_chunk_t s, const char* t)
{

View file

@ -238,6 +238,7 @@ public:
int RequestBuffer(int* plen, char** pbuf);
void SubmitAllData();
void SubmitEvent(int event_type, const char* detail);
void Undelivered(int len);
protected:
int min_overlap_length;
@ -252,6 +253,8 @@ protected:
vector<const BroString*> all_content;
BroString* data_buffer;
uint64 cur_entity_len;
};

View file

@ -85,9 +85,13 @@ void SMTP_Analyzer::Undelivered(int seq, int len, bool is_orig)
Unexpected(is_orig, "content gap", buf_len, buf);
if ( state == SMTP_IN_DATA )
{
// Record the SMTP data gap and terminate the
// ongoing mail transaction.
if ( mail )
mail->Undelivered(len);
EndData();
}
if ( line_after_gap )
{

View file

@ -3,8 +3,8 @@
#empty_field (empty)
#unset_field -
#path file_analysis
#open 2013-03-25-19-46-10
#open 2013-03-26-20-26-26
#fields file_id parent_file_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size file_type mime_type conn_uids actions_taken extracted_files md5 sha1 sha256
#types string string string time count count count count interval count string string table[string] table[enum] table[string] string string string
Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set set UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18
#close 2013-03-25-19-46-10
Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set set UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18
#close 2013-03-26-20-26-26

View file

@ -3,10 +3,10 @@
#empty_field (empty)
#unset_field -
#path smtp_entities
#open 2009-10-05-06-06-10
#open 2013-03-26-20-43-14
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth filename content_len mime_type md5 extraction_file excerpt
#types time string addr port addr port count string count string string file string
1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 FAKE_MIME - smtp-entity_10.10.1.4:1470-74.53.140.153:25_1.dat (empty)
1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 FAKE_MIME - - (empty)
1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 FAKE_MIME - smtp-entity_10.10.1.4:1470-74.53.140.153:25_2.dat (empty)
#close 2009-10-05-06-06-16
#types time string addr port addr port count string count string string string string
1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 text/plain; charset=us-ascii - smtp-entity-cwR7l6Zctxb-0.dat (empty)
1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 text/html; charset=us-ascii - - (empty)
1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 text/plain; charset=us-ascii - smtp-entity-Ltd7QO7jEv3-1.dat (empty)
#close 2013-03-26-20-43-14

View file

@ -3,10 +3,10 @@
#empty_field (empty)
#unset_field -
#path smtp_entities
#open 2009-10-05-06-06-10
#open 2013-03-26-20-39-07
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth filename content_len mime_type md5 extraction_file excerpt
#types time string addr port addr port count string count string string file string
1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 FAKE_MIME 92bca2e6cdcde73647125da7dccbdd07 - (empty)
1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 FAKE_MIME - - (empty)
1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 FAKE_MIME a968bb0f9f9d95835b2e74c845877e87 - (empty)
#close 2009-10-05-06-06-16
#types time string addr port addr port count string count string string string string
1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 text/plain; charset=us-ascii 92bca2e6cdcde73647125da7dccbdd07 - (empty)
1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 text/html; charset=us-ascii - - (empty)
1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 text/plain; charset=us-ascii a968bb0f9f9d95835b2e74c845877e87 - (empty)
#close 2013-03-26-20-39-07

View file

@ -1,24 +1,11 @@
# @TEST-EXEC: bro -r $TRACES/smtp.trace %INPUT
# @TEST-EXEC: btest-diff smtp_entities.log
# @TEST-EXEC: btest-diff smtp-entity_10.10.1.4:1470-74.53.140.153:25_1.dat
# @TEST-EXEC: btest-diff smtp-entity_10.10.1.4:1470-74.53.140.153:25_2.dat
# @TEST-EXEC: btest-diff smtp-entity-cwR7l6Zctxb-0.dat
# @TEST-EXEC: btest-diff smtp-entity-Ltd7QO7jEv3-1.dat
# @TEST-EXEC: bro -r $TRACES/smtp.trace %INPUT SMTP::extraction_prefix="test"
# @TEST-EXEC: test -e test_10.10.1.4:1470-74.53.140.153:25_1.dat
# @TEST-EXEC: test -e test_10.10.1.4:1470-74.53.140.153:25_2.dat
# @TEST-EXEC: test -e test-cwR7l6Zctxb-0.dat
# @TEST-EXEC: test -e test-Ltd7QO7jEv3-1.dat
@load base/protocols/smtp
redef SMTP::extract_file_types=/text\/plain/;
# Replaces the default filter on the SMTP entities log with one that
# rewrites any logged MIME type to the fixed string "FAKE_MIME".
event bro_init()
	{
	Log::remove_default_filter(SMTP::ENTITIES_LOG);

	local normalizer: Log::Filter = [
		$name="normalized-mime-types",
		$pred=function(rec: SMTP::EntityInfo): bool
			{
			if ( rec?$mime_type )
				rec$mime_type = "FAKE_MIME";

			# Always keep the record; only its MIME type is rewritten.
			return T;
			}
	];

	Log::add_filter(SMTP::ENTITIES_LOG, normalizer);
	}

View file

@ -1,22 +1,6 @@
# Checks logging of mime types and md5 calculation. Mime type in the log
# is normalized to prevent sensitivity to libmagic version.
# @TEST-EXEC: bro -r $TRACES/smtp.trace %INPUT
# @TEST-EXEC: btest-diff smtp_entities.log
@load base/protocols/smtp
redef SMTP::generate_md5=/text\/plain/;
# Swaps the default SMTP entities log filter for one whose predicate
# overwrites a set MIME type with "FAKE_MIME" before the record is written.
event bro_init()
	{
	Log::remove_default_filter(SMTP::ENTITIES_LOG);

	local mime_filter: Log::Filter = [
		$name="normalized-mime-types",
		$pred=function(rec: SMTP::EntityInfo): bool
			{
			if ( rec?$mime_type )
				rec$mime_type = "FAKE_MIME";

			# Every record passes the filter.
			return T;
			}
	];

	Log::add_filter(SMTP::ENTITIES_LOG, mime_filter);
	}