From d89ee3cee0b1ae6d8c186efeb81860c6d41ae30c Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 15 Jun 2016 10:32:06 -0400 Subject: [PATCH] Change the meaning of some email fields. We now extract email addresses from the fields that one would expect to contain addresses. This makes further downstream processing of these fields easier, such as log analysis or using these fields in the Intel framework. The primary downside is that any other content in these fields is no longer available, such as the full name and any group information. I believe the simplification of the content in these fields is worth the change. Added "cc" to the script that feeds information from SMTP into the Intel framework. A new script for email handling utility functions has been created as a side effect of these changes. --- scripts/base/init-default.bro | 1 + scripts/base/protocols/smtp/main.bro | 38 +++++++----- scripts/base/utils/email.bro | 47 ++++++++++++++ scripts/policy/frameworks/intel/seen/smtp.bro | 61 ++++++++----------- .../frameworks/intel/seen/where-locations.bro | 1 + .../canonified_loaded_scripts.log | 5 +- 6 files changed, 100 insertions(+), 53 deletions(-) create mode 100644 scripts/base/utils/email.bro diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 473d94fc84..5926663535 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -10,6 +10,7 @@ @load base/utils/conn-ids @load base/utils/dir @load base/utils/directions-and-hosts +@load base/utils/email @load base/utils/exec @load base/utils/files @load base/utils/numbers diff --git a/scripts/base/protocols/smtp/main.bro b/scripts/base/protocols/smtp/main.bro index 6df9bddb54..2042a1ba16 100644 --- a/scripts/base/protocols/smtp/main.bro +++ b/scripts/base/protocols/smtp/main.bro @@ -1,6 +1,7 @@ @load base/frameworks/notice @load base/utils/addrs @load base/utils/directions-and-hosts +@load base/utils/email module SMTP; @@ -99,7 +100,7 @@ event bro_init() &priority=5 } function 
find_address_in_smtp_header(header: string): string -{ + { local ips = extract_ip_addresses(header); # If there are more than one IP address found, return the second. if ( |ips| > 1 ) @@ -110,7 +111,7 @@ function find_address_in_smtp_header(header: string): string # Otherwise, there wasn't an IP address found. else return ""; -} + } function new_smtp_log(c: connection): Info { @@ -165,7 +166,11 @@ event smtp_request(c: connection, is_orig: bool, command: string, arg: string) & { if ( ! c$smtp?$rcptto ) c$smtp$rcptto = set(); - add c$smtp$rcptto[split_string1(arg, /:[[:blank:]]*/)[1]]; + local rcptto = extract_email_addrs_set(split_string1(arg, /:[[:blank:]]*/)[1]); + if ( |rcptto| > 0 ) + { + c$smtp$rcptto = rcptto; + } c$smtp$has_client_activity = T; } @@ -175,7 +180,9 @@ event smtp_request(c: connection, is_orig: bool, command: string, arg: string) & smtp_message(c); local partially_done = split_string1(arg, /:[[:blank:]]*/)[1]; - c$smtp$mailfrom = split_string1(partially_done, /[[:blank:]]?/)[0]; + local mailfrom = extract_first_email_addr(split_string1(partially_done, /[[:blank:]]?/)[0]); + if ( mailfrom != "" ) + c$smtp$mailfrom = mailfrom; c$smtp$has_client_activity = T; } } @@ -223,22 +230,25 @@ event mime_one_header(c: connection, h: mime_header_rec) &priority=5 c$smtp$subject = h$value; else if ( h$name == "FROM" ) - c$smtp$from = h$value; + { + local from = extract_first_email_addr(h$value); + if ( from != "" ) + c$smtp$from = from; + } else if ( h$name == "REPLY-TO" ) - c$smtp$reply_to = h$value; + { + local replyto = extract_first_email_addr(h$value); + if ( replyto != "" ) + c$smtp$reply_to = replyto; + } else if ( h$name == "DATE" ) c$smtp$date = h$value; else if ( h$name == "TO" ) { - if ( ! 
c$smtp?$to ) - c$smtp$to = set(); - - local to_parts = split_string(h$value, /[[:blank:]]*,[[:blank:]]*/); - for ( i in to_parts ) - add c$smtp$to[to_parts[i]]; + c$smtp$to = extract_email_addrs_set(h$value); } else if ( h$name == "CC" ) @@ -308,9 +318,9 @@ function describe(rec: Info): string if ( rec?$mailfrom && rec?$rcptto ) { local one_to = ""; - for ( to in rec$rcptto ) + for ( email in rec$rcptto ) { - one_to = to; + one_to = email; break; } local abbrev_subject = ""; diff --git a/scripts/base/utils/email.bro b/scripts/base/utils/email.bro new file mode 100644 index 0000000000..1d01e85656 --- /dev/null +++ b/scripts/base/utils/email.bro @@ -0,0 +1,47 @@ +## Extract mail addresses out of address specifications conforming to RFC5322. +## +## str: A string potentially containing email addresses. +## +## Returns: A vector of extracted email addresses. An empty vector is returned +## if no email addresses are discovered. +function extract_email_addrs_vec(str: string): string_vec + { + local addrs: vector of string = vector(); + + local raw_addrs = find_all(str, /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/); + for ( raw_addr in raw_addrs ) + addrs[|addrs|] = gsub(raw_addr, /[<>,:;[:blank:]]/, ""); + + return addrs; + } + +## Extract mail addresses out of address specifications conforming to RFC5322. +## +## str: A string potentially containing email addresses. +## +## Returns: A set of extracted email addresses. An empty set is returned +## if no email addresses are discovered. +function extract_email_addrs_set(str: string): set[string] + { + local addrs: set[string] = set(); + + local raw_addrs = find_all(str, /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/); + for ( raw_addr in raw_addrs ) + add addrs[gsub(raw_addr, /[<>,:;[:blank:]]/, "")]; + + return addrs; + } + +## Extract the first email address from a string. +## +## str: A string potentially containing email addresses. 
+## +## Returns: An email address or empty string if none found. +function extract_first_email_addr(str: string): string + { + local addrs = extract_email_addrs_vec(str); + if ( |addrs| > 0 ) + return addrs[0]; + else + return ""; + } \ No newline at end of file diff --git a/scripts/policy/frameworks/intel/seen/smtp.bro b/scripts/policy/frameworks/intel/seen/smtp.bro index fdcbb62b39..4ea949b43a 100644 --- a/scripts/policy/frameworks/intel/seen/smtp.bro +++ b/scripts/policy/frameworks/intel/seen/smtp.bro @@ -2,19 +2,6 @@ @load base/protocols/smtp @load ./where-locations -# Extract mail addresses out of address specifications conforming RFC 5322 -function extract_mail_addrs(str: string) : set[string] - { - local raw_addrs = find_all(str, /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/); - local addrs: set[string]; - - for ( raw_addr in raw_addrs ) - add addrs[gsub(raw_addr, /[<>,:;[:blank:]]/, "")]; - - return addrs; - } - - event mime_end_entity(c: connection) { if ( c?$smtp ) @@ -43,8 +30,7 @@ event mime_end_entity(c: connection) if ( c$smtp?$mailfrom ) { - local mailfrom_addrs = extract_mail_addrs(c$smtp$mailfrom); - for ( mailfrom_addr in mailfrom_addrs ) + for ( mailfrom_addr in c$smtp$mailfrom ) { Intel::seen([$indicator=mailfrom_addr, $indicator_type=Intel::EMAIL, @@ -55,23 +41,18 @@ event mime_end_entity(c: connection) if ( c$smtp?$rcptto ) { - for ( rcptto in c$smtp$rcptto ) + for ( rcptto_addr in c$smtp$rcptto ) { - local rcptto_addrs = extract_mail_addrs(rcptto); - for ( rcptto_addr in rcptto_addrs ) - { - Intel::seen([$indicator=rcptto_addr, - $indicator_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_RCPT_TO]); - } + Intel::seen([$indicator=rcptto_addr, + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_RCPT_TO]); } } if ( c$smtp?$from ) { - local from_addrs = extract_mail_addrs(c$smtp$from); - for ( from_addr in from_addrs ) + for ( from_addr in c$smtp$from ) { Intel::seen([$indicator=from_addr, 
$indicator_type=Intel::EMAIL, @@ -82,23 +63,29 @@ event mime_end_entity(c: connection) if ( c$smtp?$to ) { - for ( email_to in c$smtp$to ) + for ( email_to_addr in c$smtp$to ) { - local email_to_addrs = extract_mail_addrs(email_to); - for ( email_to_addr in email_to_addrs ) - { - Intel::seen([$indicator=email_to_addr, - $indicator_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_TO]); - } + Intel::seen([$indicator=email_to_addr, + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_TO]); + } + } + + if ( c$smtp?$cc ) + { + for ( cc_addr in c$smtp$cc ) + { + Intel::seen([$indicator=cc_addr, + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_CC]); } } if ( c$smtp?$reply_to ) { - local replyto_addrs = extract_mail_addrs(c$smtp$reply_to); - for ( replyto_addr in replyto_addrs ) + for ( replyto_addr in c$smtp$reply_to ) { Intel::seen([$indicator=replyto_addr, $indicator_type=Intel::EMAIL, diff --git a/scripts/policy/frameworks/intel/seen/where-locations.bro b/scripts/policy/frameworks/intel/seen/where-locations.bro index f286cc2ff7..59a89b0eb2 100644 --- a/scripts/policy/frameworks/intel/seen/where-locations.bro +++ b/scripts/policy/frameworks/intel/seen/where-locations.bro @@ -17,6 +17,7 @@ export { SMTP::IN_RCPT_TO, SMTP::IN_FROM, SMTP::IN_TO, + SMTP::IN_CC, SMTP::IN_RECEIVED_HEADER, SMTP::IN_REPLY_TO, SMTP::IN_X_ORIGINATING_IP_HEADER, diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 6a240c88ad..dcb3ce4b03 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2015-08-31-05-07-15 +#open 2016-06-15-14-17-00 #fields name #types string scripts/base/init-bare.bro @@ -136,6 +136,7 @@ 
scripts/base/init-default.bro scripts/base/frameworks/reporter/main.bro scripts/base/utils/paths.bro scripts/base/utils/directions-and-hosts.bro + scripts/base/utils/email.bro scripts/base/utils/files.bro scripts/base/utils/numbers.bro scripts/base/utils/queue.bro @@ -273,4 +274,4 @@ scripts/base/init-default.bro scripts/base/misc/find-checksum-offloading.bro scripts/base/misc/find-filtered-trace.bro scripts/policy/misc/loaded-scripts.bro -#close 2015-08-31-05-07-15 +#close 2016-06-15-14-17-01