diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro
index 473d94fc84..5926663535 100644
--- a/scripts/base/init-default.bro
+++ b/scripts/base/init-default.bro
@@ -10,6 +10,7 @@
 @load base/utils/conn-ids
 @load base/utils/dir
 @load base/utils/directions-and-hosts
+@load base/utils/email
 @load base/utils/exec
 @load base/utils/files
 @load base/utils/numbers
diff --git a/scripts/base/protocols/smtp/main.bro b/scripts/base/protocols/smtp/main.bro
index 6df9bddb54..2042a1ba16 100644
--- a/scripts/base/protocols/smtp/main.bro
+++ b/scripts/base/protocols/smtp/main.bro
@@ -1,6 +1,7 @@
 @load base/frameworks/notice
 @load base/utils/addrs
 @load base/utils/directions-and-hosts
+@load base/utils/email
 
 module SMTP;
 
@@ -99,7 +100,7 @@ event bro_init() &priority=5
 	}
 
 function find_address_in_smtp_header(header: string): string
-{
+	{
 	local ips = extract_ip_addresses(header);
 	# If there are more than one IP address found, return the second.
 	if ( |ips| > 1 )
@@ -110,7 +111,7 @@ function find_address_in_smtp_header(header: string): string
 	# Otherwise, there wasn't an IP address found.
 	else
 		return "";
-}
+	}
 
 function new_smtp_log(c: connection): Info
 	{
@@ -165,7 +166,10 @@ event smtp_request(c: connection, is_orig: bool, command: string, arg: string) &
 		{
 		if ( ! c$smtp?$rcptto )
 			c$smtp$rcptto = set();
-		add c$smtp$rcptto[split_string1(arg, /:[[:blank:]]*/)[1]];
+		# Accumulate: each recipient arrives in its own RCPT TO command.
+		local rcptto_addrs = extract_email_addrs_set(split_string1(arg, /:[[:blank:]]*/)[1]);
+		for ( rcptto_addr in rcptto_addrs )
+			add c$smtp$rcptto[rcptto_addr];
 		c$smtp$has_client_activity = T;
 		}
 
@@ -175,7 +179,9 @@ event smtp_request(c: connection, is_orig: bool, command: string, arg: string) &
 		smtp_message(c);
 
 		local partially_done = split_string1(arg, /:[[:blank:]]*/)[1];
-		c$smtp$mailfrom = split_string1(partially_done, /[[:blank:]]?/)[0];
+		local mailfrom = extract_first_email_addr(split_string1(partially_done, /[[:blank:]]?/)[0]);
+		if ( mailfrom != "" )
+			c$smtp$mailfrom = mailfrom;
 		c$smtp$has_client_activity = T;
 		}
 	}
@@ -223,22 +229,32 @@ event mime_one_header(c: connection, h: mime_header_rec) &priority=5
 		c$smtp$subject = h$value;
 
 	else if ( h$name == "FROM" )
-		c$smtp$from = h$value;
+		{
+		local from = extract_first_email_addr(h$value);
+		if ( from != "" )
+			c$smtp$from = from;
+		}
 
 	else if ( h$name == "REPLY-TO" )
-		c$smtp$reply_to = h$value;
+		{
+		local replyto = extract_first_email_addr(h$value);
+		if ( replyto != "" )
+			c$smtp$reply_to = replyto;
+		}
 
 	else if ( h$name == "DATE" )
 		c$smtp$date = h$value;
 
 	else if ( h$name == "TO" )
 		{
 		if ( ! c$smtp?$to )
 			c$smtp$to = set();
 
-		local to_parts = split_string(h$value, /[[:blank:]]*,[[:blank:]]*/);
-		for ( i in to_parts )
-			add c$smtp$to[to_parts[i]];
+		# Add to the set rather than replacing it so that repeated
+		# TO headers keep accumulating recipients.
+		local to_email_addrs = extract_email_addrs_set(h$value);
+		for ( to_email_addr in to_email_addrs )
+			add c$smtp$to[to_email_addr];
 		}
 
 	else if ( h$name == "CC" )
@@ -308,9 +324,9 @@ function describe(rec: Info): string
 	if ( rec?$mailfrom && rec?$rcptto )
 		{
 		local one_to = "";
-		for ( to in rec$rcptto )
+		for ( email in rec$rcptto )
 			{
-			one_to = to;
+			one_to = email;
 			break;
 			}
 		local abbrev_subject = "";
diff --git a/scripts/base/utils/email.bro b/scripts/base/utils/email.bro
new file mode 100644
index 0000000000..1d01e85656
--- /dev/null
+++ b/scripts/base/utils/email.bro
@@ -0,0 +1,51 @@
+## Extract mail addresses out of address specifications conforming to RFC5322.
+##
+## str: A string potentially containing email addresses.
+##
+## Returns: A vector of extracted email addresses.  An empty vector is returned
+##          if no email addresses are discovered.  The matches come from
+##          find_all, which yields an unordered set, so the vector's order
+##          is not guaranteed to follow the order of appearance in *str*.
+function extract_email_addrs_vec(str: string): string_vec
+	{
+	local addrs: vector of string = vector();
+
+	local raw_addrs = find_all(str, /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/);
+	for ( raw_addr in raw_addrs )
+		addrs[|addrs|] = gsub(raw_addr, /[<>,:;[:blank:]]/, "");
+
+	return addrs;
+	}
+
+## Extract mail addresses out of address specifications conforming to RFC5322.
+##
+## str: A string potentially containing email addresses.
+##
+## Returns: A set of extracted email addresses.  An empty set is returned
+##          if no email addresses are discovered.
+function extract_email_addrs_set(str: string): set[string]
+	{
+	local addrs: set[string] = set();
+
+	local raw_addrs = find_all(str, /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/);
+	for ( raw_addr in raw_addrs )
+		add addrs[gsub(raw_addr, /[<>,:;[:blank:]]/, "")];
+
+	return addrs;
+	}
+
+## Extract the first email address from a string.
+##
+## str: A string potentially containing email addresses.
+##
+## Returns: An email address or empty string if none found.  If *str* holds
+##          several addresses, which one is returned is not guaranteed
+##          (see extract_email_addrs_vec).
+function extract_first_email_addr(str: string): string
+	{
+	local addrs = extract_email_addrs_vec(str);
+	if ( |addrs| > 0 )
+		return addrs[0];
+	else
+		return "";
+	}
diff --git a/scripts/policy/frameworks/intel/seen/smtp.bro b/scripts/policy/frameworks/intel/seen/smtp.bro
index fdcbb62b39..4ea949b43a 100644
--- a/scripts/policy/frameworks/intel/seen/smtp.bro
+++ b/scripts/policy/frameworks/intel/seen/smtp.bro
@@ -2,19 +2,6 @@
 @load base/protocols/smtp
 @load ./where-locations
 
-# Extract mail addresses out of address specifications conforming RFC 5322
-function extract_mail_addrs(str: string) : set[string]
-	{
-	local raw_addrs = find_all(str, /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/);
-	local addrs: set[string];
-
-	for ( raw_addr in raw_addrs )
-		add addrs[gsub(raw_addr, /[<>,:;[:blank:]]/, "")];
-
-	return addrs;
-	}
-
-
 event mime_end_entity(c: connection)
 	{
 	if ( c?$smtp )
@@ -43,8 +30,8 @@ event mime_end_entity(c: connection)
 
 		if ( c$smtp?$mailfrom )
 			{
-			local mailfrom_addrs = extract_mail_addrs(c$smtp$mailfrom);
+			local mailfrom_addrs = extract_email_addrs_set(c$smtp$mailfrom);
 			for ( mailfrom_addr in mailfrom_addrs )
 				{
 				Intel::seen([$indicator=mailfrom_addr,
 				             $indicator_type=Intel::EMAIL,
@@ -55,23 +42,19 @@ event mime_end_entity(c: connection)
 
 		if ( c$smtp?$rcptto )
 			{
-			for ( rcptto in c$smtp$rcptto )
+			for ( rcptto_addr in c$smtp$rcptto )
 				{
-				local rcptto_addrs = extract_mail_addrs(rcptto);
-				for ( rcptto_addr in rcptto_addrs )
-					{
-					Intel::seen([$indicator=rcptto_addr,
-					             $indicator_type=Intel::EMAIL,
-					             $conn=c,
-					             $where=SMTP::IN_RCPT_TO]);
-					}
+				Intel::seen([$indicator=rcptto_addr,
+				             $indicator_type=Intel::EMAIL,
+				             $conn=c,
+				             $where=SMTP::IN_RCPT_TO]);
 				}
 			}
 
 		if ( c$smtp?$from )
 			{
-			local from_addrs = extract_mail_addrs(c$smtp$from);
+			local from_addrs = extract_email_addrs_set(c$smtp$from);
 			for ( from_addr in from_addrs )
 				{
 				Intel::seen([$indicator=from_addr,
 				             $indicator_type=Intel::EMAIL,
@@ -82,23 +65,36 @@ event mime_end_entity(c: connection)
 
 		if ( c$smtp?$to )
 			{
-			for ( email_to in c$smtp$to )
+			for ( email_to_addr in c$smtp$to )
 				{
-				local email_to_addrs = extract_mail_addrs(email_to);
-				for ( email_to_addr in email_to_addrs )
-					{
-					Intel::seen([$indicator=email_to_addr,
-					             $indicator_type=Intel::EMAIL,
-					             $conn=c,
-					             $where=SMTP::IN_TO]);
-					}
+				Intel::seen([$indicator=email_to_addr,
+				             $indicator_type=Intel::EMAIL,
+				             $conn=c,
+				             $where=SMTP::IN_TO]);
+				}
+			}
+
+		if ( c$smtp?$cc )
+			{
+			for ( cc in c$smtp$cc )
+				{
+				# c$smtp$cc may hold raw header parts (e.g. with display
+				# names), so reduce each entry to bare addresses first.
+				local cc_addrs = extract_email_addrs_set(cc);
+				for ( cc_addr in cc_addrs )
+					{
+					Intel::seen([$indicator=cc_addr,
+					             $indicator_type=Intel::EMAIL,
+					             $conn=c,
+					             $where=SMTP::IN_CC]);
+					}
 				}
 			}
 
 		if ( c$smtp?$reply_to )
 			{
-			local replyto_addrs = extract_mail_addrs(c$smtp$reply_to);
+			local replyto_addrs = extract_email_addrs_set(c$smtp$reply_to);
 			for ( replyto_addr in replyto_addrs )
 				{
 				Intel::seen([$indicator=replyto_addr,
 				             $indicator_type=Intel::EMAIL,
diff --git a/scripts/policy/frameworks/intel/seen/where-locations.bro b/scripts/policy/frameworks/intel/seen/where-locations.bro
index f286cc2ff7..59a89b0eb2 100644
--- a/scripts/policy/frameworks/intel/seen/where-locations.bro
+++ b/scripts/policy/frameworks/intel/seen/where-locations.bro
@@ -17,6 +17,7 @@ export {
 		SMTP::IN_RCPT_TO,
 		SMTP::IN_FROM,
 		SMTP::IN_TO,
+		SMTP::IN_CC,
 		SMTP::IN_RECEIVED_HEADER,
 		SMTP::IN_REPLY_TO,
 		SMTP::IN_X_ORIGINATING_IP_HEADER,
diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log
index 6a240c88ad..dcb3ce4b03 100644
--- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log
+++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log
@@ -3,7 +3,7 @@
 #empty_field	(empty)
 #unset_field	-
 #path	loaded_scripts
-#open	2015-08-31-05-07-15
+#open	2016-06-15-14-17-00
 #fields	name
 #types	string
 scripts/base/init-bare.bro
@@ -136,6 +136,7 @@ scripts/base/init-default.bro
       scripts/base/frameworks/reporter/main.bro
     scripts/base/utils/paths.bro
   scripts/base/utils/directions-and-hosts.bro
+  scripts/base/utils/email.bro
   scripts/base/utils/files.bro
   scripts/base/utils/numbers.bro
   scripts/base/utils/queue.bro
@@ -273,4 +274,4 @@ scripts/base/init-default.bro
   scripts/base/misc/find-checksum-offloading.bro
   scripts/base/misc/find-filtered-trace.bro
 scripts/policy/misc/loaded-scripts.bro
-#close	2015-08-31-05-07-15
+#close	2016-06-15-14-17-01