Change the meaning of some email fields.

We now extract email addresses in the fields that one would expect
to contain addresses.  This makes further downstream processing of
these fields easier like log analysis or using these fields in the
Intel framework.  The primary downside is that any other content
in these fields is no longer available such as full name and any
group information.  I believe the simplification of the content in
these fields is worth the change.

Added "cc" to the script that feeds information from SMTP into the
Intel framework.

A new script for email handling utility functions has been created
as a side effect of these changes.
This commit is contained in:
Seth Hall 2016-06-15 10:32:06 -04:00
parent 2509f79a10
commit d89ee3cee0
6 changed files with 100 additions and 53 deletions

View file

@ -0,0 +1,47 @@
## Extract mail addresses out of address specifications conforming to RFC5322.
##
## str: A string potentially containing email addresses.
##
## Returns: A vector of extracted email addresses. An empty vector is returned
## if no email addresses are discovered.
function extract_email_addrs_vec(str: string): string_vec
{
local addrs: vector of string = vector();
local raw_addrs = find_all(str, /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/);
for ( raw_addr in raw_addrs )
addrs[|addrs|] = gsub(raw_addr, /[<>,:;[:blank:]]/, "");
return addrs;
}
## Extract mail addresses out of address specifications conforming to RFC5322.
##
## str: A string potentially containing email addresses.
##
## Returns: A set of extracted email addresses. An empty set is returned
## if no email addresses are discovered.
function extract_email_addrs_set(str: string): set[string]
{
local addrs: set[string] = set();
local raw_addrs = find_all(str, /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/);
for ( raw_addr in raw_addrs )
add addrs[gsub(raw_addr, /[<>,:;[:blank:]]/, "")];
return addrs;
}
## Extract the first email address from a string.
##
## str: A string potentially containing email addresses.
##
## Returns: An email address or empty string if none found.
function extract_first_email_addr(str: string): string
{
local addrs = extract_email_addrs_vec(str);
if ( |addrs| > 0 )
return addrs[0];
else
return "";
}