mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Change the meaning of some email fields.
We now extract email addresses in the fields that one would expect to contain addresses. This makes downstream processing of these fields, such as log analysis or use in the Intel framework, easier. The primary downside is that any other content in these fields, such as the full name and any group information, is no longer available. I believe the simplification of the content in these fields is worth the change. Added "cc" to the script that feeds information from SMTP into the Intel framework. As a side effect of these changes, a new script containing email handling utility functions has been created.
This commit is contained in:
parent
2509f79a10
commit
d89ee3cee0
6 changed files with 100 additions and 53 deletions
47
scripts/base/utils/email.bro
Normal file
47
scripts/base/utils/email.bro
Normal file
|
@ -0,0 +1,47 @@
|
|||
## Pull every email address out of a string such as an RFC 5322 address
## specification.
##
## str: A string potentially containing email addresses.
##
## Returns: A vector holding each extracted address with the surrounding
##          delimiter characters removed. The vector is empty when no
##          email addresses are discovered in *str*.
function extract_email_addrs_vec(str: string): string_vec
	{
	local result: vector of string = vector();

	# A candidate is a run of characters containing "@" that is bounded on
	# each side by a delimiter or by the start/end of the string. The
	# bounding delimiter is part of the match.
	local candidates = find_all(str, /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/);

	# NOTE(review): the loop variable is used directly as the matched
	# string, so find_all's result is iterated like a set; the order of the
	# resulting vector may therefore not follow the order of appearance in
	# str -- confirm.
	for ( candidate in candidates )
		{
		# Strip the delimiters that were matched along with the address.
		local cleaned = gsub(candidate, /[<>,:;[:blank:]]/, "");
		result[|result|] = cleaned;
		}

	return result;
	}
|
||||
|
||||
## Collect the distinct email addresses found in a string such as an
## RFC 5322 address specification.
##
## str: A string potentially containing email addresses.
##
## Returns: A set of the extracted email addresses with the surrounding
##          delimiter characters removed. The set is empty when no email
##          addresses are discovered in *str*.
function extract_email_addrs_set(str: string): set[string]
	{
	# Matches a run of characters containing "@" that is bounded on each
	# side by a delimiter or by the start/end of the string. The bounding
	# delimiter is part of the match.
	local addr_with_delims = /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/;

	local result: set[string] = set();

	for ( candidate in find_all(str, addr_with_delims) )
		{
		# Strip the delimiters that were matched along with the address.
		local cleaned = gsub(candidate, /[<>,:;[:blank:]]/, "");
		add result[cleaned];
		}

	return result;
	}
|
||||
|
||||
## Return a single email address taken from a string.
##
## str: A string potentially containing email addresses.
##
## Returns: Element 0 of the vector produced by
##          :bro:id:`extract_email_addrs_vec` for *str*, or an empty
##          string if no email address is found.
function extract_first_email_addr(str: string): string
	{
	local found = extract_email_addrs_vec(str);

	# Nothing extracted: signal "no address" with an empty string.
	if ( |found| == 0 )
		return "";

	return found[0];
	}
|
Loading…
Add table
Add a link
Reference in a new issue