mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
Fix extract_first_email_addr() to really return the first email
The use of find_all() in extract_email_addrs_vec() extracted occurrences to an intermediate set and thus lost any sense of ordering. This changes extract_email_addrs_vec() to use find_all_ordered() and return all occurrences of email addresses found in the argument, including duplicates, with their order of occurrence preserved.
This commit is contained in:
parent
a852ab4c39
commit
170752fa99
5 changed files with 47 additions and 4 deletions
5
NEWS
5
NEWS
|
@ -27,6 +27,11 @@ Changed Functionality
|
|||
- The use as enum of type ``NetControl::RuleType`` is unchanged and still
|
||||
named ``NetControl::DROP``
|
||||
|
||||
- The extract_email_addrs_vec() BIF now returns all occurrences of emails,
|
||||
including duplicates, with preserved order of occurrence. This seems like
|
||||
the original/documented intent of the function, but the previous
|
||||
implementation did not preserve ordering or duplicates.
|
||||
|
||||
Removed Functionality
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -8,9 +8,9 @@ function extract_email_addrs_vec(str: string): string_vec
|
|||
{
|
||||
local addrs: vector of string = vector();
|
||||
|
||||
local raw_addrs = find_all(str, /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/);
|
||||
for ( raw_addr in raw_addrs )
|
||||
addrs += gsub(raw_addr, /[<>,:;[:blank:]]/, "");
|
||||
local raw_addrs = find_all_ordered(str, /(^|[<,:[:blank:]])[^<,:[:blank:]@]+"@"[^>,;[:blank:]]+([>,;[:blank:]]|$)/);
|
||||
for ( i in raw_addrs )
|
||||
addrs += gsub(raw_addrs[i], /[<>,:;[:blank:]]/, "");
|
||||
|
||||
return addrs;
|
||||
}
|
||||
|
|
21
testing/btest/Baseline/scripts.base.utils.email/output
Normal file
21
testing/btest/Baseline/scripts.base.utils.email/output
Normal file
|
@ -0,0 +1,21 @@
|
|||
one@example.com
|
||||
[one@example.com, two@example.com, three@example.com, one@example.com]
|
||||
{
|
||||
three@example.com,
|
||||
two@example.com,
|
||||
one@example.com
|
||||
}
|
||||
one@example.com
|
||||
[one@example.com, two@example.com, three@example.com, one@example.com]
|
||||
{
|
||||
three@example.com,
|
||||
two@example.com,
|
||||
one@example.com
|
||||
}
|
||||
one@example.com
|
||||
[one@example.com, two@example.com, three@example.com, one@example.com]
|
||||
{
|
||||
three@example.com,
|
||||
one@example.com,
|
||||
two@example.com
|
||||
}
|
17
testing/btest/scripts/base/utils/email.zeek
Normal file
17
testing/btest/scripts/base/utils/email.zeek
Normal file
|
@ -0,0 +1,17 @@
|
|||
# @TEST-EXEC: zeek -b %INPUT >output
|
||||
# @TEST-EXEC: btest-diff output
|
||||
|
||||
@load base/utils/email
|
||||
|
||||
local s = "one@example.com two@example.com three@example.com one@example.com";
|
||||
print extract_first_email_addr(s);
|
||||
print extract_email_addrs_vec(s);
|
||||
print extract_email_addrs_set(s);
|
||||
s = "one@example.com,two@example.com,three@example.com,one@example.com";
|
||||
print extract_first_email_addr(s);
|
||||
print extract_email_addrs_vec(s);
|
||||
print extract_email_addrs_set(s);
|
||||
s = "ieje one@example.com, eifj two@example.com, asdf three@example.com, one@example.com";
|
||||
print extract_first_email_addr(s);
|
||||
print extract_email_addrs_vec(s);
|
||||
print extract_email_addrs_set(s);
|
|
@ -1 +1 @@
|
|||
8cee05d3405d85a4ae8163d1999b78d182b25213
|
||||
c6e8c83cf4d74cde39b57f70a3259cac5a833c1a
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue