From 1e37e91eda4a530c539d755a966c48f76a34e4ae Mon Sep 17 00:00:00 2001 From: TheAvgJojo Date: Fri, 5 Aug 2022 15:13:21 -0400 Subject: [PATCH] UPDATED: improving email address splitting for common comma case --- scripts/base/utils/email.zeek | 2 +- .../btest/Baseline/scripts.base.utils.email/output | 13 +++++++++++++ testing/btest/scripts/base/utils/email.zeek | 3 +++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/scripts/base/utils/email.zeek b/scripts/base/utils/email.zeek index b647149bdc..0217fa12bc 100644 --- a/scripts/base/utils/email.zeek +++ b/scripts/base/utils/email.zeek @@ -58,7 +58,7 @@ function extract_first_email_addr(str: string): string function split_mime_email_addresses(line: string): set[string] { local output = string_set(); - local addrs = find_all(line, /(\"[^"]*\")?[^,]+/); + local addrs = find_all(line, /(\"[^"]*\")?[^,]+@[^,]+/); for ( part in addrs ) { add output[strip(part)]; diff --git a/testing/btest/Baseline/scripts.base.utils.email/output b/testing/btest/Baseline/scripts.base.utils.email/output index dc0958ff64..267cdf340c 100644 --- a/testing/btest/Baseline/scripts.base.utils.email/output +++ b/testing/btest/Baseline/scripts.base.utils.email/output @@ -13,6 +13,11 @@ three@example.com, two@example.com, one@example.com } +{ +three@example.com, +two@example.com, +one@example.com +} one@example.com [one@example.com, two@example.com, three@example.com, one@example.com] { @@ -26,9 +31,17 @@ john.smith@email.com john.smith@email.com, jane.doe@email.com } +{ +"Smith, John" <john.smith@email.com>, +"Doe, Jane" <jane.doe@email.com> +} john.smith@email.com [john.smith@email.com, jane.doe@email.com] { john.smith@email.com, jane.doe@email.com } +{ +"Smith, John" <john.smith@email.com>, +"Doe, Jane" <jane.doe@email.com> +} diff --git a/testing/btest/scripts/base/utils/email.zeek b/testing/btest/scripts/base/utils/email.zeek index 6d4838765f..2866ebc465 100644 --- a/testing/btest/scripts/base/utils/email.zeek +++ b/testing/btest/scripts/base/utils/email.zeek @@ -11,6 +11,7 @@ s = 
"one@example.com,two@example.com,three@example.com,one@example.com"; print extract_first_email_addr(s); print extract_email_addrs_vec(s); print extract_email_addrs_set(s); +print split_mime_email_addresses(s); s = "ieje one@example.com, eifj two@example.com, asdf three@example.com, one@example.com"; print extract_first_email_addr(s); print extract_email_addrs_vec(s); @@ -19,7 +20,9 @@ s = "\"Smith, John\" <john.smith@email.com>, \"Doe, Jane\" <jane.doe@email.com>" print extract_first_email_addr(s); print extract_email_addrs_vec(s); print extract_email_addrs_set(s); +print split_mime_email_addresses(s); s = "\"Smith, John\" <john.smith@email.com>,\"Doe, Jane\" <jane.doe@email.com>"; print extract_first_email_addr(s); print extract_email_addrs_vec(s); print extract_email_addrs_set(s); +print split_mime_email_addresses(s); \ No newline at end of file