diff --git a/CHANGES b/CHANGES index 20485f0114..6c1f81ecf0 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,13 @@ +4.2.0-dev.99 | 2021-09-03 17:36:09 +0000 + + * GH-1589: Avoid extracting IP-like strings from SMTP headers (Tim Wojtulewicz, Corelight) + + * Minor updates to the external-testsuite scripts (Christian Kreibich, Corelight) + + Delete the unused Baseline folder, remove a Bro-era name from gitignore, + change create-new-repo to work with a second argument that is actually + a URL, and tweak whitespace. Expand description in README and update the + explanation of OPENSSL_ENABLE_MD5_VERIFY in subdir-btest.cfg. 4.2.0-dev.94 | 2021-08-31 15:58:00 +0200 diff --git a/VERSION b/VERSION index 411ade6790..5c76bcd962 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.2.0-dev.94 +4.2.0-dev.99 diff --git a/scripts/base/protocols/smtp/main.zeek b/scripts/base/protocols/smtp/main.zeek index 3de3922717..fdc7a9542a 100644 --- a/scripts/base/protocols/smtp/main.zeek +++ b/scripts/base/protocols/smtp/main.zeek @@ -109,7 +109,7 @@ event zeek_init() &priority=5 function find_address_in_smtp_header(header: string): string { - local ips = extract_ip_addresses(header); + local ips = extract_ip_addresses(header, T); # If there are more than one IP address found, return the second. if ( |ips| > 1 ) return ips[1]; diff --git a/scripts/base/utils/addrs.zeek b/scripts/base/utils/addrs.zeek index 92af26be96..9e63d1e732 100644 --- a/scripts/base/utils/addrs.zeek +++ b/scripts/base/utils/addrs.zeek @@ -83,8 +83,12 @@ function has_valid_octets(octets: string_vec): bool ## ## input: a string that may contain an IP address anywhere within it. ## +## check_wrapping: if true, will only return IP addresses that are wrapped in matching +## pairs of spaces, square brackets, curly braces, or parens. This can be used to avoid +## extracting strings that look like IPs from innocuous strings, such as SMTP headers. +## ## Returns: an array containing all valid IP address strings found in *input*. -function extract_ip_addresses(input: string): string_vec +function extract_ip_addresses(input: string, check_wrapping: bool &default=F): string_vec { local parts = split_string_all(input, ip_addr_regex); local output: string_vec; @@ -92,7 +96,24 @@ function extract_ip_addresses(input: string): string_vec for ( i in parts ) { if ( i % 2 == 1 && is_valid_ip(parts[i]) ) - output += parts[i]; + { + if ( ! check_wrapping ) + { + output += parts[i]; + } + else if ( i > 0 && i < |parts| - 1 ) + { + local p1 = parts[i-1]; + local p3 = parts[i+1]; + + if ( ( |p1| == 0 && |p3| == 0 ) || + ( p1[-1] == "\[" && p3[0] == "\]" ) || + ( p1[-1] == "\(" && p3[0] == "\)" ) || + ( p1[-1] == "\{" && p3[0] == "\}" ) || + ( p1[-1] == " " && p3[0] == " " ) ) + output += parts[i]; + } + } } return output; } diff --git a/testing/btest/Baseline/scripts.base.utils.addrs/output b/testing/btest/Baseline/scripts.base.utils.addrs/output index 839799e18a..380efe357b 100644 --- a/testing/btest/Baseline/scripts.base.utils.addrs/output +++ b/testing/btest/Baseline/scripts.base.utils.addrs/output @@ -51,4 +51,13 @@ F ============ test extract_ip_addresses() [1.1.1.1, 2.2.2.2, 3.3.3.3] [1.1.1.1, 0:0:0:0:0:0:0:0, 3.3.3.3] +[1.1.1.1] +[1.1.1.1] +[] +[1.1.1.1] +[1.1.1.1] +[1.1.1.1] +[1.1.1.1, 2.2.2.2] +[1.1.1.1] +[1.1.1.1] [6:1:2::3:4:5:6] diff --git a/testing/btest/scripts/base/utils/addrs.test b/testing/btest/scripts/base/utils/addrs.test index 274887fbce..6de029270c 100644 --- a/testing/btest/scripts/base/utils/addrs.test +++ b/testing/btest/scripts/base/utils/addrs.test @@ -135,6 +135,15 @@ event zeek_init() print "============ test extract_ip_addresses()"; print extract_ip_addresses("this is 1.1.1.1 a test 2.2.2.2 string with ip addresses 3.3.3.3"); print extract_ip_addresses("this is 1.1.1.1 a test 0:0:0:0:0:0:0:0 string with ip addresses 3.3.3.3"); + print extract_ip_addresses("text 1.1.1.1 text", T); + print extract_ip_addresses("text 1.1.1.1 text", F); + print extract_ip_addresses("text1.1.1.1text", T); + print extract_ip_addresses("text1.1.1.1text", F); + print extract_ip_addresses("text[1.1.1.1]text", T); + print extract_ip_addresses("text[1.1.1.1]text", F); + print extract_ip_addresses("[1.1.1.1] [2.2.2.2]", T); + print extract_ip_addresses("1.1.1.1", T); + print extract_ip_addresses("1.1.1.1", F); # This will use the leading 6 from "IPv6" (maybe that's not intended # by a person trying to parse such a string, but that's just what's going diff --git a/testing/external/commit-hash.zeek-testing-private b/testing/external/commit-hash.zeek-testing-private index cacb4cffe1..c2bef97a74 100644 --- a/testing/external/commit-hash.zeek-testing-private +++ b/testing/external/commit-hash.zeek-testing-private @@ -1 +1 @@ -b33b7c939df51317595039ed408838712b738bbb +4b88837c49ade5d9fd980d5e6cf02ec91d19a3bb