From b3b0c3db7d777a78b9a20a1de6ab16336c0bb153 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Tue, 23 Sep 2025 10:12:45 +0200 Subject: [PATCH] utils/decompose_uri: Support URIs containing IPv6 addresses A URI containing a bracketed or non-bracketed IPv6 address of the form http://[::1]:42 was previously split on the first colon for port extraction, causing a subsequent to_count() call to fail. Harden this by only treating a trailing :[0-9]* component (port digits, or a bare colon) as the port. Fixes #4842 --- scripts/base/utils/urls.zeek | 15 +++++------ .../scripts.base.utils.decompose_uri/.stderr | 1 + .../scripts.base.utils.decompose_uri/output | 27 +++++++++++++++++++ .../scripts/base/utils/decompose_uri.zeek | 17 ++++++++++++ 4 files changed, 52 insertions(+), 8 deletions(-) create mode 100644 testing/btest/Baseline/scripts.base.utils.decompose_uri/.stderr diff --git a/scripts/base/utils/urls.zeek b/scripts/base/utils/urls.zeek index cdc8548d52..2dc835fa7a 100644 --- a/scripts/base/utils/urls.zeek +++ b/scripts/base/utils/urls.zeek @@ -117,15 +117,14 @@ function decompose_uri(uri: string): URI } } - if ( /:/ in s ) + if ( /:[0-9]*$/ in s ) { - # Parse location and port. - parts = split_string1(s, /:/); - u$netlocation = parts[0]; - if ( parts[1] != "" ) - { - u$portnum = to_count(parts[1]); - } + # Input ends with a numeric port or just colon: Strip it + # for netlocation and convert any port digits into portnum. + u$netlocation = gsub(s, /:[0-9]*$/, ""); + local portstr = s[|u$netlocation| + 1:]; + if ( portstr != "" ) + u$portnum = to_count(portstr); } else { diff --git a/testing/btest/Baseline/scripts.base.utils.decompose_uri/.stderr b/testing/btest/Baseline/scripts.base.utils.decompose_uri/.stderr new file mode 100644 index 0000000000..49d861c74c --- /dev/null +++ b/testing/btest/Baseline/scripts.base.utils.decompose_uri/.stderr @@ -0,0 +1 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
diff --git a/testing/btest/Baseline/scripts.base.utils.decompose_uri/output b/testing/btest/Baseline/scripts.base.utils.decompose_uri/output index 9d650c7f83..696a1df585 100644 --- a/testing/btest/Baseline/scripts.base.utils.decompose_uri/output +++ b/testing/btest/Baseline/scripts.base.utils.decompose_uri/output @@ -50,3 +50,30 @@ file:///documentation/faq.html?=v www.bro.org/?foo=bar -> [scheme=, netlocation=www.bro.org, portnum=, path=/, file_name=, file_base=, file_ext=, params={\x0a\x09[foo] = bar\x0a}] +http://[::1]:8080/?foo=bar&baz=qux + -> [scheme=http, netlocation=[::1], portnum=8080, path=/, file_name=, file_base=, file_ext=, params={\x0a\x09[foo] = bar,\x0a\x09[baz] = qux\x0a}] + +http://[::1]/foo/bar + -> [scheme=http, netlocation=[::1], portnum=, path=/foo/bar, file_name=bar, file_base=bar, file_ext=, params=] + +http://[::1]/foo/bar + -> [scheme=http, netlocation=[::1], portnum=, path=/foo/bar, file_name=bar, file_base=bar, file_ext=, params=] + +[::1]:80/test/a/b.exe?a=b + -> [scheme=, netlocation=[::1], portnum=80, path=/test/a/b.exe, file_name=b.exe, file_base=b, file_ext=exe, params={\x0a\x09[a] = b\x0a}] + +http://beeb:deed::1/test + -> [scheme=http, netlocation=beeb:deed:, portnum=1, path=/test, file_name=test, file_base=test, file_ext=, params=] + +http://beeb:deed::1:8080/test + -> [scheme=http, netlocation=beeb:deed::1, portnum=8080, path=/test, file_name=test, file_base=test, file_ext=, params=] + +https://en.wikipedia.org/wiki/Template:Welcome + -> [scheme=https, netlocation=en.wikipedia.org, portnum=, path=/wiki/Template:Welcome, file_name=Template:Welcome, file_base=Template:Welcome, file_ext=, params=] + +https://[::1]:8080/wiki/Template:Welcome + -> [scheme=https, netlocation=[::1], portnum=8080, path=/wiki/Template:Welcome, file_name=Template:Welcome, file_base=Template:Welcome, file_ext=, params=] + +https://[::1]:8080/wiki/Template:Welcome?key=:&value=: + -> [scheme=https, netlocation=[::1], portnum=8080, 
path=/wiki/Template:Welcome, file_name=Template:Welcome, file_base=Template:Welcome, file_ext=, params={\x0a\x09[key] = :,\x0a\x09[value] = :\x0a}] + diff --git a/testing/btest/scripts/base/utils/decompose_uri.zeek b/testing/btest/scripts/base/utils/decompose_uri.zeek index 30ba9cd245..714e977d7d 100644 --- a/testing/btest/scripts/base/utils/decompose_uri.zeek +++ b/testing/btest/scripts/base/utils/decompose_uri.zeek @@ -1,5 +1,6 @@ # @TEST-EXEC: zeek -b %INPUT > output # @TEST-EXEC: btest-diff output +# @TEST-EXEC: btest-diff .stderr @load base/utils/urls @@ -29,5 +30,21 @@ event zeek_init() dc("https://www.bro.org/documentation/faq.html?=v"); dc("file:///documentation/faq.html?=v"); dc("www.bro.org/?foo=bar"); + + # Bracketed IPv6 + dc("http://[::1]:8080/?foo=bar&baz=qux"); + dc("http://[::1]/foo/bar"); + dc("http://[::1]/foo/bar"); + dc("[::1]:80/test/a/b.exe?a=b"); + + # Un-bracketed is ambiguous, but not causing errors. + dc("http://beeb:deed::1/test"); + dc("http://beeb:deed::1:8080/test"); + + # Ensure colons in path or query parameters do not + # cause trouble. + dc("https://en.wikipedia.org/wiki/Template:Welcome"); + dc("https://[::1]:8080/wiki/Template:Welcome"); + dc("https://[::1]:8080/wiki/Template:Welcome?key=:&value=:"); }