diff --git a/scripts/base/utils/urls.bro b/scripts/base/utils/urls.bro index 41a2ab5639..a34b6a02c1 100644 --- a/scripts/base/utils/urls.bro +++ b/scripts/base/utils/urls.bro @@ -6,23 +6,23 @@ const url_regex = /^([a-zA-Z\-]{3,5})(:\/\/[^\/?#"'\r\n><]*)([^?#"'\r\n><]*)([^[ ## A URI, as parsed by :bro:id:`decompose_uri`. type URI: record { ## The URL's scheme.. - scheme: string &optional; + scheme: string &optional; ## The location, which could be a domain name or an IP address. Left empty if not ## specified. - netlocation: string; + netlocation: string; ## Port number, if included in URI. - portnum: count &optional; + portnum: count &optional; ## Full including the file name. Will be '/' if there's not path given. - path: string; + path: string; ## Full file name, including extension, if there is a file name. - file_name: string &optional; + file_name: string &optional; ## The base filename, without extension, if there is a file name. - file_base: string &optional; + file_base: string &optional; ## The filename's extension, if there is a file name. - file_ext: string &optional; + file_ext: string &optional; ## A table of all query parameters, mapping their keys to values, if there's a ## query. - params: table[string] of string &optional; + params: table[string] of string &optional; }; ## Extracts URLs discovered in arbitrary text. @@ -46,19 +46,19 @@ function find_all_urls_without_scheme(s: string): string_set return return_urls; } -function decompose_uri(s: string): URI +function decompose_uri(uri: string): URI { local parts: string_vec; - local u: URI = [$netlocation="", $path="/"]; + local u = URI($netlocation="", $path="/"); + local s = uri; - if ( /\?/ in s) + if ( /\?/ in s ) { - # Parse query. u$params = table(); parts = split_string1(s, /\?/); s = parts[0]; - local query: string = parts[1]; + local query = parts[1]; if ( /&/ in query ) { @@ -73,7 +73,7 @@ function decompose_uri(s: string): URI } } } - else + else if ( /=/ in query ) { parts = split_string1(query, /=/); u$params[parts[0]] = parts[1]; @@ -97,14 +97,14 @@ function decompose_uri(s: string): URI if ( |u$path| > 1 && u$path[|u$path| - 1] != "/" ) { - local last_token: string = find_last(u$path, /\/.+/); + local last_token = find_last(u$path, /\/.+/); local full_filename = split_string1(last_token, /\//)[1]; if ( /\./ in full_filename ) { u$file_name = full_filename; u$file_base = split_string1(full_filename, /\./)[0]; - u$file_ext = split_string1(full_filename, /\./)[1]; + u$file_ext = split_string1(full_filename, /\./)[1]; } else { @@ -122,7 +122,9 @@ function decompose_uri(s: string): URI u$portnum = to_count(parts[1]); } else + { u$netlocation = s; + } return u; } diff --git a/testing/btest/Baseline/scripts.base.utils.urls/output b/testing/btest/Baseline/scripts.base.utils.urls/output new file mode 100644 index 0000000000..2d8f5b2c4d --- /dev/null +++ b/testing/btest/Baseline/scripts.base.utils.urls/output @@ -0,0 +1,11 @@ +[scheme=https, netlocation=www.example.com, portnum=, path=/, file_name=, file_base=, file_ext=, params=] +[scheme=http, netlocation=example.com, portnum=99, path=/test//, file_name=, file_base=, file_ext=, params={ +[foo] = bar +}] +[scheme=ftp, netlocation=1.2.3.4, portnum=, path=/pub/files/something.exe, file_name=something.exe, file_base=something, file_ext=exe, params=] +[scheme=http, netlocation=hyphen-example.com, portnum=, path=/index.asp, file_name=index.asp, file_base=index, file_ext=asp, params={ +[q] = 123 +}] +[scheme=, netlocation=dfasjdfasdfasdf, portnum=, path=/, file_name=, file_base=, file_ext=, params={ + +}] diff --git a/testing/btest/scripts/base/utils/urls.test b/testing/btest/scripts/base/utils/urls.test new file mode 100644 index 0000000000..fd8c0a8622 --- /dev/null +++ b/testing/btest/scripts/base/utils/urls.test @@ -0,0 +1,19 @@ +# @TEST-EXEC: bro %INPUT >output +# @TEST-EXEC: btest-diff output + +# This is loaded by default. +#@load base/utils/urls + +print decompose_uri("https://www.example.com/"); +print decompose_uri("http://example.com:99/test//?foo=bar"); +print decompose_uri("ftp://1.2.3.4/pub/files/something.exe"); +print decompose_uri("http://hyphen-example.com/index.asp?q=123"); + +# This is mostly undefined behavior but it doesn't give any +# reporter messages at least. +print decompose_uri("dfasjdfasdfasdf?asd"); + +# These aren't supported yet. +#print decompose_uri("mailto:foo@bar.com?subject=test!"); +#print decompose_uri("http://example.com/?test=ampersand&test"); +#print decompose_uri("http://user:password@example.com/"); \ No newline at end of file