mirror of
https://github.com/zeek/zeek.git
synced 2025-10-10 10:38:20 +00:00
Updates for the urls.bro script. Fixes BIT-1404.
This commit is contained in:
parent
2b1cd66f17
commit
097354a43f
3 changed files with 48 additions and 16 deletions
|
@ -6,23 +6,23 @@ const url_regex = /^([a-zA-Z\-]{3,5})(:\/\/[^\/?#"'\r\n><]*)([^?#"'\r\n><]*)([^[
|
||||||
## A URI, as parsed by :bro:id:`decompose_uri`.
|
## A URI, as parsed by :bro:id:`decompose_uri`.
|
||||||
type URI: record {
|
type URI: record {
|
||||||
## The URL's scheme.
|
## The URL's scheme.
|
||||||
scheme: string &optional;
|
scheme: string &optional;
|
||||||
## The location, which could be a domain name or an IP address. Left empty if not
|
## The location, which could be a domain name or an IP address. Left empty if not
|
||||||
## specified.
|
## specified.
|
||||||
netlocation: string;
|
netlocation: string;
|
||||||
## Port number, if included in URI.
|
## Port number, if included in URI.
|
||||||
portnum: count &optional;
|
portnum: count &optional;
|
||||||
## Full path, including the file name. Will be '/' if there's no path given.
|
## Full path, including the file name. Will be '/' if there's no path given.
|
||||||
path: string;
|
path: string;
|
||||||
## Full file name, including extension, if there is a file name.
|
## Full file name, including extension, if there is a file name.
|
||||||
file_name: string &optional;
|
file_name: string &optional;
|
||||||
## The base filename, without extension, if there is a file name.
|
## The base filename, without extension, if there is a file name.
|
||||||
file_base: string &optional;
|
file_base: string &optional;
|
||||||
## The filename's extension, if there is a file name.
|
## The filename's extension, if there is a file name.
|
||||||
file_ext: string &optional;
|
file_ext: string &optional;
|
||||||
## A table of all query parameters, mapping their keys to values, if there's a
|
## A table of all query parameters, mapping their keys to values, if there's a
|
||||||
## query.
|
## query.
|
||||||
params: table[string] of string &optional;
|
params: table[string] of string &optional;
|
||||||
};
|
};
|
||||||
|
|
||||||
## Extracts URLs discovered in arbitrary text.
|
## Extracts URLs discovered in arbitrary text.
|
||||||
|
@ -46,19 +46,19 @@ function find_all_urls_without_scheme(s: string): string_set
|
||||||
return return_urls;
|
return return_urls;
|
||||||
}
|
}
|
||||||
|
|
||||||
function decompose_uri(s: string): URI
|
function decompose_uri(uri: string): URI
|
||||||
{
|
{
|
||||||
local parts: string_vec;
|
local parts: string_vec;
|
||||||
local u: URI = [$netlocation="", $path="/"];
|
local u = URI($netlocation="", $path="/");
|
||||||
|
local s = uri;
|
||||||
|
|
||||||
if ( /\?/ in s)
|
if ( /\?/ in s )
|
||||||
{
|
{
|
||||||
# Parse query.
|
|
||||||
u$params = table();
|
u$params = table();
|
||||||
|
|
||||||
parts = split_string1(s, /\?/);
|
parts = split_string1(s, /\?/);
|
||||||
s = parts[0];
|
s = parts[0];
|
||||||
local query: string = parts[1];
|
local query = parts[1];
|
||||||
|
|
||||||
if ( /&/ in query )
|
if ( /&/ in query )
|
||||||
{
|
{
|
||||||
|
@ -73,7 +73,7 @@ function decompose_uri(s: string): URI
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else if ( /=/ in query )
|
||||||
{
|
{
|
||||||
parts = split_string1(query, /=/);
|
parts = split_string1(query, /=/);
|
||||||
u$params[parts[0]] = parts[1];
|
u$params[parts[0]] = parts[1];
|
||||||
|
@ -97,14 +97,14 @@ function decompose_uri(s: string): URI
|
||||||
|
|
||||||
if ( |u$path| > 1 && u$path[|u$path| - 1] != "/" )
|
if ( |u$path| > 1 && u$path[|u$path| - 1] != "/" )
|
||||||
{
|
{
|
||||||
local last_token: string = find_last(u$path, /\/.+/);
|
local last_token = find_last(u$path, /\/.+/);
|
||||||
local full_filename = split_string1(last_token, /\//)[1];
|
local full_filename = split_string1(last_token, /\//)[1];
|
||||||
|
|
||||||
if ( /\./ in full_filename )
|
if ( /\./ in full_filename )
|
||||||
{
|
{
|
||||||
u$file_name = full_filename;
|
u$file_name = full_filename;
|
||||||
u$file_base = split_string1(full_filename, /\./)[0];
|
u$file_base = split_string1(full_filename, /\./)[0];
|
||||||
u$file_ext = split_string1(full_filename, /\./)[1];
|
u$file_ext = split_string1(full_filename, /\./)[1];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -122,7 +122,9 @@ function decompose_uri(s: string): URI
|
||||||
u$portnum = to_count(parts[1]);
|
u$portnum = to_count(parts[1]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
u$netlocation = s;
|
u$netlocation = s;
|
||||||
|
}
|
||||||
|
|
||||||
return u;
|
return u;
|
||||||
}
|
}
|
||||||
|
|
11
testing/btest/Baseline/scripts.base.utils.urls/output
Normal file
11
testing/btest/Baseline/scripts.base.utils.urls/output
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
[scheme=https, netlocation=www.example.com, portnum=<uninitialized>, path=/, file_name=<uninitialized>, file_base=<uninitialized>, file_ext=<uninitialized>, params=<uninitialized>]
|
||||||
|
[scheme=http, netlocation=example.com, portnum=99, path=/test//, file_name=<uninitialized>, file_base=<uninitialized>, file_ext=<uninitialized>, params={
|
||||||
|
[foo] = bar
|
||||||
|
}]
|
||||||
|
[scheme=ftp, netlocation=1.2.3.4, portnum=<uninitialized>, path=/pub/files/something.exe, file_name=something.exe, file_base=something, file_ext=exe, params=<uninitialized>]
|
||||||
|
[scheme=http, netlocation=hyphen-example.com, portnum=<uninitialized>, path=/index.asp, file_name=index.asp, file_base=index, file_ext=asp, params={
|
||||||
|
[q] = 123
|
||||||
|
}]
|
||||||
|
[scheme=<uninitialized>, netlocation=dfasjdfasdfasdf, portnum=<uninitialized>, path=/, file_name=<uninitialized>, file_base=<uninitialized>, file_ext=<uninitialized>, params={
|
||||||
|
|
||||||
|
}]
|
19
testing/btest/scripts/base/utils/urls.test
Normal file
19
testing/btest/scripts/base/utils/urls.test
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# @TEST-EXEC: bro %INPUT >output
|
||||||
|
# @TEST-EXEC: btest-diff output
|
||||||
|
|
||||||
|
# This is loaded by default.
|
||||||
|
#@load base/utils/urls
|
||||||
|
|
||||||
|
print decompose_uri("https://www.example.com/");
|
||||||
|
print decompose_uri("http://example.com:99/test//?foo=bar");
|
||||||
|
print decompose_uri("ftp://1.2.3.4/pub/files/something.exe");
|
||||||
|
print decompose_uri("http://hyphen-example.com/index.asp?q=123");
|
||||||
|
|
||||||
|
# This is mostly undefined behavior but it doesn't give any
|
||||||
|
# reporter messages at least.
|
||||||
|
print decompose_uri("dfasjdfasdfasdf?asd");
|
||||||
|
|
||||||
|
# These aren't supported yet.
|
||||||
|
#print decompose_uri("mailto:foo@bar.com?subject=test!");
|
||||||
|
#print decompose_uri("http://example.com/?test=ampersand&test");
|
||||||
|
#print decompose_uri("http://user:password@example.com/");
|
Loading…
Add table
Add a link
Reference in a new issue