mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
decompose_uri complete, need btests
This commit is contained in:
parent
3c42350e77
commit
69ce4d3038
1 changed file with 43 additions and 4 deletions
|
@ -3,14 +3,16 @@
|
||||||
## A regular expression for matching and extracting URLs.
|
## A regular expression for matching and extracting URLs.
|
||||||
const url_regex = /^([a-zA-Z\-]{3,5})(:\/\/[^\/?#"'\r\n><]*)([^?#"'\r\n><]*)([^[:blank:]\r\n"'><]*|\??[^"'\r\n><]*)/ &redef;
|
const url_regex = /^([a-zA-Z\-]{3,5})(:\/\/[^\/?#"'\r\n><]*)([^?#"'\r\n><]*)([^[:blank:]\r\n"'><]*|\??[^"'\r\n><]*)/ &redef;
|
||||||
|
|
||||||
type uri_record: record {
|
type URI: record {
|
||||||
protocol: string &optional;
|
scheme: string &optional;
|
||||||
# this could be a domain name or an IP address
|
# this could be a domain name or an IP address
|
||||||
netlocation: string;
|
netlocation: string;
|
||||||
portnum: count &optional;
|
portnum: count &optional;
|
||||||
path: string &optional;
|
path: string &optional;
|
||||||
file_name: string &optional;
|
file_name: string &optional;
|
||||||
file_ext: string &optional;
|
file_ext: string &optional;
|
||||||
|
params_k: table[count] of string;
|
||||||
|
params_v: table[count] of string;
|
||||||
};
|
};
|
||||||
|
|
||||||
## Extracts URLs discovered in arbitrary text.
|
## Extracts URLs discovered in arbitrary text.
|
||||||
|
@ -34,11 +36,49 @@ function find_all_urls_without_scheme(s: string): string_set
|
||||||
return return_urls;
|
return return_urls;
|
||||||
}
|
}
|
||||||
|
|
||||||
function decompose_uri(s: string): uri_record
|
function decompose_uri(s: string): URI
|
||||||
{
|
{
|
||||||
local parts: string_array;
|
local parts: string_array;
|
||||||
local u: uri = [$netlocation=""];
|
local u: uri = [$netlocation=""];
|
||||||
|
|
||||||
|
if ( /\?/ in s)
|
||||||
|
{
|
||||||
|
local k: table[count] of string;
|
||||||
|
local v: table[count] of string;
|
||||||
|
u$params_k = k;
|
||||||
|
u$params_v = v;
|
||||||
|
|
||||||
|
parts = split1(s, /\?/);
|
||||||
|
s = parts[1];
|
||||||
|
local query: string = parts[2];
|
||||||
|
if (/&/ in query)
|
||||||
|
{
|
||||||
|
local opv: table[count] of string = split(query, /&/);
|
||||||
|
|
||||||
|
for (each in opv)
|
||||||
|
{
|
||||||
|
if (/=/ in opv[each])
|
||||||
|
{
|
||||||
|
parts = split1(opv[each], /=/);
|
||||||
|
|
||||||
|
# why does the order here matter?
|
||||||
|
u$params_k[each] = parts[1];
|
||||||
|
u$params_v[each] = parts[2];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
# malformed URI
|
||||||
|
# domain.tld/path/file.ext?foo&
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
parts = split1(query, /=/);
|
||||||
|
u$params_k[0] = parts[1];
|
||||||
|
u$params_v[0] = parts[2];
|
||||||
|
}
|
||||||
|
}
|
||||||
if (/:\/\// in s)
|
if (/:\/\// in s)
|
||||||
{
|
{
|
||||||
parts = split1(s, /:\/\//);
|
parts = split1(s, /:\/\//);
|
||||||
|
@ -78,6 +118,5 @@ function decompose_uri(s: string): uri_record
|
||||||
{
|
{
|
||||||
u$netlocation = s;
|
u$netlocation = s;
|
||||||
}
|
}
|
||||||
|
|
||||||
return u;
|
return u;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue