mirror of
https://github.com/zeek/zeek.git
synced 2025-10-14 20:48:21 +00:00
Add urldecoding for the unofficial %u00AE style of encoding.
This commit is contained in:
parent
3151a95381
commit
2f6e069c00
1 changed files with 30 additions and 0 deletions
|
@ -1868,6 +1868,36 @@ BroString* analyzer::http::unescape_URI(const u_char* line, const u_char* line_e
|
||||||
++line; // place line at the last hex digit
|
++line; // place line at the last hex digit
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else if ( line_end - line >= 5 &&
|
||||||
|
line[0] == 'u' &&
|
||||||
|
isxdigit(line[1]) &&
|
||||||
|
isxdigit(line[2]) &&
|
||||||
|
isxdigit(line[3]) &&
|
||||||
|
isxdigit(line[4]) )
|
||||||
|
{
|
||||||
|
// Decode escaping like this: %u00AE
|
||||||
|
// The W3C rejected escaping this way, and
|
||||||
|
// there is no RFC that specifies it.
|
||||||
|
// Apparently there is some software doing
|
||||||
|
// this sort of 4 hex digit unicode encoding anyway.
|
||||||
|
// Likely causing an increase in its use is
|
||||||
|
// the third edition of the ECMAScript spec
|
||||||
|
// having functions for encoding and decoding
|
||||||
|
// data in this format.
|
||||||
|
|
||||||
|
// If the first byte is null, let's eat it.
|
||||||
|
// It could just be ASCII encoded into this
|
||||||
|
// unicode escaping structure.
|
||||||
|
if ( ! (line[1] == '0' && line[2] == '0' ) )
|
||||||
|
*URI_p++ = (decode_hex(line[1]) << 4) +
|
||||||
|
decode_hex(line[2]);
|
||||||
|
|
||||||
|
*URI_p++ = (decode_hex(line[3]) << 4) +
|
||||||
|
decode_hex(line[4]);
|
||||||
|
|
||||||
|
++line; ++line; ++line; ++line;
|
||||||
|
}
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ( analyzer )
|
if ( analyzer )
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue