mirror of
https://github.com/zeek/zeek.git
synced 2025-10-10 18:48:20 +00:00
Merge remote-tracking branch 'origin/topic/seth/remove-unescaped_special_char-weird'
BIT-1611 #merged * origin/topic/seth/remove-unescaped_special_char-weird: Add urldecoding for the unofficial %u00AE style of encoding. Remove the unescaped_special_char HTTP weird.
This commit is contained in:
commit
17dc28b8a8
3 changed files with 53 additions and 28 deletions
19
CHANGES
19
CHANGES
|
@ -1,4 +1,23 @@
|
||||||
|
|
||||||
|
2.4-588 | 2016-06-06 17:59:34 -0700
|
||||||
|
|
||||||
|
* Moved link-layer addresses into endpoints. The link-layer
|
||||||
|
addresses are now part of the connection endpoints following the
|
||||||
|
originator/responder pattern. (Jan Grashoefer)
|
||||||
|
|
||||||
|
* Link-layer addresses are extracted for 802.11 plus RadioTap. (Jan
|
||||||
|
Grashoefer)
|
||||||
|
|
||||||
|
* Fix coverity error (uninitialized variable) (Johanna Amann)
|
||||||
|
|
||||||
|
* Use ether_ntoa instead of ether_ntoa_r
|
||||||
|
|
||||||
|
The latter is thread-safe, but a GNU addition which does not exist on
|
||||||
|
OS-X. Since the function only is called in the main thread, it should
|
||||||
|
not matter if it is or is not threadsafe. (Johanna Amann)
|
||||||
|
|
||||||
|
* Fix FreeBSD/OSX compile problem due to headers (Johanna Amann)
|
||||||
|
|
||||||
2.4-581 | 2016-05-30 10:58:19 -0700
|
2.4-581 | 2016-05-30 10:58:19 -0700
|
||||||
|
|
||||||
* Adding missing new script file mac-logging.bro. (Robin Sommer)
|
* Adding missing new script file mac-logging.bro. (Robin Sommer)
|
||||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
||||||
2.4-581
|
2.4-588
|
||||||
|
|
|
@ -1813,12 +1813,12 @@ void HTTP_Analyzer::SkipEntityData(int is_orig)
|
||||||
}
|
}
|
||||||
|
|
||||||
int analyzer::http::is_reserved_URI_char(unsigned char ch)
|
int analyzer::http::is_reserved_URI_char(unsigned char ch)
|
||||||
{ // see RFC 2396 (definition of URI)
|
{ // see RFC 3986 (definition of URI)
|
||||||
return strchr(";/?:@&=+$,", ch) != 0;
|
return strchr(":/?#[]@!$&'()*+,;=", ch) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int analyzer::http::is_unreserved_URI_char(unsigned char ch)
|
int analyzer::http::is_unreserved_URI_char(unsigned char ch)
|
||||||
{ // see RFC 2396 (definition of URI)
|
{ // see RFC 3986 (definition of URI)
|
||||||
return isalnum(ch) || strchr("-_.!~*\'()", ch) != 0;
|
return isalnum(ch) || strchr("-_.!~*\'()", ch) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1835,19 +1835,6 @@ BroString* analyzer::http::unescape_URI(const u_char* line, const u_char* line_e
|
||||||
byte_vec decoded_URI = new u_char[line_end - line + 1];
|
byte_vec decoded_URI = new u_char[line_end - line + 1];
|
||||||
byte_vec URI_p = decoded_URI;
|
byte_vec URI_p = decoded_URI;
|
||||||
|
|
||||||
// An 'unescaped_special_char' here means a character that *should*
|
|
||||||
// be escaped, but isn't in the URI. A control character that
|
|
||||||
// appears directly in the URI would be an example. The RFC implies
|
|
||||||
// that if we do not unescape the URI that we see in the trace, every
|
|
||||||
// character should be a printable one -- either reserved or unreserved
|
|
||||||
// (or '%').
|
|
||||||
//
|
|
||||||
// Counting the number of unescaped characters and generating a weird
|
|
||||||
// event on URI's with unescaped characters (which are rare) will
|
|
||||||
// let us locate strange-looking URI's in the trace -- those URI's
|
|
||||||
// are often interesting.
|
|
||||||
int unescaped_special_char = 0;
|
|
||||||
|
|
||||||
while ( line < line_end )
|
while ( line < line_end )
|
||||||
{
|
{
|
||||||
if ( *line == '%' )
|
if ( *line == '%' )
|
||||||
|
@ -1881,6 +1868,36 @@ BroString* analyzer::http::unescape_URI(const u_char* line, const u_char* line_e
|
||||||
++line; // place line at the last hex digit
|
++line; // place line at the last hex digit
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else if ( line_end - line >= 5 &&
|
||||||
|
line[0] == 'u' &&
|
||||||
|
isxdigit(line[1]) &&
|
||||||
|
isxdigit(line[2]) &&
|
||||||
|
isxdigit(line[3]) &&
|
||||||
|
isxdigit(line[4]) )
|
||||||
|
{
|
||||||
|
// Decode escaping like this: %u00AE
|
||||||
|
// The W3C rejected escaping this way, and
|
||||||
|
// there is no RFC that specifies it.
|
||||||
|
// Apparently there is some software doing
|
||||||
|
// this sort of 4 byte unicode encoding anyway.
|
||||||
|
// Likely causing an increase in its use is
|
||||||
|
// the third edition of the ECMAScript spec
|
||||||
|
// having functions for encoding and decoding
|
||||||
|
// data in this format.
|
||||||
|
|
||||||
|
// If the first byte is null, let's eat it.
|
||||||
|
// It could just be ASCII encoded into this
|
||||||
|
// unicode escaping structure.
|
||||||
|
if ( ! (line[1] == '0' && line[2] == '0' ) )
|
||||||
|
*URI_p++ = (decode_hex(line[1]) << 4) +
|
||||||
|
decode_hex(line[2]);
|
||||||
|
|
||||||
|
*URI_p++ = (decode_hex(line[3]) << 4) +
|
||||||
|
decode_hex(line[4]);
|
||||||
|
|
||||||
|
line += 4;
|
||||||
|
}
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ( analyzer )
|
if ( analyzer )
|
||||||
|
@ -1891,23 +1908,12 @@ BroString* analyzer::http::unescape_URI(const u_char* line, const u_char* line_e
|
||||||
}
|
}
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
|
||||||
if ( ! is_reserved_URI_char(*line) &&
|
|
||||||
! is_unreserved_URI_char(*line) )
|
|
||||||
// Count these up as a way to compress
|
|
||||||
// the corresponding Weird event to a
|
|
||||||
// single instance.
|
|
||||||
++unescaped_special_char;
|
|
||||||
*URI_p++ = *line;
|
*URI_p++ = *line;
|
||||||
}
|
|
||||||
|
|
||||||
++line;
|
++line;
|
||||||
}
|
}
|
||||||
|
|
||||||
URI_p[0] = 0;
|
URI_p[0] = 0;
|
||||||
|
|
||||||
if ( unescaped_special_char && analyzer )
|
|
||||||
analyzer->Weird("unescaped_special_URI_char");
|
|
||||||
|
|
||||||
return new BroString(1, decoded_URI, URI_p - decoded_URI);
|
return new BroString(1, decoded_URI, URI_p - decoded_URI);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue