Merge branch 'topic/timw/4218-lowercase-http'

* topic/timw/4218-lowercase-http:
  Ignore case when matching prefix in http analyzer
This commit is contained in:
Tim Wojtulewicz 2025-04-25 10:33:32 -07:00
commit 82bf555f7d
13 changed files with 103 additions and 19 deletions

View file

@ -1,3 +1,7 @@
7.2.0-dev.659 | 2025-04-25 10:33:32 -0700
* Ignore case when matching prefix in http analyzer (Kshitiz Bartariya)
7.2.0-dev.657 | 2025-04-25 10:25:01 -0700
* Updates for the various Broker changes (Christian Kreibich, Corelight)

20
NEWS
View file

@ -6,14 +6,14 @@ release. For an exhaustive list of changes, see the ``CHANGES`` file
Zeek 7.2.0
==========
We would like to thank Aashish Sharma (@initconf), Anthony Verez (@netantho),
Anthony Kasza (@anthonykasza), @biswajitutil, Brendan Kapp (@BrendanKapp),
Carlos Lopez, Chris Hinshaw (@MMChrisHinshaw), Faan Rossouw (@faanross),
@FishyFluffer, Fupeng Zhao (@AmazingPP), Herbert (@Herbert-Karl), @jbaggs, Jan
Grashöfer (@J-Gras), Julian Krieger (@juliankrieger), Justin Azoff
(@JustinAzoff), @Laotree, Mark Overholser (@markoverholser), Mike Dopheide
(@dopheide-esnet), @mnhsrj, Mohan Dhawan (@Mohan-Dhawan), @philipp-tg, Seth Hall
(@sethhall), and @timo-mue for their contributions to this release.
We would like to thank Aashish Sharma (@initconf), Anthony Verez (@netantho), Anthony
Kasza (@anthonykasza), @biswajitutil, Brendan Kapp (@BrendanKapp), Carlos Lopez, Chris
Hinshaw (@MMChrisHinshaw), Faan Rossouw (@faanross), @FishyFluffer, Fupeng Zhao
(@AmazingPP), Herbert (@Herbert-Karl), @jbaggs, Jan Grashöfer (@J-Gras), Julian Krieger
(@juliankrieger), Justin Azoff (@JustinAzoff), Kshitiz Bartariya (@kshitiz56), @Laotree,
Mark Overholser (@markoverholser), Mike Dopheide (@dopheide-esnet), @mnhsrj, Mohan Dhawan
(@Mohan-Dhawan), @philipp-tg, Seth Hall (@sethhall), and @timo-mue for their contributions
to this release.
Breaking Changes
----------------
@ -162,6 +162,10 @@ New Functionality
restrictions on using libkrb5 only on Linux platforms was removed. CMake will now search
for it on all platforms as expected.
- The HTTP analyzer now matches the HTTP-name part of the version field (the ``HTTP`` in
``HTTP/1.1``) case-insensitively, even though the spec requires it to be uppercase. If a
non-uppercase name is encountered, a new ``lowercase_HTTP_keyword`` weird is emitted.
Changed Functionality
---------------------

View file

@ -1 +1 @@
7.2.0-dev.657
7.2.0-dev.659

View file

@ -14,7 +14,7 @@ signature dpd_http_client {
signature dpd_http_server {
ip-proto == tcp
payload /^HTTP\/[0-9]/
payload /^[hH][tT][tT][pP]\/[0-9]/
tcp-state responder
enable "http"
}

View file

@ -1097,8 +1097,10 @@ void HTTP_Analyzer::GenStats() {
}
}
const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line, const char* prefix) {
while ( *prefix && line < end_of_line && *prefix == *line ) {
const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line, const char* prefix,
bool ignore_case) {
while ( *prefix && line < end_of_line &&
((ignore_case && tolower((unsigned char)*prefix) == tolower((unsigned char)*line)) || *prefix == *line) ) {
++prefix;
++line;
}
@ -1110,8 +1112,9 @@ const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line
return line;
}
const char* HTTP_Analyzer::PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix) {
if ( (line = PrefixMatch(line, end_of_line, prefix)) == nullptr )
const char* HTTP_Analyzer::PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix,
bool ignore_case) {
if ( (line = PrefixMatch(line, end_of_line, prefix, ignore_case)) == nullptr )
return nullptr;
const char* orig_line = line;
@ -1193,13 +1196,23 @@ bool HTTP_Analyzer::ParseRequest(const char* line, const char* end_of_line) {
const char* end_of_uri;
const char* version_start;
const char* version_end;
const char* match;
for ( end_of_uri = line; end_of_uri < end_of_line; ++end_of_uri ) {
if ( ! is_reserved_URI_char(*end_of_uri) && ! is_unreserved_URI_char(*end_of_uri) && *end_of_uri != '%' )
break;
}
if ( end_of_uri >= end_of_line && PrefixMatch(line, end_of_line, "HTTP/") ) {
match = PrefixMatch(line, end_of_line, "HTTP/", false);
if ( ! match ) {
// If the uppercase version didn't match, try a case-insensitive version, but
// send a weird if it matches.
match = PrefixMatch(line, end_of_line, "HTTP/", true);
if ( match )
Weird("lowercase_HTTP_keyword");
}
if ( end_of_uri >= end_of_line && match ) {
Weird("missing_HTTP_uri");
end_of_uri = line; // Leave URI empty.
}
@ -1207,8 +1220,14 @@ bool HTTP_Analyzer::ParseRequest(const char* line, const char* end_of_line) {
for ( version_start = end_of_uri; version_start < end_of_line; ++version_start ) {
end_of_uri = version_start;
version_start = util::skip_whitespace(version_start, end_of_line);
if ( PrefixMatch(version_start, end_of_line, "HTTP/") )
if ( PrefixMatch(version_start, end_of_line, "HTTP/", false) )
break;
// If the uppercase version didn't match, try a case-insensitive version, but
// send a weird if it matches.
if ( PrefixMatch(version_start, end_of_line, "HTTP/", true) ) {
Weird("lowercase_HTTP_keyword");
break;
}
}
if ( version_start >= end_of_line ) {
@ -1453,7 +1472,16 @@ const String* HTTP_Analyzer::UnansweredRequestMethod() {
int HTTP_Analyzer::HTTP_ReplyLine(const char* line, const char* end_of_line) {
const char* rest;
if ( ! (rest = PrefixMatch(line, end_of_line, "HTTP/")) ) {
rest = PrefixMatch(line, end_of_line, "HTTP/", false);
if ( ! rest ) {
// If the uppercase version didn't match, try a case-insensitive version, but
// send a weird if it matches.
rest = PrefixMatch(line, end_of_line, "HTTP/", true);
if ( rest )
Weird("lowercase_HTTP_keyword");
}
if ( ! rest ) {
// ##TODO: some servers reply with an HTML document
// without a status line and a MIME header, when the
// request is malformed.

View file

@ -209,8 +209,9 @@ protected:
void InitHTTPMessage(analyzer::tcp::ContentLine_Analyzer* cl, HTTP_Message*& message, bool is_orig, int expect_body,
int64_t init_header_length);
const char* PrefixMatch(const char* line, const char* end_of_line, const char* prefix);
const char* PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix);
const char* PrefixMatch(const char* line, const char* end_of_line, const char* prefix, bool ignore_case = false);
const char* PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix,
bool ignore_case = false);
bool ParseRequest(const char* line, const char* end_of_line);
HTTP_VersionNumber HTTP_Version(int len, const char* data);

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path http
#open XXXX-XX-XX-XX-XX-XX
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer version user_agent origin request_body_len response_body_len status_code status_msg info_code info_msg tags username password proxied orig_fuids orig_filenames orig_mime_types resp_fuids resp_filenames resp_mime_types
#types time string addr port addr port count string string string string string string string count count count string count string set[enum] string string set[string] vector[string] vector[string] vector[string] vector[string] vector[string] vector[string]
XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 127.0.0.1 49742 127.0.0.1 1234 1 GET 146.190.62.39 /index.html - 1.1 Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0 - 0 0 200 OK - - (empty) - - - - - - - - -
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path http
#open XXXX-XX-XX-XX-XX-XX
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer version user_agent origin request_body_len response_body_len status_code status_msg info_code info_msg tags username password proxied orig_fuids orig_filenames orig_mime_types resp_fuids resp_filenames resp_mime_types
#types time string addr port addr port count string string string string string string string count count count string count string set[enum] string string set[string] vector[string] vector[string] vector[string] vector[string] vector[string] vector[string]
XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 127.0.0.1 60618 127.0.0.1 80 1 GET 146.190.62.39 /index.html - 1.1 Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0 - 0 0 200 OK - - (empty) - - - - - - - - -
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
CHhAvVGS1DHFjwGM9 lowercase_HTTP_keyword

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
CHhAvVGS1DHFjwGM9 lowercase_HTTP_keyword

Binary file not shown.

View file

@ -0,0 +1,21 @@
# Tests that the HTTP analyzer correctly handles a lowercase HTTP keyword, using one trace on the standard port and one on a non-standard port.
#
# @TEST-EXEC: zeek -C -b -r $TRACES/http/http-lower-case.pcap %INPUT
# @TEST-EXEC: ! test -f dpd.log
# @TEST-EXEC: ! test -f analyzer.log
# @TEST-EXEC: zeek-cut uid name < weird.log > weird.log.standard
# @TEST-EXEC: btest-diff weird.log.standard
# @TEST-EXEC: mv http.log http.log.standard
# @TEST-EXEC: btest-diff http.log.standard
# @TEST-EXEC: rm *.log
# @TEST-EXEC: zeek -C -b -r $TRACES/http/http-lower-case-nonstandard-port.pcap %INPUT
# @TEST-EXEC: ! test -f dpd.log
# @TEST-EXEC: ! test -f analyzer.log
# @TEST-EXEC: zeek-cut uid name < weird.log > weird.log.nonstandard
# @TEST-EXEC: btest-diff weird.log.nonstandard
# @TEST-EXEC: mv http.log http.log.nonstandard
# @TEST-EXEC: btest-diff http.log.nonstandard
@load base/protocols/http