Merge branch 'topic/timw/4218-lowercase-http'

* topic/timw/4218-lowercase-http:
  Ignore case when matching prefix in http analyzer
This commit is contained in:
Tim Wojtulewicz 2025-04-25 10:33:32 -07:00
commit 82bf555f7d
13 changed files with 103 additions and 19 deletions

View file

@ -1,3 +1,7 @@
7.2.0-dev.659 | 2025-04-25 10:33:32 -0700
* Ignore case when matching prefix in http analyzer (Kshitiz Bartariya)
7.2.0-dev.657 | 2025-04-25 10:25:01 -0700 7.2.0-dev.657 | 2025-04-25 10:25:01 -0700
* Updates for the various Broker changes (Christian Kreibich, Corelight) * Updates for the various Broker changes (Christian Kreibich, Corelight)

20
NEWS
View file

@ -6,14 +6,14 @@ release. For an exhaustive list of changes, see the ``CHANGES`` file
Zeek 7.2.0 Zeek 7.2.0
========== ==========
We would like to thank Aashish Sharma (@initconf), Anthony Verez (@netantho), We would like to thank Aashish Sharma (@initconf), Anthony Verez (@netantho), Anthony
Anthony Kasza (@anthonykasza), @biswajitutil, Brendan Kapp (@BrendanKapp), Kasza (@anthonykasza), @biswajitutil, Brendan Kapp (@BrendanKapp), Carlos Lopez, Chris
Carlos Lopez, Chris Hinshaw (@MMChrisHinshaw), Faan Rossouw (@faanross), Hinshaw (@MMChrisHinshaw), Faan Rossouw (@faanross), @FishyFluffer, Fupeng Zhao
@FishyFluffer, Fupeng Zhao (@AmazingPP), Herbert (@Herbert-Karl), @jbaggs, Jan (@AmazingPP), Herbert (@Herbert-Karl), @jbaggs, Jan Grashöfer (@J-Gras), Julian Krieger
Grashöfer (@J-Gras), Julian Krieger (@juliankrieger), Justin Azoff (@juliankrieger), Justin Azoff (@JustinAzoff), Kshitiz Bartariya (@kshitiz56), @Laotree,
(@JustinAzoff), @Laotree, Mark Overholser (@markoverholser), Mike Dopheide Mark Overholser (@markoverholser), Mike Dopheide (@dopheide-esnet), @mnhsrj, Mohan Dhawan
(@dopheide-esnet), @mnhsrj, Mohan Dhawan (@Mohan-Dhawan), @philipp-tg, Seth Hall (@Mohan-Dhawan), @philipp-tg, Seth Hall (@sethhall), and @timo-mue for their contributions
(@sethhall), and @timo-mue for their contributions to this release. to this release.
Breaking Changes Breaking Changes
---------------- ----------------
@ -162,6 +162,10 @@ New Functionality
restrictions on using libkrb5 only on Linux platforms was removed. CMake will now search restrictions on using libkrb5 only on Linux platforms was removed. CMake will now search
for it on all platforms as expected. for it on all platforms as expected.
- The HTTP analyzer now matches the HTTP-name field case-insensitively, even though the
spec requires that field to be uppercase. If a non-uppercase string is encountered, a
new ``lowercase_HTTP_keyword`` weird is emitted.
Changed Functionality Changed Functionality
--------------------- ---------------------

View file

@ -1 +1 @@
7.2.0-dev.657 7.2.0-dev.659

View file

@ -14,7 +14,7 @@ signature dpd_http_client {
signature dpd_http_server { signature dpd_http_server {
ip-proto == tcp ip-proto == tcp
payload /^HTTP\/[0-9]/ payload /^[hH][tT][tT][pP]\/[0-9]/
tcp-state responder tcp-state responder
enable "http" enable "http"
} }

View file

@ -1097,8 +1097,10 @@ void HTTP_Analyzer::GenStats() {
} }
} }
const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line, const char* prefix) { const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line, const char* prefix,
while ( *prefix && line < end_of_line && *prefix == *line ) { bool ignore_case) {
while ( *prefix && line < end_of_line &&
((ignore_case && tolower((unsigned char)*prefix) == tolower((unsigned char)*line)) || *prefix == *line) ) {
++prefix; ++prefix;
++line; ++line;
} }
@ -1110,8 +1112,9 @@ const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line
return line; return line;
} }
const char* HTTP_Analyzer::PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix) { const char* HTTP_Analyzer::PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix,
if ( (line = PrefixMatch(line, end_of_line, prefix)) == nullptr ) bool ignore_case) {
if ( (line = PrefixMatch(line, end_of_line, prefix, ignore_case)) == nullptr )
return nullptr; return nullptr;
const char* orig_line = line; const char* orig_line = line;
@ -1193,13 +1196,23 @@ bool HTTP_Analyzer::ParseRequest(const char* line, const char* end_of_line) {
const char* end_of_uri; const char* end_of_uri;
const char* version_start; const char* version_start;
const char* version_end; const char* version_end;
const char* match;
for ( end_of_uri = line; end_of_uri < end_of_line; ++end_of_uri ) { for ( end_of_uri = line; end_of_uri < end_of_line; ++end_of_uri ) {
if ( ! is_reserved_URI_char(*end_of_uri) && ! is_unreserved_URI_char(*end_of_uri) && *end_of_uri != '%' ) if ( ! is_reserved_URI_char(*end_of_uri) && ! is_unreserved_URI_char(*end_of_uri) && *end_of_uri != '%' )
break; break;
} }
if ( end_of_uri >= end_of_line && PrefixMatch(line, end_of_line, "HTTP/") ) { match = PrefixMatch(line, end_of_line, "HTTP/", false);
if ( ! match ) {
// If the uppercase version didn't match, try a case-insensitive version, but
// send a weird if it matches.
match = PrefixMatch(line, end_of_line, "HTTP/", true);
if ( match )
Weird("lowercase_HTTP_keyword");
}
if ( end_of_uri >= end_of_line && match ) {
Weird("missing_HTTP_uri"); Weird("missing_HTTP_uri");
end_of_uri = line; // Leave URI empty. end_of_uri = line; // Leave URI empty.
} }
@ -1207,8 +1220,14 @@ bool HTTP_Analyzer::ParseRequest(const char* line, const char* end_of_line) {
for ( version_start = end_of_uri; version_start < end_of_line; ++version_start ) { for ( version_start = end_of_uri; version_start < end_of_line; ++version_start ) {
end_of_uri = version_start; end_of_uri = version_start;
version_start = util::skip_whitespace(version_start, end_of_line); version_start = util::skip_whitespace(version_start, end_of_line);
if ( PrefixMatch(version_start, end_of_line, "HTTP/") ) if ( PrefixMatch(version_start, end_of_line, "HTTP/", false) )
break; break;
// If the uppercase version didn't match, try a case-insensitive version, but
// send a weird if it matches.
if ( PrefixMatch(version_start, end_of_line, "HTTP/", true) ) {
Weird("lowercase_HTTP_keyword");
break;
}
} }
if ( version_start >= end_of_line ) { if ( version_start >= end_of_line ) {
@ -1453,7 +1472,16 @@ const String* HTTP_Analyzer::UnansweredRequestMethod() {
int HTTP_Analyzer::HTTP_ReplyLine(const char* line, const char* end_of_line) { int HTTP_Analyzer::HTTP_ReplyLine(const char* line, const char* end_of_line) {
const char* rest; const char* rest;
if ( ! (rest = PrefixMatch(line, end_of_line, "HTTP/")) ) { rest = PrefixMatch(line, end_of_line, "HTTP/", false);
if ( ! rest ) {
// If the uppercase version didn't match, try a case-insensitive version, but
// send a weird if it matches.
rest = PrefixMatch(line, end_of_line, "HTTP/", true);
if ( rest )
Weird("lowercase_HTTP_keyword");
}
if ( ! rest ) {
// ##TODO: some server replies with an HTML document // ##TODO: some server replies with an HTML document
// without a status line and a MIME header, when the // without a status line and a MIME header, when the
// request is malformed. // request is malformed.

View file

@ -209,8 +209,9 @@ protected:
void InitHTTPMessage(analyzer::tcp::ContentLine_Analyzer* cl, HTTP_Message*& message, bool is_orig, int expect_body, void InitHTTPMessage(analyzer::tcp::ContentLine_Analyzer* cl, HTTP_Message*& message, bool is_orig, int expect_body,
int64_t init_header_length); int64_t init_header_length);
const char* PrefixMatch(const char* line, const char* end_of_line, const char* prefix); const char* PrefixMatch(const char* line, const char* end_of_line, const char* prefix, bool ignore_case = false);
const char* PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix); const char* PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix,
bool ignore_case = false);
bool ParseRequest(const char* line, const char* end_of_line); bool ParseRequest(const char* line, const char* end_of_line);
HTTP_VersionNumber HTTP_Version(int len, const char* data); HTTP_VersionNumber HTTP_Version(int len, const char* data);

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path http
#open XXXX-XX-XX-XX-XX-XX
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer version user_agent origin request_body_len response_body_len status_code status_msg info_code info_msg tags username password proxied orig_fuids orig_filenames orig_mime_types resp_fuids resp_filenames resp_mime_types
#types time string addr port addr port count string string string string string string string count count count string count string set[enum] string string set[string] vector[string] vector[string] vector[string] vector[string] vector[string] vector[string]
XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 127.0.0.1 49742 127.0.0.1 1234 1 GET 146.190.62.39 /index.html - 1.1 Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0 - 0 0 200 OK - - (empty) - - - - - - - - -
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path http
#open XXXX-XX-XX-XX-XX-XX
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer version user_agent origin request_body_len response_body_len status_code status_msg info_code info_msg tags username password proxied orig_fuids orig_filenames orig_mime_types resp_fuids resp_filenames resp_mime_types
#types time string addr port addr port count string string string string string string string count count count string count string set[enum] string string set[string] vector[string] vector[string] vector[string] vector[string] vector[string] vector[string]
XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 127.0.0.1 60618 127.0.0.1 80 1 GET 146.190.62.39 /index.html - 1.1 Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0 - 0 0 200 OK - - (empty) - - - - - - - - -
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
CHhAvVGS1DHFjwGM9 lowercase_HTTP_keyword

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
CHhAvVGS1DHFjwGM9 lowercase_HTTP_keyword

Binary file not shown.

View file

@ -0,0 +1,21 @@
# Tests that the HTTP analyzer correctly handles a lowercase HTTP keyword.
#
# @TEST-EXEC: zeek -C -b -r $TRACES/http/http-lower-case.pcap %INPUT
# @TEST-EXEC: ! test -f dpd.log
# @TEST-EXEC: ! test -f analyzer.log
# @TEST-EXEC: zeek-cut uid name < weird.log > weird.log.standard
# @TEST-EXEC: btest-diff weird.log.standard
# @TEST-EXEC: mv http.log http.log.standard
# @TEST-EXEC: btest-diff http.log.standard
# @TEST-EXEC: rm *.log
# @TEST-EXEC: zeek -C -b -r $TRACES/http/http-lower-case-nonstandard-port.pcap %INPUT
# @TEST-EXEC: ! test -f dpd.log
# @TEST-EXEC: ! test -f analyzer.log
# @TEST-EXEC: zeek-cut uid name < weird.log > weird.log.nonstandard
# @TEST-EXEC: btest-diff weird.log.nonstandard
# @TEST-EXEC: mv http.log http.log.nonstandard
# @TEST-EXEC: btest-diff http.log.nonstandard
@load base/protocols/http