Merge branch 'topic/timw/4218-lowercase-http'

* topic/timw/4218-lowercase-http: Ignore case when matching prefix in http analyzer
2025-10-02 06:38:20 +00:00 · 2025-04-25 10:33:32 -07:00 · 2025-04-25 10:33:32 -07:00 · 82bf555f7d
commit 82bf555f7d
parent 4f65b89edf 40935c31b1
13 changed files with 103 additions and 19 deletions
--- a/4
+++ b/4
@ -1,3 +1,7 @@
+7.2.0-dev.659 | 2025-04-25 10:33:32 -0700
+
+  * Ignore case when matching prefix in http analyzer (Kshitiz Bartariya)
+
 7.2.0-dev.657 | 2025-04-25 10:25:01 -0700

  * Updates for the various Broker changes (Christian Kreibich, Corelight)
--- a/20
+++ b/20
@ -6,14 +6,14 @@ release. For an exhaustive list of changes, see the ``CHANGES`` file
 Zeek 7.2.0
 ==========

-We would like to thank Aashish Sharma (@initconf), Anthony Verez (@netantho),
-Anthony Kasza (@anthonykasza), @biswajitutil, Brendan Kapp (@BrendanKapp),
-Carlos Lopez, Chris Hinshaw (@MMChrisHinshaw), Faan Rossouw (@faanross),
-@FishyFluffer, Fupeng Zhao (@AmazingPP), Herbert (@Herbert-Karl), @jbaggs, Jan
-Grashöfer (@J-Gras), Julian Krieger (@juliankrieger), Justin Azoff
-(@JustinAzoff), @Laotree, Mark Overholser (@markoverholser), Mike Dopheide
-(@dopheide-esnet), @mnhsrj, Mohan Dhawan (@Mohan-Dhawan), @philipp-tg, Seth Hall
-(@sethhall), and @timo-mue for their contributions to this release.
+We would like to thank Aashish Sharma (@initconf), Anthony Verez (@netantho), Anthony
+Kasza (@anthonykasza), @biswajitutil, Brendan Kapp (@BrendanKapp), Carlos Lopez, Chris
+Hinshaw (@MMChrisHinshaw), Faan Rossouw (@faanross), @FishyFluffer, Fupeng Zhao
+(@AmazingPP), Herbert (@Herbert-Karl), @jbaggs, Jan Grashöfer (@J-Gras), Julian Krieger
+(@juliankrieger), Justin Azoff (@JustinAzoff), Kshitiz Bartariya (@kshitiz56), @Laotree,
+Mark Overholser (@markoverholser), Mike Dopheide (@dopheide-esnet), @mnhsrj, Mohan Dhawan
+(@Mohan-Dhawan), @philipp-tg, Seth Hall (@sethhall), and @timo-mue for their contributions
+to this release.

 Breaking Changes
 ----------------
@ -162,6 +162,10 @@ New Functionality
  restrictions on using libkrb5 only on Linux platforms was removed. CMake will now search
  for it on all platforms as expected.

+- The HTTP analyzer now matches the HTTP-name field case-insensitively, even though the
+  spec requires that field to be uppercase. If a non-uppercase string is encountered, a
+  new ``lowercase_HTTP_keyword`` weird is emitted.
+
 Changed Functionality
 ---------------------

--- a/2
+++ b/2
@ -1 +1 @@
-7.2.0-dev.657
+7.2.0-dev.659
--- a/scripts/base/protocols/http/dpd.sig
+++ b/scripts/base/protocols/http/dpd.sig
@ -14,7 +14,7 @@ signature dpd_http_client {

 signature dpd_http_server {
  ip-proto == tcp
-  payload /^HTTP\/[0-9]/
+  payload /^[hH][tT][tT][pP]\/[0-9]/
  tcp-state responder
  enable "http"
 }
--- a/src/analyzer/protocol/http/HTTP.cc
+++ b/src/analyzer/protocol/http/HTTP.cc
@ -1097,8 +1097,10 @@ void HTTP_Analyzer::GenStats() {
    }
 }

-const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line, const char* prefix) {
-    while ( *prefix && line < end_of_line && *prefix == *line ) {
+const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line, const char* prefix,
+                                       bool ignore_case) {
+    while ( *prefix && line < end_of_line &&
+            ((ignore_case && tolower((unsigned char)*prefix) == tolower((unsigned char)*line)) || *prefix == *line) ) {
        ++prefix;
        ++line;
    }
@ -1110,8 +1112,9 @@ const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line
    return line;
 }

-const char* HTTP_Analyzer::PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix) {
-    if ( (line = PrefixMatch(line, end_of_line, prefix)) == nullptr )
+const char* HTTP_Analyzer::PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix,
+                                           bool ignore_case) {
+    if ( (line = PrefixMatch(line, end_of_line, prefix, ignore_case)) == nullptr )
        return nullptr;

    const char* orig_line = line;
@ -1193,13 +1196,23 @@ bool HTTP_Analyzer::ParseRequest(const char* line, const char* end_of_line) {
    const char* end_of_uri;
    const char* version_start;
    const char* version_end;
+    const char* match;

    for ( end_of_uri = line; end_of_uri < end_of_line; ++end_of_uri ) {
        if ( ! is_reserved_URI_char(*end_of_uri) && ! is_unreserved_URI_char(*end_of_uri) && *end_of_uri != '%' )
            break;
    }

-    if ( end_of_uri >= end_of_line && PrefixMatch(line, end_of_line, "HTTP/") ) {
+    match = PrefixMatch(line, end_of_line, "HTTP/", false);
+    if ( ! match ) {
+        // If the uppercase version didn't match, try a case-insensitive version, but
+        // send a weird if it matches.
+        match = PrefixMatch(line, end_of_line, "HTTP/", true);
+        if ( match )
+            Weird("lowercase_HTTP_keyword");
+    }
+
+    if ( end_of_uri >= end_of_line && match ) {
        Weird("missing_HTTP_uri");
        end_of_uri = line; // Leave URI empty.
    }
@ -1207,8 +1220,14 @@ bool HTTP_Analyzer::ParseRequest(const char* line, const char* end_of_line) {
    for ( version_start = end_of_uri; version_start < end_of_line; ++version_start ) {
        end_of_uri = version_start;
        version_start = util::skip_whitespace(version_start, end_of_line);
-        if ( PrefixMatch(version_start, end_of_line, "HTTP/") )
+        if ( PrefixMatch(version_start, end_of_line, "HTTP/", false) )
            break;
+        // If the uppercase version didn't match, try a case-insensitive version, but
+        // send a weird if it matches.
+        if ( PrefixMatch(version_start, end_of_line, "HTTP/", true) ) {
+            Weird("lowercase_HTTP_keyword");
+            break;
+        }
    }

    if ( version_start >= end_of_line ) {
@ -1453,7 +1472,16 @@ const String* HTTP_Analyzer::UnansweredRequestMethod() {
 int HTTP_Analyzer::HTTP_ReplyLine(const char* line, const char* end_of_line) {
    const char* rest;

-    if ( ! (rest = PrefixMatch(line, end_of_line, "HTTP/")) ) {
+    rest = PrefixMatch(line, end_of_line, "HTTP/", false);
+    if ( ! rest ) {
+        // If the uppercase version didn't match, try a case-insensitive version, but
+        // send a weird if it matches.
+        rest = PrefixMatch(line, end_of_line, "HTTP/", true);
+        if ( rest )
+            Weird("lowercase_HTTP_keyword");
+    }
+
+    if ( ! rest ) {
        // ##TODO: some server replies with an HTML document
        // without a status line and a MIME header, when the
        // request is malformed.
--- a/src/analyzer/protocol/http/HTTP.h
+++ b/src/analyzer/protocol/http/HTTP.h
@ -209,8 +209,9 @@ protected:
    void InitHTTPMessage(analyzer::tcp::ContentLine_Analyzer* cl, HTTP_Message*& message, bool is_orig, int expect_body,
                         int64_t init_header_length);

-    const char* PrefixMatch(const char* line, const char* end_of_line, const char* prefix);
-    const char* PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix);
+    const char* PrefixMatch(const char* line, const char* end_of_line, const char* prefix, bool ignore_case = false);
+    const char* PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix,
+                                bool ignore_case = false);

    bool ParseRequest(const char* line, const char* end_of_line);
    HTTP_VersionNumber HTTP_Version(int len, const char* data);
--- a/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.nonstandard
+++ b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.nonstandard
@ -0,0 +1,11 @@
+### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
+#separator \x09
+#set_separator	,
+#empty_field	(empty)
+#unset_field	-
+#path	http
+#open XXXX-XX-XX-XX-XX-XX
+#fields	ts	uid	id.orig_h	id.orig_p	id.resp_h	id.resp_p	trans_depth	method	host	uri	referrer	version	user_agent	origin	request_body_len	response_body_len	status_code	status_msg	info_code	info_msg	tags	username	password	proxied	orig_fuids	orig_filenames	orig_mime_types	resp_fuids	resp_filenames	resp_mime_types
+#types	time	string	addr	port	addr	port	count	string	string	string	string	string	string	string	count	count	count	string	count	string	set[enum]	string	string	set[string]	vector[string]	vector[string]	vector[string]	vector[string]	vector[string]	vector[string]
+XXXXXXXXXX.XXXXXX	CHhAvVGS1DHFjwGM9	127.0.0.1	49742	127.0.0.1	1234	1	GET	146.190.62.39	/index.html	-	1.1	Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0	-	0	0	200	OK	-	-	(empty)	-	-	-	-	-	-	-	-	-
+#close XXXX-XX-XX-XX-XX-XX
--- a/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.standard
+++ b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.standard
@ -0,0 +1,11 @@
+### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
+#separator \x09
+#set_separator	,
+#empty_field	(empty)
+#unset_field	-
+#path	http
+#open XXXX-XX-XX-XX-XX-XX
+#fields	ts	uid	id.orig_h	id.orig_p	id.resp_h	id.resp_p	trans_depth	method	host	uri	referrer	version	user_agent	origin	request_body_len	response_body_len	status_code	status_msg	info_code	info_msg	tags	username	password	proxied	orig_fuids	orig_filenames	orig_mime_types	resp_fuids	resp_filenames	resp_mime_types
+#types	time	string	addr	port	addr	port	count	string	string	string	string	string	string	string	count	count	count	string	count	string	set[enum]	string	string	set[string]	vector[string]	vector[string]	vector[string]	vector[string]	vector[string]	vector[string]
+XXXXXXXXXX.XXXXXX	CHhAvVGS1DHFjwGM9	127.0.0.1	60618	127.0.0.1	80	1	GET	146.190.62.39	/index.html	-	1.1	Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0	-	0	0	200	OK	-	-	(empty)	-	-	-	-	-	-	-	-	-
+#close XXXX-XX-XX-XX-XX-XX
--- a/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.nonstandard
+++ b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.nonstandard
@ -0,0 +1,2 @@
+### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
+CHhAvVGS1DHFjwGM9	lowercase_HTTP_keyword
--- a/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.standard
+++ b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.standard
@ -0,0 +1,2 @@
+### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
+CHhAvVGS1DHFjwGM9	lowercase_HTTP_keyword
--- a/testing/btest/Traces/http/http-lower-case-nonstandard-port.pcap
+++ b/testing/btest/Traces/http/http-lower-case-nonstandard-port.pcap
--- a/testing/btest/Traces/http/http-lower-case.pcap
+++ b/testing/btest/Traces/http/http-lower-case.pcap
--- a/testing/btest/scripts/base/protocols/http/http-lower-case.zeek
+++ b/testing/btest/scripts/base/protocols/http/http-lower-case.zeek
@ -0,0 +1,21 @@
+# This tests whether the HTTP analyzer correctly handles the HTTP keyword when it is in lower case.
+#
+# @TEST-EXEC: zeek -C -b -r $TRACES/http/http-lower-case.pcap %INPUT
+# @TEST-EXEC: ! test -f dpd.log
+# @TEST-EXEC: ! test -f analyzer.log
+# @TEST-EXEC: zeek-cut uid name < weird.log > weird.log.standard
+# @TEST-EXEC: btest-diff weird.log.standard
+# @TEST-EXEC: mv http.log http.log.standard
+# @TEST-EXEC: btest-diff http.log.standard
+
+# @TEST-EXEC: rm *.log
+
+# @TEST-EXEC: zeek -C -b -r $TRACES/http/http-lower-case-nonstandard-port.pcap %INPUT
+# @TEST-EXEC: ! test -f dpd.log
+# @TEST-EXEC: ! test -f analyzer.log
+# @TEST-EXEC: zeek-cut uid name < weird.log > weird.log.nonstandard
+# @TEST-EXEC: btest-diff weird.log.nonstandard
+# @TEST-EXEC: mv http.log http.log.nonstandard
+# @TEST-EXEC: btest-diff http.log.nonstandard
+
+@load base/protocols/http