From 40935c31b1bc548f73e298f7c8068db72a7393a6 Mon Sep 17 00:00:00 2001 From: Kshitiz Bartariya Date: Mon, 21 Apr 2025 17:57:42 +0530 Subject: [PATCH] Ignore case when matching prefix in http analyzer --- NEWS | 20 +++++---- scripts/base/protocols/http/dpd.sig | 2 +- src/analyzer/protocol/http/HTTP.cc | 42 +++++++++++++++--- src/analyzer/protocol/http/HTTP.h | 5 ++- .../http.log.nonstandard | 11 +++++ .../http.log.standard | 11 +++++ .../weird.log.nonstandard | 2 + .../weird.log.standard | 2 + .../http-lower-case-nonstandard-port.pcap | Bin 0 -> 2087 bytes .../btest/Traces/http/http-lower-case.pcap | Bin 0 -> 2087 bytes .../base/protocols/http/http-lower-case.zeek | 21 +++++++++ 11 files changed, 98 insertions(+), 18 deletions(-) create mode 100644 testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.nonstandard create mode 100644 testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.standard create mode 100644 testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.nonstandard create mode 100644 testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.standard create mode 100644 testing/btest/Traces/http/http-lower-case-nonstandard-port.pcap create mode 100644 testing/btest/Traces/http/http-lower-case.pcap create mode 100644 testing/btest/scripts/base/protocols/http/http-lower-case.zeek diff --git a/NEWS b/NEWS index e65cf38e14..50c4faf128 100644 --- a/NEWS +++ b/NEWS @@ -6,14 +6,14 @@ release. 
For an exhaustive list of changes, see the ``CHANGES`` file Zeek 7.2.0 ========== -We would like to thank Aashish Sharma (@initconf), Anthony Verez (@netantho), -Anthony Kasza (@anthonykasza), @biswajitutil, Brendan Kapp (@BrendanKapp), -Carlos Lopez, Chris Hinshaw (@MMChrisHinshaw), Faan Rossouw (@faanross), -@FishyFluffer, Fupeng Zhao (@AmazingPP), Herbert (@Herbert-Karl), @jbaggs, Jan -Grashöfer (@J-Gras), Julian Krieger (@juliankrieger), Justin Azoff -(@JustinAzoff), @Laotree, Mark Overholser (@markoverholser), Mike Dopheide -(@dopheide-esnet), @mnhsrj, Mohan Dhawan (@Mohan-Dhawan), @philipp-tg, Seth Hall -(@sethhall), and @timo-mue for their contributions to this release. +We would like to thank Aashish Sharma (@initconf), Anthony Verez (@netantho), Anthony +Kasza (@anthonykasza), @biswajitutil, Brendan Kapp (@BrendanKapp), Carlos Lopez, Chris +Hinshaw (@MMChrisHinshaw), Faan Rossouw (@faanross), @FishyFluffer, Fupeng Zhao +(@AmazingPP), Herbert (@Herbert-Karl), @jbaggs, Jan Grashöfer (@J-Gras), Julian Krieger +(@juliankrieger), Justin Azoff (@JustinAzoff), Kshitiz Bartariya (@kshitiz56), @Laotree, +Mark Overholser (@markoverholser), Mike Dopheide (@dopheide-esnet), @mnhsrj, Mohan Dhawan +(@Mohan-Dhawan), @philipp-tg, Seth Hall (@sethhall), and @timo-mue for their contributions +to this release. Breaking Changes ---------------- @@ -162,6 +162,10 @@ New Functionality restrictions on using libkrb5 only on Linux platforms was removed. CMake will now search for it on all platforms as expected. +- The HTTP analyzer now matches the HTTP-name field case-insensitively, even though + the spec requires that field to be uppercase. If a non-uppercase string is + encountered, a new ``lowercase_HTTP_keyword`` weird is emitted. 
+ Changed Functionality --------------------- diff --git a/scripts/base/protocols/http/dpd.sig b/scripts/base/protocols/http/dpd.sig index 8412f6c1f8..e8186756e1 100644 --- a/scripts/base/protocols/http/dpd.sig +++ b/scripts/base/protocols/http/dpd.sig @@ -14,7 +14,7 @@ signature dpd_http_client { signature dpd_http_server { ip-proto == tcp - payload /^HTTP\/[0-9]/ + payload /^[hH][tT][tT][pP]\/[0-9]/ tcp-state responder enable "http" } diff --git a/src/analyzer/protocol/http/HTTP.cc b/src/analyzer/protocol/http/HTTP.cc index 3d5bdc246f..a1346a781f 100644 --- a/src/analyzer/protocol/http/HTTP.cc +++ b/src/analyzer/protocol/http/HTTP.cc @@ -1097,8 +1097,10 @@ void HTTP_Analyzer::GenStats() { } } -const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line, const char* prefix) { - while ( *prefix && line < end_of_line && *prefix == *line ) { +const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line, const char* prefix, + bool ignore_case) { + while ( *prefix && line < end_of_line && + ((ignore_case && tolower((unsigned char)*prefix) == tolower((unsigned char)*line)) || *prefix == *line) ) { ++prefix; ++line; } @@ -1110,8 +1112,9 @@ const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line return line; } -const char* HTTP_Analyzer::PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix) { - if ( (line = PrefixMatch(line, end_of_line, prefix)) == nullptr ) +const char* HTTP_Analyzer::PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix, + bool ignore_case) { + if ( (line = PrefixMatch(line, end_of_line, prefix, ignore_case)) == nullptr ) return nullptr; const char* orig_line = line; @@ -1193,13 +1196,23 @@ bool HTTP_Analyzer::ParseRequest(const char* line, const char* end_of_line) { const char* end_of_uri; const char* version_start; const char* version_end; + const char* match; for ( end_of_uri = line; end_of_uri < end_of_line; ++end_of_uri ) { if ( ! 
is_reserved_URI_char(*end_of_uri) && ! is_unreserved_URI_char(*end_of_uri) && *end_of_uri != '%' ) break; } - if ( end_of_uri >= end_of_line && PrefixMatch(line, end_of_line, "HTTP/") ) { + match = PrefixMatch(line, end_of_line, "HTTP/", false); + if ( ! match ) { + // If the uppercase version didn't match, try a case-insensitive version, but + // send a weird if it matches. + match = PrefixMatch(line, end_of_line, "HTTP/", true); + if ( match ) + Weird("lowercase_HTTP_keyword"); + } + + if ( end_of_uri >= end_of_line && match ) { Weird("missing_HTTP_uri"); end_of_uri = line; // Leave URI empty. } @@ -1207,8 +1220,14 @@ bool HTTP_Analyzer::ParseRequest(const char* line, const char* end_of_line) { for ( version_start = end_of_uri; version_start < end_of_line; ++version_start ) { end_of_uri = version_start; version_start = util::skip_whitespace(version_start, end_of_line); - if ( PrefixMatch(version_start, end_of_line, "HTTP/") ) + if ( PrefixMatch(version_start, end_of_line, "HTTP/", false) ) break; + // If the uppercase version didn't match, try a case-insensitive version, but + // send a weird if it matches. + if ( PrefixMatch(version_start, end_of_line, "HTTP/", true) ) { + Weird("lowercase_HTTP_keyword"); + break; + } } if ( version_start >= end_of_line ) { @@ -1453,7 +1472,16 @@ const String* HTTP_Analyzer::UnansweredRequestMethod() { int HTTP_Analyzer::HTTP_ReplyLine(const char* line, const char* end_of_line) { const char* rest; - if ( ! (rest = PrefixMatch(line, end_of_line, "HTTP/")) ) { + rest = PrefixMatch(line, end_of_line, "HTTP/", false); + if ( ! rest ) { + // If the uppercase version didn't match, try a case-insensitive version, but + // send a weird if it matches. + rest = PrefixMatch(line, end_of_line, "HTTP/", true); + if ( rest ) + Weird("lowercase_HTTP_keyword"); + } + + if ( ! rest ) { // ##TODO: some server replies with an HTML document // without a status line and a MIME header, when the // request is malformed. 
diff --git a/src/analyzer/protocol/http/HTTP.h b/src/analyzer/protocol/http/HTTP.h index 15feb9e313..ffe57a187b 100644 --- a/src/analyzer/protocol/http/HTTP.h +++ b/src/analyzer/protocol/http/HTTP.h @@ -209,8 +209,9 @@ protected: void InitHTTPMessage(analyzer::tcp::ContentLine_Analyzer* cl, HTTP_Message*& message, bool is_orig, int expect_body, int64_t init_header_length); - const char* PrefixMatch(const char* line, const char* end_of_line, const char* prefix); - const char* PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix); + const char* PrefixMatch(const char* line, const char* end_of_line, const char* prefix, bool ignore_case = false); + const char* PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix, + bool ignore_case = false); bool ParseRequest(const char* line, const char* end_of_line); HTTP_VersionNumber HTTP_Version(int len, const char* data); diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.nonstandard b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.nonstandard new file mode 100644 index 0000000000..c2dc134cca --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.nonstandard @@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path http +#open XXXX-XX-XX-XX-XX-XX +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer version user_agent origin request_body_len response_body_len status_code status_msg info_code info_msg tags username password proxied orig_fuids orig_filenames orig_mime_types resp_fuids resp_filenames resp_mime_types +#types time string addr port addr port count string string string string string string string count count count string count string set[enum] string string set[string] vector[string] vector[string] vector[string] vector[string] vector[string] vector[string] +XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 127.0.0.1 49742 127.0.0.1 1234 1 GET 146.190.62.39 /index.html - 1.1 Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0 - 0 0 200 OK - - (empty) - - - - - - - - - +#close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.standard b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.standard new file mode 100644 index 0000000000..8d7b905bf3 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/http.log.standard @@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path http +#open XXXX-XX-XX-XX-XX-XX +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer version user_agent origin request_body_len response_body_len status_code status_msg info_code info_msg tags username password proxied orig_fuids orig_filenames orig_mime_types resp_fuids resp_filenames resp_mime_types +#types time string addr port addr port count string string string string string string string count count count string count string set[enum] string string set[string] vector[string] vector[string] vector[string] vector[string] vector[string] vector[string] +XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 127.0.0.1 60618 127.0.0.1 80 1 GET 146.190.62.39 /index.html - 1.1 Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0 - 0 0 200 OK - - (empty) - - - - - - - - - +#close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.nonstandard b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.nonstandard new file mode 100644 index 0000000000..5cb41592a6 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.nonstandard @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +CHhAvVGS1DHFjwGM9 lowercase_HTTP_keyword diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.standard b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.standard new file mode 100644 index 0000000000..5cb41592a6 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.http.http-lower-case/weird.log.standard @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+CHhAvVGS1DHFjwGM9 lowercase_HTTP_keyword diff --git a/testing/btest/Traces/http/http-lower-case-nonstandard-port.pcap b/testing/btest/Traces/http/http-lower-case-nonstandard-port.pcap new file mode 100644 index 0000000000000000000000000000000000000000..86a8dec9eec421f1227e2ee48c1bcaaf96d5697e GIT binary patch literal 2087 zcmaKt&u`pB6vw^EDpZy%wTJ@;4x>Vqv|Zcl&1OlQq$HFy1(GysHfkg!VB#Hnr}56X zGvjm<2@z2HCva#_{u##@4pUmfSYd-V&L6_Y2*)f3r@ZkK2@p1xu%(ea1RPAdOh%X={ zx}APBG52fhnkIhw&p_gvBgV;3uM(G+s>r3RMw(7tHDYwLT3vTN$Ft0vTq+-V<%;9Y z6rD=RnVhlAO-Y2k7!d})R{1`SW9&{jMKrPHd9!GRvb2es)0NLE7QdgulfB;-vu`)iqtK|m7VDbiG~cA|ijB&GpYlry)fN3J%{jyB@id-FwS zra&7wA}-#gVW)eaY$u&=!lF)3?nW8oihCuSH{D&AIsbzfxWzbJ?mfRjFCV)?xP(-5*x>K7i>3^NZ=Z= zKV*^wsUY@kaxW!P$!ylTpj=S3=c9DKSU}V=EmNm@Yp8gRU+7fd{kxm$jw~PgNA9D8 z^SS>9iRX=X@`f5UvUXNHqv3H&6Ql135H0`f{|F%dAaG=;o(eQ(k)SJ3im)iNQ33iM@H_YyGf ziNZ2huvGRc}q&tFha*$$~?OAO4? zQWZm*@3xmdnvnYRg$1i;og2ov%om7syLA#1Qx`+gm?69_J4XT&nM z?9==L}!fJpbgQ@P#g_p<=vPPH_9euDgQ8Y^3h0F6r$ zhv20Q1WnpQL5oY`g&+XwM4U%4!3hPLMx;#Qkl2*P zG>eS3%1{H%JA+ceArfSOK`YLpP(Xlf%mS!IE@Uqhf+Soh8@7(L$ffVP5M!n4ByJF= z|5u}3;nv(hCN5zi7XegzH}!q#>7%1t)o(!!4!IKE%dUj4yEWLp65K~)Tdk*1)&CBv zYC4>_kP*KcNW3tZcsP`J{AYIZ{M9A4PoCCOIC&l)To`<99Sh($ literal 0 HcmV?d00001 diff --git a/testing/btest/Traces/http/http-lower-case.pcap b/testing/btest/Traces/http/http-lower-case.pcap new file mode 100644 index 0000000000000000000000000000000000000000..78531e02470e426a541199e8145bec5d57e6421b GIT binary patch literal 2087 zcmaKt&u`l{6vt&JSOQ@x(w%lBxt5?c_79t!UcC_RY)M<&VCj-)mY%wfE1o%4_pYPFX(6 z9>4gI*R<^9@r%jqw0?N+{>N%OGchrx$e)cRYnmco{HaIQD(|1Yd)WRF=;otOj-L;c z6}?9;0r^WT|KZminCZ$X%aJpdTL0WVH`-;A5uauc9hn?cb-hmk<)wPCkm5 z`K7(Ch~K^(N&Gac9Ut|Hh9a)6RFO?+9k;B8Y)0sMwOX+q%Q5upOh^|w#j@or=B;wU znqM^ZErGeY9AXN;*4YCRMV?)<@@Vd^<1C>yLX#G1EtJ157ncy!53dVPy ze!{VN8}B4oNRiH}aKbo|`z}f@<#UJ_x}mF7FGH&TUp%E$@KbxeRCi^0_}YB?$VEVK-9>kBJ%@F^6T^)VfhhD>QE*eTRv+J?Bsrg)0tz;PyhlDR>)WwVJ{{?Q;38LFTf_D 
z5lJJXtWwlS^X{OKaESORVAP6BDBuua)1yArBIBYT3XWsOr3qU{O60<|ZHTdy4IDMG zHTbL0Hn;1xFJc?hfN>wHy_fod^wiPOtLlHoR(1PUdL@Wn4R)@Cw@=?z`z-Z}AjEU}bcGE@C)t8?-ky@HeH>A~4k*OnrlQ76yyvBY*I S^>Js&$EUFCKZkam68{5|n`PSo literal 0 HcmV?d00001 diff --git a/testing/btest/scripts/base/protocols/http/http-lower-case.zeek b/testing/btest/scripts/base/protocols/http/http-lower-case.zeek new file mode 100644 index 0000000000..0ae0cd06b6 --- /dev/null +++ b/testing/btest/scripts/base/protocols/http/http-lower-case.zeek @@ -0,0 +1,21 @@ +# This tests whether the HTTP analyzer handles HTTP keyword in lower case correctly or not. +# +# @TEST-EXEC: zeek -C -b -r $TRACES/http/http-lower-case.pcap %INPUT +# @TEST-EXEC: ! test -f dpd.log +# @TEST-EXEC: ! test -f analyzer.log +# @TEST-EXEC: zeek-cut uid name < weird.log > weird.log.standard +# @TEST-EXEC: btest-diff weird.log.standard +# @TEST-EXEC: mv http.log http.log.standard +# @TEST-EXEC: btest-diff http.log.standard + +# @TEST-EXEC: rm *.log + +# @TEST-EXEC: zeek -C -b -r $TRACES/http/http-lower-case-nonstandard-port.pcap %INPUT +# @TEST-EXEC: ! test -f dpd.log +# @TEST-EXEC: ! test -f analyzer.log +# @TEST-EXEC: zeek-cut uid name < weird.log > weird.log.nonstandard +# @TEST-EXEC: btest-diff weird.log.nonstandard +# @TEST-EXEC: mv http.log http.log.nonstandard +# @TEST-EXEC: btest-diff http.log.nonstandard + +@load base/protocols/http