Special case HTTP 0.9 early on

Mostly, treat HTTP 0.9 completely separately. Because we're doing raw
delivery of a body directly, fake enough state (connection_close=1, and
finish headers manually) so that the MIME infrastructure thinks it is
seeing a body.

This deals better with the body because it accounts for the first line.
Also, by fully bypassing the content line analyzer, it avoids the content
line analyzer stripping CRLF/LF and the analyzer then adding CRLF
unconditionally.

Concretely, the vlan-mpls test case contains an HTTP response with LF only,
but the previous implementation would use CRLF, accounting for too many bytes.
Same for the http.no-version test, which previously reported a body
length of 280 and is now at 323 (which agrees with Wireshark).

Further, the mime_type detection for the http-09 test case works because
it's now seeing the full body.

Drawback: We don't extract headers when a server actually replies to an
HTTP/0.9 request with an HTTP/1.1 message, but grrr, something needs to
give I guess.
This commit is contained in:
Tim Wojtulewicz 2023-03-06 16:19:37 -07:00
parent 220d8a2795
commit 0003495a9b
11 changed files with 82 additions and 23 deletions

View file

@ -37,6 +37,7 @@ enum HTTP_ExpectReply
EXPECT_REPLY_MESSAGE,
EXPECT_REPLY_TRAILER,
EXPECT_REPLY_NOTHING,
EXPECT_REPLY_HTTP09,
};
HTTP_Entity::HTTP_Entity(HTTP_Message* arg_message, analyzer::mime::MIME_Entity* parent_entity,
@ -898,6 +899,37 @@ void HTTP_Analyzer::DeliverStream(int len, const u_char* data, bool is_orig)
const char* line = reinterpret_cast<const char*>(data);
const char* end_of_line = line + len;
// HTTP 0.9 is just raw data directly from the server, special case.
if ( reply_state == EXPECT_REPLY_HTTP09 && ! is_orig )
{
if ( ! reply_message )
{
SetVersion(&reply_version, {0, 9});
if ( ! unanswered_requests.empty() )
{
AnalyzerConfirmation();
unanswered_requests.pop();
}
// Expect the server to close the connection after replying. This is used within
// HTTP_Message() below to switch the message into plain delivery mode (and
// the content_line_analyzer, but that's not used anymore).
connection_close = 1;
reply_ongoing = 1;
HTTP_Reply();
InitHTTPMessage(content_line_resp, reply_message, is_orig, ExpectReplyMessageBody(), 0);
// Finish header processing right away and switch into plain delivery.
// Need trailing_CRLF set to avoid a weird.
reply_message->Deliver(0, "", true);
}
reply_message->Deliver(len, line, false);
return;
}
analyzer::tcp::ContentLine_Analyzer* content_line = is_orig ? content_line_orig
: content_line_resp;
@ -947,6 +979,14 @@ void HTTP_Analyzer::DeliverStream(int len, const u_char* data, bool is_orig)
unanswered_requests.push(request_method);
HTTP_Request();
InitHTTPMessage(content_line, request_message, is_orig, HTTP_BODY_MAYBE, len);
// For HTTP/0.9, turn off the content_line analyzer for the
// responder because we expect raw data.
if ( request_version == HTTP_VersionNumber{0, 9} )
{
reply_state = EXPECT_REPLY_HTTP09;
RemoveSupportAnalyzer(content_line_resp);
}
}
else
@ -1048,6 +1088,8 @@ void HTTP_Analyzer::DeliverStream(int len, const u_char* data, bool is_orig)
break;
case EXPECT_REPLY_HTTP09:
// unreachable
case EXPECT_REPLY_TRAILER:
case EXPECT_REPLY_NOTHING:
break;
@ -1229,7 +1271,7 @@ int HTTP_Analyzer::HTTP_RequestLine(const char* line, const char* end_of_line)
// If we determined HTTP/0.9 (no HTTP/ in the request line), assert that
// minimally we have a URI and a 3 character method (HTTP 0.9 only
// supports GET). If that doesn't hold, probably not HTTP or very strange.
if ( request_version.major == 0 && request_version.minor == 9 )
if ( request_version == HTTP_VersionNumber{0, 9} )
{
bool maybe_get_method = (end_of_method - line) >= 3;
bool has_uri = request_URI && request_URI->Len() > 0;