mirror of
https://github.com/zeek/zeek.git
synced 2025-10-15 13:08:20 +00:00
Merge remote-tracking branch 'origin/topic/jsiwek/bit-1235'
* origin/topic/jsiwek/bit-1235: Improve multipart HTTP/MIME entity file analysis. BIT-1235 #merged
This commit is contained in:
commit
53eb197b94
15 changed files with 91 additions and 32 deletions
|
@ -466,6 +466,20 @@ void HTTP_Entity::SubmitAllHeaders()
|
|||
if ( DEBUG_http )
|
||||
DEBUG_MSG("%.6f end of headers\n", network_time);
|
||||
|
||||
if ( Parent() &&
|
||||
Parent()->MIMEContentType() == mime::CONTENT_TYPE_MULTIPART )
|
||||
{
|
||||
// Don't treat single \r or \n characters in the multipart body content
|
||||
// as lines because the MIME_Entity code will implicitly add back a
|
||||
// \r\n for each line it receives. We do this instead of setting
|
||||
// plain delivery mode for the content line analyzer because
|
||||
// the size of the content to deliver "plainly" may be unknown
|
||||
// and just leaving it in that mode indefinitely screws up the
|
||||
// detection of multipart boundaries.
|
||||
http_message->content_line->SupressWeirds(true);
|
||||
http_message->content_line->SetCRLFAsEOL(0);
|
||||
}
|
||||
|
||||
// The presence of a message-body in a request is signaled by
|
||||
// the inclusion of a Content-Length or Transfer-Encoding
|
||||
// header field in the request's message-headers.
|
||||
|
@ -655,6 +669,13 @@ void HTTP_Message::EndEntity(mime::MIME_Entity* entity)
|
|||
|
||||
current_entity = (HTTP_Entity*) entity->Parent();
|
||||
|
||||
if ( entity->Parent() &&
|
||||
entity->Parent()->MIMEContentType() == mime::CONTENT_TYPE_MULTIPART )
|
||||
{
|
||||
content_line->SupressWeirds(false);
|
||||
content_line->SetCRLFAsEOL();
|
||||
}
|
||||
|
||||
// It is necessary to call Done when EndEntity is triggered by
|
||||
// SubmitAllHeaders (through EndOfData).
|
||||
if ( entity == top_level )
|
||||
|
|
|
@ -99,6 +99,8 @@ enum {
|
|||
// HTTP_MessageDone -> {Request,Reply}Made
|
||||
|
||||
class HTTP_Message : public mime::MIME_Message {
|
||||
friend class HTTP_Entity;
|
||||
|
||||
public:
|
||||
HTTP_Message(HTTP_Analyzer* analyzer, tcp::ContentLine_Analyzer* cl,
|
||||
bool is_orig, int expect_body, int64_t init_header_length);
|
||||
|
|
|
@ -553,6 +553,7 @@ void MIME_Entity::init()
|
|||
data_buf_offset = -1;
|
||||
|
||||
message = 0;
|
||||
delay_adding_implicit_CRLF = false;
|
||||
}
|
||||
|
||||
MIME_Entity::~MIME_Entity()
|
||||
|
@ -1003,12 +1004,33 @@ void MIME_Entity::DecodeDataLine(int len, const char* data, int trailing_CRLF)
|
|||
|
||||
// Passes `len` bytes of `data` to DataOctets(). Before doing so, emits a
// CR LF pair if an earlier call deferred one (delay_adding_implicit_CRLF).
// When `trailing_CRLF` is non-zero, a CR LF pair follows the data; if this
// entity's parent has MIME content type CONTENT_TYPE_MULTIPART, that pair
// is not emitted now but deferred until the next call.
void MIME_Entity::DecodeBinary(int len, const char* data, int trailing_CRLF)
|
||||
{
|
||||
if ( delay_adding_implicit_CRLF )
|
||||
{
|
||||
// Flush the CR LF that the previous call held back.
delay_adding_implicit_CRLF = false;
|
||||
DataOctet(CR);
|
||||
DataOctet(LF);
|
||||
}
|
||||
|
||||
DataOctets(len, data);
|
||||
|
||||
if ( trailing_CRLF )
|
||||
{
|
||||
// NOTE(review): this listing is a rendered diff with its +/- markers
// stripped. The next two DataOctet calls appear to be the pre-change
// (removed) lines, consistent with the hunk header's 12 -> 33 line
// counts; otherwise the non-multipart path would emit CR LF twice.
// Confirm against the repository.
DataOctet(CR);
|
||||
DataOctet(LF);
|
||||
if ( Parent() &&
|
||||
Parent()->MIMEContentType() == mime::CONTENT_TYPE_MULTIPART )
|
||||
{
|
||||
// For multipart body content, we want to keep all implicit CRLFs
|
||||
// except for the last because that one belongs to the multipart
|
||||
// boundary delimiter, not the content. Simply delaying the
|
||||
// addition of implicit CRLFs until another chunk of content
|
||||
// data comes in is a way to prevent the CRLF before the final
|
||||
// message boundary from being accidentally added to the content.
|
||||
delay_adding_implicit_CRLF = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// No multipart parent: emit the trailing CR LF immediately.
DataOctet(CR);
|
||||
DataOctet(LF);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -173,6 +173,7 @@ protected:
|
|||
int data_buf_offset;
|
||||
|
||||
MIME_Message* message;
|
||||
bool delay_adding_implicit_CRLF;
|
||||
};
|
||||
|
||||
// The reason I separate MIME_Message as an abstract class is to
|
||||
|
|
|
@ -32,6 +32,7 @@ void ContentLine_Analyzer::InitState()
|
|||
seq_to_skip = 0;
|
||||
plain_delivery_length = 0;
|
||||
is_plain = 0;
|
||||
suppress_weirds = false;
|
||||
|
||||
InitBuffer(0);
|
||||
}
|
||||
|
@ -258,7 +259,7 @@ int ContentLine_Analyzer::DoDeliverOnce(int len, const u_char* data)
|
|||
|
||||
else
|
||||
{
|
||||
if ( Conn()->FlagEvent(SINGULAR_LF) )
|
||||
if ( ! suppress_weirds && Conn()->FlagEvent(SINGULAR_LF) )
|
||||
Conn()->Weird("line_terminated_with_single_LF");
|
||||
buf[offset++] = c;
|
||||
}
|
||||
|
@ -277,7 +278,7 @@ int ContentLine_Analyzer::DoDeliverOnce(int len, const u_char* data)
|
|||
}
|
||||
|
||||
if ( last_char == '\r' )
|
||||
if ( Conn()->FlagEvent(SINGULAR_CR) )
|
||||
if ( ! suppress_weirds && Conn()->FlagEvent(SINGULAR_CR) )
|
||||
Conn()->Weird("line_terminated_with_single_CR");
|
||||
|
||||
last_char = c;
|
||||
|
@ -307,7 +308,7 @@ void ContentLine_Analyzer::CheckNUL()
|
|||
; // Ignore it.
|
||||
else
|
||||
{
|
||||
if ( Conn()->FlagEvent(NUL_IN_LINE) )
|
||||
if ( ! suppress_weirds && Conn()->FlagEvent(NUL_IN_LINE) )
|
||||
Conn()->Weird("NUL_in_line");
|
||||
flag_NULs = 0;
|
||||
}
|
||||
|
|
|
@ -15,6 +15,9 @@ public:
|
|||
ContentLine_Analyzer(Connection* conn, bool orig);
|
||||
~ContentLine_Analyzer();
|
||||
|
||||
// Stores `enable` in suppress_weirds. While that flag is true, the
// single-LF, single-CR and NUL-in-line checks shown in this diff skip
// their Conn()->Weird() calls.
// NOTE(review): the name is misspelled ("Supress"); the HTTP analyzer
// calls it under this spelling, so any rename must update those callers.
void SupressWeirds(bool enable)
|
||||
{ suppress_weirds = enable; }
|
||||
|
||||
// If enabled, flag (first) line with embedded NUL. Default off.
|
||||
void SetIsNULSensitive(bool enable)
|
||||
{ flag_NULs = enable; }
|
||||
|
@ -96,6 +99,8 @@ protected:
|
|||
// Don't deliver further data.
|
||||
int skip_deliveries;
|
||||
|
||||
bool suppress_weirds;
|
||||
|
||||
// If true, flag (first) line with embedded NUL.
|
||||
unsigned int flag_NULs:1;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue