Merge remote-tracking branch 'origin/topic/jsiwek/bit-1235'

* origin/topic/jsiwek/bit-1235:
  Improve multipart HTTP/MIME entity file analysis.

BIT-1235 #merged
This commit is contained in:
Robin Sommer 2014-10-16 06:51:49 -07:00
commit 53eb197b94
15 changed files with 91 additions and 32 deletions

View file

@ -466,6 +466,20 @@ void HTTP_Entity::SubmitAllHeaders()
if ( DEBUG_http )
DEBUG_MSG("%.6f end of headers\n", network_time);
if ( Parent() &&
Parent()->MIMEContentType() == mime::CONTENT_TYPE_MULTIPART )
{
// Don't treat single \r or \n characters in the multipart body content
// as lines because the MIME_Entity code will implicitly add back a
// \r\n for each line it receives. We do this instead of setting
// plain delivery mode for the content line analyzer because
// the size of the content to deliver "plainly" may be unknown
// and just leaving it in that mode indefinitely screws up the
// detection of multipart boundaries.
http_message->content_line->SupressWeirds(true);
http_message->content_line->SetCRLFAsEOL(0);
}
// The presence of a message-body in a request is signaled by
// the inclusion of a Content-Length or Transfer-Encoding
// header field in the request's message-headers.
@ -655,6 +669,13 @@ void HTTP_Message::EndEntity(mime::MIME_Entity* entity)
current_entity = (HTTP_Entity*) entity->Parent();
if ( entity->Parent() &&
entity->Parent()->MIMEContentType() == mime::CONTENT_TYPE_MULTIPART )
{
content_line->SupressWeirds(false);
content_line->SetCRLFAsEOL();
}
// It is necessary to call Done when EndEntity is triggered by
// SubmitAllHeaders (through EndOfData).
if ( entity == top_level )

View file

@ -99,6 +99,8 @@ enum {
// HTTP_MessageDone -> {Request,Reply}Made
class HTTP_Message : public mime::MIME_Message {
friend class HTTP_Entity;
public:
HTTP_Message(HTTP_Analyzer* analyzer, tcp::ContentLine_Analyzer* cl,
bool is_orig, int expect_body, int64_t init_header_length);

View file

@ -553,6 +553,7 @@ void MIME_Entity::init()
data_buf_offset = -1;
message = 0;
delay_adding_implicit_CRLF = false;
}
MIME_Entity::~MIME_Entity()
@ -1003,12 +1004,33 @@ void MIME_Entity::DecodeDataLine(int len, const char* data, int trailing_CRLF)
// Delivers one chunk of binary-encoded body data through DataOctets() and
// accounts for the implicit CRLF that follows it when trailing_CRLF is set.
//
// len:           number of bytes in data.
// data:          the content bytes of this chunk.
// trailing_CRLF: non-zero if an implicit CRLF follows this chunk.
//
// The implicit CRLF is emitted exactly once: either right away, or (for
// entities whose parent is multipart) at the start of the next call.
void MIME_Entity::DecodeBinary(int len, const char* data, int trailing_CRLF)
	{
	if ( delay_adding_implicit_CRLF )
		{
		// More content arrived after the CRLF that was held back by the
		// previous call, so that CRLF is part of the content: emit it now.
		delay_adding_implicit_CRLF = false;
		DataOctet(CR);
		DataOctet(LF);
		}

	DataOctets(len, data);

	if ( trailing_CRLF )
		{
		if ( Parent() &&
		     Parent()->MIMEContentType() == mime::CONTENT_TYPE_MULTIPART )
			{
			// For multipart body content, we want to keep all implicit CRLFs
			// except for the last because that one belongs to the multipart
			// boundary delimiter, not the content. Simply delaying the
			// addition of implicit CRLFs until another chunk of content
			// data comes in is a way to prevent the CRLF before the final
			// message boundary from being accidentally added to the content.
			delay_adding_implicit_CRLF = true;
			}
		else
			{
			DataOctet(CR);
			DataOctet(LF);
			}
		}
	}

View file

@ -173,6 +173,7 @@ protected:
int data_buf_offset;
MIME_Message* message;
bool delay_adding_implicit_CRLF;
};
// The reason I separate MIME_Message as an abstract class is to

View file

@ -32,6 +32,7 @@ void ContentLine_Analyzer::InitState()
seq_to_skip = 0;
plain_delivery_length = 0;
is_plain = 0;
suppress_weirds = false;
InitBuffer(0);
}
@ -258,7 +259,7 @@ int ContentLine_Analyzer::DoDeliverOnce(int len, const u_char* data)
else
{
if ( Conn()->FlagEvent(SINGULAR_LF) )
if ( ! suppress_weirds && Conn()->FlagEvent(SINGULAR_LF) )
Conn()->Weird("line_terminated_with_single_LF");
buf[offset++] = c;
}
@ -277,7 +278,7 @@ int ContentLine_Analyzer::DoDeliverOnce(int len, const u_char* data)
}
if ( last_char == '\r' )
if ( Conn()->FlagEvent(SINGULAR_CR) )
if ( ! suppress_weirds && Conn()->FlagEvent(SINGULAR_CR) )
Conn()->Weird("line_terminated_with_single_CR");
last_char = c;
@ -307,7 +308,7 @@ void ContentLine_Analyzer::CheckNUL()
; // Ignore it.
else
{
if ( Conn()->FlagEvent(NUL_IN_LINE) )
if ( ! suppress_weirds && Conn()->FlagEvent(NUL_IN_LINE) )
Conn()->Weird("NUL_in_line");
flag_NULs = 0;
}

View file

@ -15,6 +15,9 @@ public:
ContentLine_Analyzer(Connection* conn, bool orig);
~ContentLine_Analyzer();
void SupressWeirds(bool enable)
{ suppress_weirds = enable; }
// If enabled, flag (first) line with embedded NUL. Default off.
void SetIsNULSensitive(bool enable)
{ flag_NULs = enable; }
@ -96,6 +99,8 @@ protected:
// Don't deliver further data.
int skip_deliveries;
bool suppress_weirds;
// If true, flag (first) line with embedded NUL.
unsigned int flag_NULs:1;