#include "zeek-config.h"

#include "MIME.h"
#include "NetVar.h"
#include "Base64.h"
#include "Reporter.h"
#include "digest.h"
#include "file_analysis/Manager.h"

#include "events.bif.h"

// Here are a few things to do:
//
// 1. Add a Bro internal function 'stop_deliver_data_of_entity' so
// that the engine does not decode and deliver further data for the
// entity (which may speed up the engine by avoiding copying).
//
// 2. Better support for structured header fields, in particular,
// headers of the form: <name>=<value>; <attr>=<value>; <attr>=<value>; ...
// (NOTE(review): the placeholder names above are reconstructed, and the
// original note breaks off after "(so that".)

namespace analyzer { namespace mime {

// Sentinel chunk meaning "no data"; recognized by is_null_data_chunk().
static const zeek::data_chunk_t null_data_chunk = { 0, nullptr };

// Global switches consulted by MIME_Entity::Deliver(), NewDataLine() and
// DecodeDataLine() respectively.
int mime_header_only = 0;
int mime_decode_data = 1;
int mime_submit_data = 1;

// Indices into MIMEHeaderName[].
enum MIME_HEADER_FIELDS {
	MIME_CONTENT_TYPE,
	MIME_CONTENT_TRANSFER_ENCODING,
	MIME_FIELD_OTHER,
};

// Indices into MIMEContentSubtypeName[].
enum MIME_CONTENT_SUBTYPE {
	CONTENT_SUBTYPE_MIXED,		// for multipart
	CONTENT_SUBTYPE_ALTERNATIVE,	// for multipart
	CONTENT_SUBTYPE_DIGEST,		// for multipart
	CONTENT_SUBTYPE_RFC822,		// for message
	CONTENT_SUBTYPE_PARTIAL,	// for message
	CONTENT_SUBTYPE_EXTERNAL_BODY,	// for message
	CONTENT_SUBTYPE_PLAIN,		// for text
	CONTENT_SUBTYPE_OTHER,
};

// Indices into MIMEContentEncodingName[].
enum MIME_CONTENT_ENCODING {
	CONTENT_ENCODING_7BIT,
	CONTENT_ENCODING_8BIT,
	CONTENT_ENCODING_BINARY,
	CONTENT_ENCODING_QUOTED_PRINTABLE,
	CONTENT_ENCODING_BASE64,
	CONTENT_ENCODING_OTHER,
};

// Result of MIME_Entity::CheckBoundaryDelimiter().
enum MIME_BOUNDARY_DELIMITER {
	NOT_MULTIPART_BOUNDARY,
	MULTIPART_BOUNDARY,
	MULTIPART_CLOSING_BOUNDARY,
};

// The name tables below are nullptr-terminated; the index of the
// terminator doubles as the "other" enum value of the matching enum.
static const char* MIMEHeaderName[] = {
	"content-type",
	"content-transfer-encoding",
	nullptr,
};

static const char* MIMEContentTypeName[] = {
	"MULTIPART",
	"MESSAGE",
	"TEXT",
	nullptr,
};

static const char* MIMEContentSubtypeName[] = {
	"MIXED",		// for multipart
	"ALTERNATIVE",		// for multipart
	"DIGEST",		// for multipart
	"RFC822",		// for message
	"PARTIAL",		// for message
	"EXTERNAL-BODY",	// for message
	"PLAIN",		// for text
	nullptr,		// other
};

static const char* MIMEContentEncodingName[] = {
	"7BIT",
	"8BIT",
	"BINARY",
	"QUOTED-PRINTABLE",
	"BASE64",
	nullptr,
};

// Returns true if the chunk carries no data pointer.
bool is_null_data_chunk(zeek::data_chunk_t b)
	{
	return b.data == nullptr;
	}

// Returns true for linear whitespace: horizontal tab (9) or space (32).
bool is_lws(char ch)
	{
	return ch == 9 || ch == 32;
	}

// Raw-pointer variants of to_string_val(); the caller owns the result.
zeek::StringVal* new_string_val(int length, const char* data)
	{
	return to_string_val(length, data).release();
	}

zeek::StringVal* new_string_val(const char* data, const char* end_of_data)
	{
	return to_string_val(data, end_of_data).release();
	}

zeek::StringVal* new_string_val(const zeek::data_chunk_t buf)
	{
	return to_string_val(buf).release();
	}

// Builds a StringVal from 'length' bytes starting at 'data'.
zeek::StringValPtr to_string_val(int length, const char* data)
	{
	return zeek::make_intrusive<zeek::StringVal>(length, data);
	}

// Builds a StringVal from the half-open range [data, end_of_data).
zeek::StringValPtr to_string_val(const char* data, const char* end_of_data)
	{
	return zeek::make_intrusive<zeek::StringVal>(end_of_data - data, data);
	}

// Builds a StringVal from a data chunk.
zeek::StringValPtr to_string_val(const zeek::data_chunk_t buf)
	{
	return to_string_val(buf.length, buf.data);
	}

// Returns a chunk that points at the bytes of 's' (no copy is made).
static zeek::data_chunk_t get_data_chunk(zeek::String* s)
	{
	zeek::data_chunk_t b;
	b.length = s->Len();
	b.data = (const char*) s->Bytes();
	return b;
	}

// Writes every byte of the chunk to 'fp'. Returns 0 on success, EOF on
// the first failed write.
int fputs(zeek::data_chunk_t b, FILE* fp)
	{
	for ( int i = 0; i < b.length; ++i )
		if ( fputc(b.data[i], fp) == EOF )
			return EOF;

	return 0;
	}

// Reports a gap of 'len' bytes at the current entity offset to the file
// manager and keeps the file id it returns.
void MIME_Mail::Undelivered(int len)
	{
	cur_entity_id = file_mgr->Gap(cur_entity_len, len,
	                              analyzer->GetAnalyzerTag(), analyzer->Conn(),
	                              is_orig, cur_entity_id);
	}

// Case-insensitive comparison of chunk 's' against the C string 't'.
bool istrequal(zeek::data_chunk_t s, const char* t)
	{
	int len = strlen(t);

	if ( s.length != len )
		return false;

	return strncasecmp(s.data, t, len) == 0;
	}

// Number of linear-whitespace characters at the start of data.
int MIME_count_leading_lws(int len, const char* data)
	{
	int i;
	for ( i = 0; i < len; ++i )
		if ( ! is_lws(data[i]) )
			break;

	return i;
	}

// Number of linear-whitespace characters at the end of data.
int MIME_count_trailing_lws(int len, const char* data)
	{
	int i;
	for ( i = 0; i < len; ++i )
		if ( ! is_lws(data[len - 1 - i]) )
			break;

	return i;
	}

// See RFC 2822, page 11
// Returns the length of the (possibly nested) comment at the start of
// data, 0 if data does not start with '(', or len if it is unterminated.
int MIME_skip_comments(int len, const char* data)
	{
	if ( len == 0 || data[0] != '(' )
		return 0;

	int par = 0;
	for ( int i = 0; i < len; ++i )
		{
		switch ( data[i] ) {
		case '(':
			++par;
			break;

		case ')':
			--par;
			if ( par == 0 )
				return i + 1;
			break;

		case '\\':
			// Quoted pair: skip the escaped character.
			++i;
			break;
		}
		}

	return len;
	}

// Skip over lws and comments, but not tspecials. Do not use this
// function in quoted-string or comments.
int MIME_skip_lws_comments(int len, const char* data)
	{
	int i = 0;
	while ( i < len )
		{
		if ( is_lws(data[i]) )
			++i;
		else
			{
			if ( data[i] == '(' )
				i += MIME_skip_comments(len - i, data + i);
			else
				return i;
			}
		}

	return len;
	}

// Locates the first field name in data and stores it in *name. Returns
// the offset just past the name, or -1 if none was found.
int MIME_get_field_name(int len, const char* data, zeek::data_chunk_t* name)
	{
	int i = MIME_skip_lws_comments(len, data);
	while ( i < len )
		{
		int j;
		if ( MIME_is_field_name_char(data[i]) )
			{
			name->data = data + i;

			for ( j = i; j < len; ++j )
				if ( ! MIME_is_field_name_char(data[j]) )
					break;

			name->length = j - i;
			return j;
			}

		// Always advance by at least one character.
		j = MIME_skip_lws_comments(len - i, data + i);
		i += (j > 0) ? j : 1;
		}

	return -1;
	}

// See RFC 2045, page 12.
// For boundary values only '"' counts as special.
static bool MIME_is_tspecial (char ch, bool is_boundary = false)
	{
	if ( is_boundary )
		return ch == '"';
	else
		return ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '@' ||
		       ch == ',' || ch == ';' || ch == ':' || ch == '\\' || ch == '"' ||
		       ch == '/' || ch == '[' || ch == ']' || ch == '?' || ch == '=';
	}

// Printable ASCII (33..126) other than ':'.
bool MIME_is_field_name_char (char ch)
	{
	return ch >= 33 && ch <= 126 && ch != ':';
	}

// Printable ASCII (33..126) that is not a tspecial.
static bool MIME_is_token_char (char ch, bool is_boundary = false)
	{
	return ch >= 33 && ch <= 126 && ! MIME_is_tspecial(ch, is_boundary);
	}

// See RFC 2045, page 12.
// A token is composed of characters that are not SPACE, CTLs or tspecials
// Stores the first token in *token and returns the offset just past it,
// or -1 if no token was found.
int MIME_get_token(int len, const char* data, zeek::data_chunk_t* token,
                   bool is_boundary)
	{
	int i = 0;

	if ( ! is_boundary )
		i = MIME_skip_lws_comments(len, data);

	while ( i < len )
		{
		int j;

		if ( MIME_is_token_char(data[i], is_boundary) )
			{
			token->data = (data + i);

			for ( j = i; j < len; ++j )
				{
				if ( ! MIME_is_token_char(data[j], is_boundary) )
					break;
				}

			token->length = j - i;
			return j;
			}

		// Always advance by at least one character.
		j = MIME_skip_lws_comments(len - i, data + i);
		i += (j > 0) ? j : 1;
		}

	return -1;
	}

// Parses "<first> / <second>". Returns the number of bytes consumed, or
// -1 if either token or the slash is missing.
int MIME_get_slash_token_pair(int len, const char* data,
                              zeek::data_chunk_t* first,
                              zeek::data_chunk_t* second)
	{
	int offset;
	const char* data_start = data;

	offset = MIME_get_token(len, data, first);
	if ( offset < 0 )
		{
		// DEBUG_MSG("first token missing in slash token pair");
		return -1;
		}

	data += offset;
	len -= offset;

	offset = MIME_skip_lws_comments(len, data);
	if ( offset < 0 || offset >= len || data[offset] != '/' )
		{
		// DEBUG_MSG("/ not found in slash token pair");
		return -1;
		}

	++offset;
	data += offset;
	len -= offset;

	offset = MIME_get_token(len, data, second);
	if ( offset < 0 )
		{
		// DEBUG_MSG("second token missing in slash token pair");
		return -1;
		}

	data += offset;
	len -= offset;

	return data - data_start;
	}

// See RFC 2822, page 13.
// Stores the text between the quotes (escapes left in place) in *str.
// Returns the offset just past the closing quote, or -1 on failure.
int MIME_get_quoted_string(int len, const char* data, zeek::data_chunk_t* str)
	{
	int offset = MIME_skip_lws_comments(len, data);

	len -= offset;
	data += offset;

	if ( len <= 0 || *data != '"' )
		return -1;

	for ( int i = 1; i < len; ++i )
		{
		switch ( data[i] ) {
		case '"':
			str->data = data + 1;
			str->length = i - 1;
			return offset + i + 1;

		case '\\':
			// Quoted pair: skip the escaped character.
			++i;
			break;
		}
		}

	return -1;
	}

// Parses a token or quoted-string. On success 'buf' receives a newly
// allocated string owned by the caller, and the offset past the value is
// returned. Returns -1 (buf untouched) on failure.
int MIME_get_value(int len, const char* data, zeek::String*& buf,
                   bool is_boundary)
	{
	int offset = 0;

	if ( ! is_boundary ) // For boundaries, simply accept everything.
		offset = MIME_skip_lws_comments(len, data);

	len -= offset;
	data += offset;

	if ( len > 0 && *data == '"' )
		{
		zeek::data_chunk_t str;
		int end = MIME_get_quoted_string(len, data, &str);
		if ( end < 0 )
			return -1;

		buf = MIME_decode_quoted_pairs(str);
		return offset + end;
		}
	else
		{
		zeek::data_chunk_t str;
		int end = MIME_get_token(len, data, &str, is_boundary);
		if ( end < 0 )
			return -1;

		buf = new zeek::String((const u_char*)str.data, str.length, true);
		return offset + end;
		}
	}

// Decode each quoted-pair: a '\' followed by a character by the
// quoted character. The decoded string is returned.
zeek::String* MIME_decode_quoted_pairs(zeek::data_chunk_t buf)
	{
	const char* data = buf.data;
	char* dest = new char[buf.length+1];
	int j = 0;

	for ( int i = 0; i < buf.length; ++i )
		if ( data[i] == '\\' )
			{
			if ( ++i < buf.length )
				dest[j++] = data[i];
			else
				{
				// a trailing '\' -- don't know what
				// to do with it -- ignore it.
				}
			}
		else
			dest[j++] = data[i];

	dest[j] = 0;

	return new zeek::String(true, (zeek::byte_vec) dest, j);
	}

} } // namespace analyzer::*

using namespace analyzer::mime;

MIME_Multiline::MIME_Multiline()
	{
	line = nullptr;
	}

MIME_Multiline::~MIME_Multiline()
	{
	delete line;
	delete_strings(buffer);
	}

// Appends a copy of one physical line to the buffered header.
void MIME_Multiline::append(int len, const char* data)
	{
	buffer.push_back(new zeek::String((const u_char*) data, len, true));
	}

// Returns all buffered lines joined into one string, or nullptr if
// nothing was appended. The result is owned by this object.
zeek::String* MIME_Multiline::get_concatenated_line()
	{
	if ( buffer.empty() )
		return nullptr;

	delete line;
	line = concatenate(buffer);

	return line;
	}

// Splits the concatenated header line into name and value. Takes
// ownership of 'hl'. On a malformed line, name is left as the null chunk.
MIME_Header::MIME_Header(MIME_Multiline* hl)
	{
	lines = hl;
	name = value = value_token = rest_value = null_data_chunk;

	zeek::String* s = hl->get_concatenated_line();

	// get_concatenated_line() returns nullptr for an empty buffer.
	if ( ! s )
		return;

	int len = s->Len();
	const char* data = (const char*) s->Bytes();

	int offset = MIME_get_field_name(len, data, &name);
	if ( offset < 0 )
		return;

	len -= offset;
	data += offset;
	offset = MIME_skip_lws_comments(len, data);

	if ( offset < len && data[offset] == ':' )
		{
		value.length = len - offset - 1;
		value.data = data + offset + 1;

		// Cast: passing a negative char to isspace() is undefined.
		while ( value.length &&
		        isspace(static_cast<unsigned char>(*value.data)) )
			{
			--value.length;
			++value.data;
			}
		}
	else
		// malformed header line
		name = null_data_chunk;
	}

MIME_Header::~MIME_Header()
	{
	delete lines;
	}

// Extracts the first token of the value and the remainder after it.
// Returns 1 on success, 0 (both chunks reset to null) otherwise.
int MIME_Header::get_first_token()
	{
	if ( MIME_get_token(value.length, value.data, &value_token) >= 0 )
		{
		rest_value.data = value_token.data + value_token.length;
		rest_value.length = value.data + value.length - rest_value.data;
		return 1;
		}
	else
		{
		value_token = rest_value = null_data_chunk;
		return 0;
		}
	}

// Returns the first token of the value, computing it on first use.
zeek::data_chunk_t MIME_Header::get_value_token()
	{
	if ( ! is_null_data_chunk(value_token) )
		return value_token;

	get_first_token();
	return value_token;
	}

// Returns the part of the value after its first token, computing it on
// first use.
zeek::data_chunk_t MIME_Header::get_value_after_token()
	{
	if ( ! is_null_data_chunk(rest_value) )
		return rest_value;

	get_first_token();
	return rest_value;
	}

// Creates an entity reporting to 'output_message'. A child starts with
// its parent's content transfer encoding.
MIME_Entity::MIME_Entity(MIME_Message* output_message, MIME_Entity* parent_entity)
	{
	init();
	parent = parent_entity;
	message = output_message;

	if ( parent )
		content_encoding = parent->ContentTransferEncoding();

	want_all_headers = (bool)mime_all_headers;
	}

// Resets all members to their defaults (content type TEXT/PLAIN).
void MIME_Entity::init()
	{
	in_header = 1;
	end_of_data = 0;

	current_header_line = nullptr;
	current_field_type = MIME_FIELD_OTHER;

	need_to_parse_parameters = 0;

	content_type_str = zeek::make_intrusive<zeek::StringVal>("TEXT");
	content_subtype_str = zeek::make_intrusive<zeek::StringVal>("PLAIN");

	content_encoding_str = nullptr;
	multipart_boundary = nullptr;
	content_type = CONTENT_TYPE_TEXT;
	content_subtype = CONTENT_SUBTYPE_PLAIN;
	content_encoding = CONTENT_ENCODING_OTHER;

	parent = nullptr;
	current_child_entity = nullptr;

	base64_decoder = nullptr;

	data_buf_length = 0;
	data_buf_data = nullptr;
	data_buf_offset = -1;

	message = nullptr;
	delay_adding_implicit_CRLF = false;
	want_all_headers = false;
	}

MIME_Entity::~MIME_Entity()
	{
	if ( ! end_of_data )
		reporter->AnalyzerError(message ? message->GetAnalyzer() : nullptr,
		                        "missing MIME_Entity::EndOfData() before ~MIME_Entity");

	delete current_header_line;
	delete content_encoding_str;
	delete multipart_boundary;

	for ( auto& header : headers )
		delete header;
	headers.clear();

	delete base64_decoder;
	}

// Feeds one line into the entity: header lines while in the header, body
// lines afterwards. An empty line ends the header.
void MIME_Entity::Deliver(int len, const char* data, bool trailing_CRLF)
	{
	if ( in_header )
		{
		if ( len == 0 || *data == '\0' )
			{ // an empty line at the end of header fields
			FinishHeader();
			in_header = 0;
			SubmitAllHeaders();

			// Note: it's possible that we are in the
			// trailer of a chunked transfer (see HTTP.cc).
			// In this case, end_of_data will be set in
			// HTTP_Entity::SubmitAllHeaders(), and we
			// should not begin a new body.

			if ( ! end_of_data )
				BeginBody();
			}

		else if ( is_lws(*data) )
			// linear whitespace - a continuing header line
			ContHeader(len, data);
		else
			NewHeader(len, data);
		}
	else
		{
		if ( ! mime_header_only && data )
			NewDataLine(len, data, trailing_CRLF);
		}
	}

// Prepares for the body: starts the Base64 decoder and/or a child entity
// as the parsed headers require.
void MIME_Entity::BeginBody()
	{
	if ( content_encoding == CONTENT_ENCODING_BASE64 )
		StartDecodeBase64();

	if ( content_type == CONTENT_TYPE_MESSAGE )
		BeginChildEntity();
	}

// Finalizes the entity exactly once: flushes pending headers or data,
// closes any open child and notifies the message.
void MIME_Entity::EndOfData()
	{
	if ( end_of_data )
		return;

	end_of_data = 1;

	if ( in_header )
		{
		FinishHeader();
		in_header = 0;
		SubmitAllHeaders();
		message->SubmitEvent(MIME_EVENT_ILLEGAL_FORMAT, "entity body missing");
		}

	else
		{
		if ( current_child_entity != nullptr )
			{
			if ( content_type == CONTENT_TYPE_MULTIPART )
				IllegalFormat("multipart closing boundary delimiter missing");
			EndChildEntity();
			}

		if ( content_encoding == CONTENT_ENCODING_BASE64 )
			FinishDecodeBase64();

		FlushData();
		}

	message->EndEntity (this);
	}

// Handles one body line: boundary detection for multipart entities,
// forwarding to the child for container types, decoding otherwise.
void MIME_Entity::NewDataLine(int len, const char* data, bool trailing_CRLF)
	{
	if ( content_type == CONTENT_TYPE_MULTIPART )
		{
		switch ( CheckBoundaryDelimiter(len, data) ) {
		case MULTIPART_BOUNDARY:
			if ( current_child_entity != nullptr )
				EndChildEntity();
			BeginChildEntity();
			return;

		case MULTIPART_CLOSING_BOUNDARY:
			if ( current_child_entity != nullptr )
				EndChildEntity();
			EndOfData();
			return;
		}
		}

	if ( content_type == CONTENT_TYPE_MULTIPART ||
	     content_type == CONTENT_TYPE_MESSAGE )
		{
		// Here we ignore the difference among 7bit, 8bit and
		// binary encoding, and thus do not need to decode
		// before passing the data to child.

		if ( current_child_entity != nullptr )
			// Data before the first or after the last
			// boundary delimiter are ignored
			current_child_entity->Deliver(len, data, trailing_CRLF);
		}
	else
		{
		if ( mime_decode_data )
			DecodeDataLine(len, data, trailing_CRLF);
		}
	}

// Finishes the pending header (if any) and starts buffering a new one.
void MIME_Entity::NewHeader(int len, const char* data)
	{
	FinishHeader();

	if ( len == 0 )
		return;

	ASSERT(! is_lws(*data));

	current_header_line = new MIME_Multiline();
	current_header_line->append(len, data);
	}

// Appends a continuation line to the header being buffered.
void MIME_Entity::ContHeader(int len, const char* data)
	{
	if ( current_header_line == nullptr )
		{
		IllegalFormat("first header line starts with linear whitespace");

		// shall we try it as a new header or simply ignore this line?
		int ws = MIME_count_leading_lws(len, data);
		NewHeader(len - ws, data + ws);
		return;
		}

	current_header_line->append(len, data);
	}

// Parses and submits the buffered header. The header object is kept in
// 'headers' only when want_all_headers is set.
void MIME_Entity::FinishHeader()
	{
	if ( current_header_line == nullptr )
		return;

	// The MIME_Header takes ownership of the buffered lines.
	MIME_Header* h = new MIME_Header(current_header_line);
	current_header_line = nullptr;

	if ( ! is_null_data_chunk(h->get_name()) )
		{
		ParseMIMEHeader(h);
		SubmitHeader(h);

		if ( want_all_headers )
			headers.push_back(h);
		else
			delete h;
		}
	else
		delete h;
	}

// Returns the MIME_HEADER_FIELDS index for 'name', or -1 if unknown.
int MIME_Entity::LookupMIMEHeaderName(zeek::data_chunk_t name)
	{
	// A linear lookup should be fine for now.
	// header names are case-insensitive (RFC 822, 2822, 2045).

	for ( int i = 0; MIMEHeaderName[i] != nullptr; ++i )
		if ( istrequal(name, MIMEHeaderName[i]) )
			return i;

	return -1;
	}

// Dispatches Content-Type and Content-Transfer-Encoding headers to their
// parsers; other headers are ignored here.
void MIME_Entity::ParseMIMEHeader(MIME_Header* h)
	{
	if ( h == nullptr )
		return;

	current_field_type = LookupMIMEHeaderName(h->get_name());

	switch ( current_field_type ) {
	case MIME_CONTENT_TYPE:
		ParseContentTypeField(h);
		break;

	case MIME_CONTENT_TRANSFER_ENCODING:
		ParseContentEncodingField(h);
		break;
	}
	}

// Parses "type/subtype[; parameters]". Returns false if the type/subtype
// pair is missing.
bool MIME_Entity::ParseContentTypeField(MIME_Header* h)
	{
	zeek::data_chunk_t val = h->get_value();
	int len = val.length;
	const char* data = val.data;

	zeek::data_chunk_t ty, subty;
	int offset;

	offset = MIME_get_slash_token_pair(len, data, &ty, &subty);
	if ( offset < 0 )
		{
		IllegalFormat("media type/subtype not found in content type");
		return false;
		}

	data += offset;
	len -= offset;

	content_type_str = zeek::make_intrusive<zeek::StringVal>(ty.length, ty.data);
	content_type_str->ToUpper();
	content_subtype_str = zeek::make_intrusive<zeek::StringVal>(subty.length, subty.data);
	content_subtype_str->ToUpper();

	ParseContentType(ty, subty);

	// Proceed to parameters.
	if ( need_to_parse_parameters )
		ParseFieldParameters(len, data);

	if ( content_type == CONTENT_TYPE_MULTIPART && ! multipart_boundary )
		{
		IllegalFormat("boundary delimiter is not specified for a multipart entity -- content is treated as type application/octet-stream");
		content_type = CONTENT_TYPE_OTHER;
		content_subtype = CONTENT_SUBTYPE_OTHER;
		}

	return true;
	}

// Parses the encoding mechanism token. Returns false if it is missing.
bool MIME_Entity::ParseContentEncodingField(MIME_Header* h)
	{
	zeek::data_chunk_t enc;

	enc = h->get_value_token();
	if ( is_null_data_chunk(enc) )
		{
		IllegalFormat("encoding type not found in content encoding");
		return false;
		}

	delete content_encoding_str;
	content_encoding_str = new zeek::String((const u_char*)enc.data, enc.length, true);
	ParseContentEncoding(enc);

	if ( need_to_parse_parameters )
		{
		zeek::data_chunk_t val = h->get_value_after_token();
		if ( ! is_null_data_chunk(val) )
			ParseFieldParameters(val.length, val.data);
		}

	return true;
	}

// Parses a sequence of "; attr=value" parameters. Only the "boundary"
// parameter of a multipart Content-Type is retained. Returns false if an
// attribute name is missing, true otherwise.
bool MIME_Entity::ParseFieldParameters(int len, const char* data)
	{
	zeek::data_chunk_t attr;

	while ( true )
		{
		int offset = MIME_skip_lws_comments(len, data);
		if ( offset < 0 || offset >= len || data[offset] != ';' )
			break;

		++offset;
		data += offset;
		len -= offset;

		offset = MIME_get_token(len, data, &attr);
		if ( offset < 0 )
			{
			IllegalFormat("attribute name not found in parameter specification");
			return false;
			}

		data += offset;
		len -= offset;

		offset = MIME_skip_lws_comments(len, data);
		if ( offset < 0 || offset >= len || data[offset] != '=' )
			{
			IllegalFormat("= not found in parameter specification");
			continue;
			}

		++offset;
		data += offset;
		len -= offset;

		zeek::String* val = nullptr;

		if ( current_field_type == MIME_CONTENT_TYPE &&
		     content_type == CONTENT_TYPE_MULTIPART &&
		     istrequal(attr, "boundary") )
			{
			// token or quoted-string (and some lenience for characters
			// not explicitly allowed by the RFC, but encountered in the wild)
			offset = MIME_get_value(len, data, val, true);

			if ( ! val )
				{
				IllegalFormat("Could not parse multipart boundary");
				continue;
				}

			zeek::data_chunk_t vd = get_data_chunk(val);
			delete multipart_boundary;
			multipart_boundary = new zeek::String((const u_char*)vd.data,
			                                      vd.length, true);
			}
		else
			// token or quoted-string
			offset = MIME_get_value(len, data, val);

		if ( offset < 0 )
			{
			IllegalFormat("value not found in parameter specification");
			delete val;
			continue;
			}

		data += offset;
		len -= offset;
		delete val;
		}

	return true;
	}

// Maps type and subtype names to their enum values and decides whether
// the header's parameters need to be parsed.
void MIME_Entity::ParseContentType(zeek::data_chunk_t type, zeek::data_chunk_t sub_type)
	{
	int i;
	for ( i = 0; MIMEContentTypeName[i]; ++i )
		if ( istrequal(type, MIMEContentTypeName[i]) )
			break;

	// An unknown name leaves i at the terminator index ("other").
	content_type = i;

	for ( i = 0; MIMEContentSubtypeName[i]; ++i )
		if ( istrequal(sub_type, MIMEContentSubtypeName[i]) )
			break;

	content_subtype = i;

	switch ( content_type ) {
	case CONTENT_TYPE_MULTIPART:
	case CONTENT_TYPE_MESSAGE:
		need_to_parse_parameters = 1;
		break;

	default:
		need_to_parse_parameters = 0;
		break;
	}
	}

// Maps the encoding mechanism name to its enum value.
void MIME_Entity::ParseContentEncoding(zeek::data_chunk_t encoding_mechanism)
	{
	int i;
	for ( i = 0; MIMEContentEncodingName[i]; ++i )
		if ( istrequal(encoding_mechanism, MIMEContentEncodingName[i]) )
			break;

	content_encoding = i;
	}

// Classifies a body line as "--boundary", "--boundary--" or neither.
int MIME_Entity::CheckBoundaryDelimiter(int len, const char* data)
	{
	if ( ! multipart_boundary )
		{
		reporter->Warning("boundary delimiter was not specified for a multipart message\n");
		DEBUG_MSG("headers of the MIME entity for debug:\n");
		DebugPrintHeaders();
		return NOT_MULTIPART_BOUNDARY;
		}

	if ( len >= 2 && data[0] == '-' && data[1] == '-' )
		{
		len -= 2;
		data += 2;

		zeek::data_chunk_t delim = get_data_chunk(multipart_boundary);

		int i;
		for ( i = 0; i < len && i < delim.length; ++i )
			if ( data[i] != delim.data[i] )
				return NOT_MULTIPART_BOUNDARY;

		// The line ended before the whole boundary was matched.
		if ( i < delim.length )
			return NOT_MULTIPART_BOUNDARY;

		len -= i;
		data += i;

		if ( len >= 2 && data[0] == '-' && data[1] == '-' )
			return MULTIPART_CLOSING_BOUNDARY;
		else
			return MULTIPART_BOUNDARY;
		}

	return NOT_MULTIPART_BOUNDARY;
	}

// trailing_CRLF indicates whether an implicit CRLF sequence follows data
// (the CRLF sequence is not included in data).
void MIME_Entity::DecodeDataLine(int len, const char* data, bool trailing_CRLF)
	{
	if ( ! mime_submit_data )
		return;

	switch ( content_encoding ) {
	case CONTENT_ENCODING_QUOTED_PRINTABLE:
		DecodeQuotedPrintable(len, data);
		break;

	case CONTENT_ENCODING_BASE64:
		DecodeBase64(len, data);
		break;

	case CONTENT_ENCODING_7BIT:
	case CONTENT_ENCODING_8BIT:
	case CONTENT_ENCODING_BINARY:
	case CONTENT_ENCODING_OTHER:
		DecodeBinary(len, data, trailing_CRLF);
		break;
	}

	FlushData();
	}

// Passes the line through unchanged, adding the implicit CRLF (delayed
// by one line inside multipart bodies, see below).
void MIME_Entity::DecodeBinary(int len, const char* data, bool trailing_CRLF)
	{
	if ( delay_adding_implicit_CRLF )
		{
		delay_adding_implicit_CRLF = false;
		DataOctet(CR);
		DataOctet(LF);
		}

	DataOctets(len, data);

	if ( trailing_CRLF )
		{
		if ( Parent() &&
		     Parent()->MIMEContentType() == mime::CONTENT_TYPE_MULTIPART )
			{
			// For multipart body content, we want to keep all implicit CRLFs
			// except for the last because that one belongs to the multipart
			// boundary delimiter, not the content. Simply delaying the
			// addition of implicit CRLFs until another chunk of content
			// data comes in is a way to prevent the CRLF before the final
			// message boundary from being accidentally added to the content.
			delay_adding_implicit_CRLF = true;
			}
		else
			{
			DataOctet(CR);
			DataOctet(LF);
			}
		}
	}

// Decodes one quoted-printable line. Illegal input is reported and then
// passed through unchanged.
void MIME_Entity::DecodeQuotedPrintable(int len, const char* data)
	{
	// Ignore trailing HT and SP.
	int i;
	for ( i = len - 1; i >= 0; --i )
		if ( data[i] != HT && data[i] != SP )
			break;

	int end_of_line = i;
	int soft_line_break = 0;

	for ( i = 0; i <= end_of_line; ++i )
		{
		if ( data[i] == '=' )
			{
			if ( i == end_of_line )
				soft_line_break = 1;
			else
				{
				int legal = 0;
				if ( i + 2 < len )
					{
					int a, b;
					a = decode_hex(data[i+1]);
					b = decode_hex(data[i+2]);

					if ( a >= 0 && b >= 0 )
						{
						DataOctet((a << 4) + b);
						legal = 1;
						i += 2;
						}
					}

				if ( ! legal )
					{
					// Follows suggestions for a robust
					// decoder. See RFC 2045 page 22.
					IllegalEncoding("= is not followed by two hexadecimal digits in quoted-printable encoding");
					DataOctet(data[i]);
					}
				}
			}

		else if ( (data[i] >= 33 && data[i] <= 60) ||
		          // except controls, whitespace and '='
		          (data[i] >= 62 && data[i] <= 126) )
			DataOctet(data[i]);

		else if ( data[i] == HT || data[i] == SP )
			DataOctet(data[i]);

		else
			{
			IllegalEncoding(fmt("control characters in quoted-printable encoding: %d", (int) (data[i])));
			DataOctet(data[i]);
			}
		}

	if ( ! soft_line_break )
		{
		DataOctet(CR);
		DataOctet(LF);
		}
	}

// Decodes one Base64 line in 128-byte output chunks.
void MIME_Entity::DecodeBase64(int len, const char* data)
	{
	// StartDecodeBase64() leaves the decoder null when the message has
	// no analyzer; there is nothing to decode with in that case.
	if ( ! base64_decoder )
		return;

	int rlen;
	char rbuf[128];

	while ( len > 0 )
		{
		rlen = 128;
		char* prbuf = rbuf;
		int decoded = base64_decoder->Decode(len, data, &rlen, &prbuf);
		DataOctets(rlen, rbuf);

		// Without progress the loop would never terminate.
		if ( decoded <= 0 )
			break;

		len -= decoded;
		data += decoded;
		}
	}

// Creates the Base64 decoder for this entity's body.
void MIME_Entity::StartDecodeBase64()
	{
	if ( base64_decoder )
		{
		reporter->InternalWarning("previous MIME Base64 decoder not released");
		delete base64_decoder;
		// Do not keep a dangling pointer if we return early below.
		base64_decoder = nullptr;
		}

	analyzer::Analyzer* analyzer = message->GetAnalyzer();

	if ( ! analyzer )
		{
		reporter->InternalWarning("no analyzer associated with MIME message");
		return;
		}

	base64_decoder = new Base64Converter(analyzer->Conn());
	}

// Flushes whatever the Base64 decoder still holds and releases it.
void MIME_Entity::FinishDecodeBase64()
	{
	if ( ! base64_decoder )
		return;

	int rlen = 128;
	char rbuf[128];
	char* prbuf = rbuf;

	if ( base64_decoder->Done(&rlen, &prbuf) )
		{ // some remaining data
		if ( rlen > 0 )
			DataOctets(rlen, rbuf);
		}

	delete base64_decoder;
	base64_decoder = nullptr;
	}

// Requests a fresh output buffer from the message. Returns false if none
// is available.
bool MIME_Entity::GetDataBuffer()
	{
	int ret = message->RequestBuffer(&data_buf_length, &data_buf_data);

	if ( ! ret || data_buf_length == 0 || data_buf_data == nullptr )
		{
		// reporter->InternalError("cannot get data buffer from MIME_Message", "");
		return false;
		}

	data_buf_offset = 0;
	return true;
	}

// Appends one decoded byte, submitting the buffer once it is full.
void MIME_Entity::DataOctet(char ch)
	{
	// A negative offset means no buffer is currently held.
	if ( data_buf_offset < 0 && ! GetDataBuffer() )
		return;

	data_buf_data[data_buf_offset] = ch;

	++data_buf_offset;
	if ( data_buf_offset == data_buf_length )
		{
		SubmitData(data_buf_length, data_buf_data);
		data_buf_offset = -1;
		}
	}

void MIME_Entity::SubmitData(int len, const char* buf)
	{
	message->SubmitData(len, buf);
	}

// Appends 'len' decoded bytes, submitting each buffer as it fills up.
void MIME_Entity::DataOctets(int len, const char* data)
	{
	while ( len > 0 )
		{
		if ( data_buf_offset < 0 && ! GetDataBuffer() )
			return;

		int n = std::min(data_buf_length - data_buf_offset, len);
		memcpy(data_buf_data + data_buf_offset, data, n);
		data += n;
		data_buf_offset += n;
		len -= n;

		if ( data_buf_offset == data_buf_length )
			{
			SubmitData(data_buf_length, data_buf_data);
			data_buf_offset = -1;
			}
		}
	}

// Submits a partially filled buffer, if there is one.
void MIME_Entity::FlushData()
	{
	if ( data_buf_offset > 0 )
		{
		SubmitData(data_buf_offset, data_buf_data);
		data_buf_offset = -1;
		}
	}

void MIME_Entity::SubmitHeader(MIME_Header* h)
	{
	message->SubmitHeader(h);
	}

void MIME_Entity::SubmitAllHeaders()
	{
	message->SubmitAllHeaders(headers);
	}

// Creates a child entity and announces it to the message.
void MIME_Entity::BeginChildEntity()
	{
	ASSERT(current_child_entity == nullptr);
	current_child_entity = NewChildEntity();
	message->BeginEntity(current_child_entity);
	}

// Finalizes and destroys the current child entity.
void MIME_Entity::EndChildEntity()
	{
	ASSERT(current_child_entity != nullptr);

	current_child_entity->EndOfData();
	delete current_child_entity;
	current_child_entity = nullptr;
	}

void MIME_Entity::IllegalFormat(const char* explanation)
	{
	message->SubmitEvent(MIME_EVENT_ILLEGAL_FORMAT, explanation);
	}

void MIME_Entity::IllegalEncoding(const char* explanation)
	{
	message->SubmitEvent(MIME_EVENT_ILLEGAL_ENCODING, explanation);
	}

// Prints the retained headers to stderr; compiled in only with DEBUG_BRO.
void MIME_Entity::DebugPrintHeaders()
	{
#ifdef DEBUG_BRO
	for ( MIME_Header* h : headers )
		{
		DEBUG_fputs(h->get_name(), stderr);
		DEBUG_MSG(":\"");
		DEBUG_fputs(h->get_value(), stderr);
		DEBUG_MSG("\"\n");
		}
#endif
	}

// Raw-pointer variant of ToHeaderVal(); the caller owns the result.
zeek::RecordVal* MIME_Message::BuildHeaderVal(MIME_Header* h)
	{
	return ToHeaderVal(h).release();
	}

// Builds a mime_header_rec record: field 0 is the name, field 1 the
// upper-cased name, field 2 the value.
zeek::RecordValPtr MIME_Message::ToHeaderVal(MIME_Header* h)
	{
	static auto mime_header_rec = zeek::id::find_type<zeek::RecordType>("mime_header_rec");
	auto header_record = zeek::make_intrusive<zeek::RecordVal>(mime_header_rec);
	header_record->Assign(0, to_string_val(h->get_name()));
	auto upper_hn = to_string_val(h->get_name());
	upper_hn->ToUpper();
	header_record->Assign(1, std::move(upper_hn));
	header_record->Assign(2, to_string_val(h->get_value()));
	return header_record;
	}

// Raw-pointer variant of ToHeaderTable(); the caller owns the result.
zeek::TableVal* MIME_Message::BuildHeaderTable(MIME_HeaderList& hlist)
	{
	return ToHeaderTable(hlist).release();
	}

// Builds a mime_header_list table mapping 1-based positions to header
// records.
zeek::TableValPtr MIME_Message::ToHeaderTable(MIME_HeaderList& hlist)
	{
	static auto mime_header_list = zeek::id::find_type<zeek::TableType>("mime_header_list");
	auto t = zeek::make_intrusive<zeek::TableVal>(mime_header_list);

	for ( size_t i = 0; i < hlist.size(); ++i )
		{
		auto index = zeek::val_mgr->Count(i + 1);	// index starting from 1
		MIME_Header* h = hlist[i];
		t->Assign(std::move(index), ToHeaderVal(h));
		}

	return t;
	}

// Sets up the data buffer, optional MD5 content hashing and the
// top-level entity for one mail message.
MIME_Mail::MIME_Mail(analyzer::Analyzer* mail_analyzer, bool orig, int buf_size)
	: MIME_Message(mail_analyzer), md5_hash()
	{
	analyzer = mail_analyzer;

	min_overlap_length = mime_segment_overlap_length;
	max_chunk_length = mime_segment_length;
	is_orig = orig;
	int length = buf_size;

	// Clamp the configured sizes to usable values.
	if ( min_overlap_length < 0 )
		min_overlap_length = 0;

	if ( max_chunk_length < min_overlap_length + 32 )
		max_chunk_length = min_overlap_length + 32;

	if ( length < max_chunk_length )
		length = max_chunk_length;

	buffer_start = data_start = 0;
	data_buffer = new zeek::String(true, new u_char[length+1], length);

	if ( mime_content_hash )
		{
		compute_content_hash = 1;
		md5_hash = hash_init(Hash_MD5);
		}
	else
		compute_content_hash = 0;

	content_hash_length = 0;

	top_level = new MIME_Entity(this, nullptr);	// to be changed to MIME_Mail
	BeginEntity(top_level);
	}

// Ends the top-level entity and raises the final all-data and
// content-hash events.
void MIME_Mail::Done()
	{
	top_level->EndOfData();

	SubmitAllData();

	if ( compute_content_hash && mime_content_hash )
		{
		u_char* digest = new u_char[16];
		hash_final(md5_hash, digest);
		md5_hash = nullptr;

		analyzer->EnqueueConnEvent(mime_content_hash,
			analyzer->ConnVal(),
			zeek::val_mgr->Count(content_hash_length),
			zeek::make_intrusive<zeek::StringVal>(new zeek::String(true, digest, 16))
		);
		}

	MIME_Message::Done();

	file_mgr->EndOfFile(analyzer->GetAnalyzerTag(), analyzer->Conn());
	}

MIME_Mail::~MIME_Mail()
	{
	if ( md5_hash )
		EVP_MD_CTX_free(md5_hash);

	delete_strings(all_content);
	delete data_buffer;
	delete top_level;
	}

// Resets per-entity state and raises mime_begin_entity.
void MIME_Mail::BeginEntity(MIME_Entity* /* entity */)
	{
	cur_entity_len = 0;
	cur_entity_id.clear();

	if ( mime_begin_entity )
		analyzer->EnqueueConnEvent(mime_begin_entity, analyzer->ConnVal());

	buffer_start = data_start = 0;
	ASSERT(entity_content.size() == 0);
	}

// Raises mime_entity_data and mime_end_entity and closes the file.
void MIME_Mail::EndEntity(MIME_Entity* /* entity */)
	{
	if ( mime_entity_data )
		{
		zeek::String* s = concatenate(entity_content);

		analyzer->EnqueueConnEvent(mime_entity_data,
			analyzer->ConnVal(),
			zeek::val_mgr->Count(s->Len()),
			zeek::make_intrusive<zeek::StringVal>(s)
		);

		// With mime_all_data set, the same strings are also held by
		// all_content and are freed through it.
		if ( ! mime_all_data )
			delete_strings(entity_content);
		else
			entity_content.clear();
		}

	if ( mime_end_entity )
		analyzer->EnqueueConnEvent(mime_end_entity, analyzer->ConnVal());

	file_mgr->EndOfFile(analyzer->GetAnalyzerTag(), analyzer->Conn());
	cur_entity_id.clear();
	}

void MIME_Mail::SubmitHeader(MIME_Header* h)
	{
	if ( mime_one_header )
		analyzer->EnqueueConnEvent(mime_one_header,
			analyzer->ConnVal(),
			ToHeaderVal(h)
		);
	}

void MIME_Mail::SubmitAllHeaders(MIME_HeaderList& hlist)
	{
	if ( mime_all_headers )
		analyzer->EnqueueConnEvent(mime_all_headers,
			analyzer->ConnVal(),
			ToHeaderTable(hlist)
		);
	}

// Receives a filled buffer handed out by RequestBuffer(): hashes it,
// records it, raises mime_segment_data and feeds the file manager.
void MIME_Mail::SubmitData(int len, const char* buf)
	{
	if ( buf != (char*) data_buffer->Bytes() + buffer_start )
		{
		reporter->AnalyzerError(GetAnalyzer(),
		                        "MIME buffer misalignment");
		return;
		}

	if ( compute_content_hash )
		{
		content_hash_length += len;
		hash_update(md5_hash, (const u_char*) buf, len);
		}

	if ( mime_entity_data || mime_all_data )
		{
		zeek::String* s = new zeek::String((const u_char*) buf, len, false);

		if ( mime_entity_data )
			entity_content.push_back(s);
		if ( mime_all_data )
			all_content.push_back(s);
		}

	if ( mime_segment_data )
		{
		// The segment starts at data_start, so it includes the overlap
		// with the previous segment.
		const char* data = (char*) data_buffer->Bytes() + data_start;
		int data_len = (buf + len) - data;

		analyzer->EnqueueConnEvent(mime_segment_data,
			analyzer->ConnVal(),
			zeek::val_mgr->Count(data_len),
			zeek::make_intrusive<zeek::StringVal>(data_len, data)
		);
		}

	cur_entity_id = file_mgr->DataIn(reinterpret_cast<const u_char*>(buf), len,
	                                 analyzer->GetAnalyzerTag(), analyzer->Conn(),
	                                 is_orig, cur_entity_id);

	cur_entity_len += len;
	buffer_start = (buf + len) - (char*)data_buffer->Bytes();
	}

// Hands out the next writable region of the data buffer, keeping up to
// min_overlap_length bytes of previously submitted data in front of it.
// Always returns true.
bool MIME_Mail::RequestBuffer(int* plen, char** pbuf)
	{
	data_start = buffer_start - min_overlap_length;
	if ( data_start < 0 )
		data_start = 0;

	int overlap = buffer_start - data_start;
	int buffer_end = data_start + max_chunk_length;
	if ( buffer_end > data_buffer->Len() )
		{
		// Copy every thing in [data_start, buffer_start) to
		// [0, overlap).
		// memmove, not memcpy: both ranges lie in the same buffer and
		// overlap whenever data_start < overlap.
		if ( buffer_start > data_start )
			memmove(data_buffer->Bytes(), data_buffer->Bytes() + data_start, overlap);

		data_start = 0;
		buffer_start = overlap;
		}

	*plen = max_chunk_length - overlap;
	*pbuf = (char*) data_buffer->Bytes() + buffer_start;

	return true;
	}

// Raises mime_all_data with everything collected in all_content.
void MIME_Mail::SubmitAllData()
	{
	if ( mime_all_data )
		{
		zeek::String* s = concatenate(all_content);
		delete_strings(all_content);

		analyzer->EnqueueConnEvent(mime_all_data,
			analyzer->ConnVal(),
			zeek::val_mgr->Count(s->Len()),
			zeek::make_intrusive<zeek::StringVal>(s)
		);
		}
	}

// Raises mime_event with a category derived from 'event_type'.
void MIME_Mail::SubmitEvent(int event_type, const char* detail)
	{
	const char* category = "";

	switch ( event_type ) {
	case MIME_EVENT_ILLEGAL_FORMAT:
		category = "illegal format";
		break;

	case MIME_EVENT_ILLEGAL_ENCODING:
		category = "illegal encoding";
		break;

	default:
		reporter->AnalyzerError(GetAnalyzer(),
		                        "unrecognized MIME_Mail event");
		return;
	}

	if ( mime_event )
		analyzer->EnqueueConnEvent(mime_event,
			analyzer->ConnVal(),
			zeek::make_intrusive<zeek::StringVal>(category),
			zeek::make_intrusive<zeek::StringVal>(detail)
		);
	}