// See the file "COPYING" in the main distribution directory for copyright. #include "zeek/analyzer/protocol/http/HTTP.h" #include "zeek/zeek-config.h" #include #include #include #include #include #include "zeek/Event.h" #include "zeek/NetVar.h" #include "zeek/analyzer/Manager.h" #include "zeek/analyzer/protocol/http/events.bif.h" #include "zeek/analyzer/protocol/mime/MIME.h" #include "zeek/file_analysis/Manager.h" namespace zeek::analyzer::http { const bool DEBUG_http = false; // The EXPECT_*_NOTHING states are used to prevent further parsing. Used if a // message was interrupted. enum HTTP_ExpectRequest { EXPECT_REQUEST_LINE, EXPECT_REQUEST_MESSAGE, EXPECT_REQUEST_TRAILER, EXPECT_REQUEST_NOTHING, }; enum HTTP_ExpectReply { EXPECT_REPLY_LINE, EXPECT_REPLY_MESSAGE, EXPECT_REPLY_TRAILER, EXPECT_REPLY_NOTHING, EXPECT_REPLY_HTTP09, }; HTTP_Entity::HTTP_Entity(HTTP_Message* arg_message, analyzer::mime::MIME_Entity* parent_entity, int arg_expect_body) : analyzer::mime::MIME_Entity(arg_message, parent_entity) { http_message = arg_message; expect_body = arg_expect_body; chunked_transfer_state = NON_CHUNKED_TRANSFER; content_length = range_length = -1; // unspecified expect_data_length = 0; body_length = 0; header_length = 0; deliver_body = true; encoding = IDENTITY; zip = nullptr; is_partial_content = false; offset = 0; instance_length = -1; // unspecified send_size = true; // Always override what MIME_Entity set for want_all_headers: HTTP doesn't // raise the generic MIME events, but rather it's own specific ones. want_all_headers = (bool)http_all_headers; } void HTTP_Entity::EndOfData() { if ( DEBUG_http ) DEBUG_MSG("%.6f: end of data\n", run_state::network_time); if ( zip ) { zip->Done(); delete zip; zip = nullptr; encoding = IDENTITY; } zeek::detail::Rule::PatternType rule = http_message->IsOrig() ? 
zeek::detail::Rule::HTTP_REQUEST_BODY : zeek::detail::Rule::HTTP_REPLY_BODY;

    // Closing call into the connection's rule matcher for this direction's
    // body rule, with an empty buffer of length 0.
    // NOTE(review): the template argument of reinterpret_cast is missing in
    // this copy of the file.
    http_message->MyHTTP_Analyzer()->Conn()->Match(rule, reinterpret_cast(""), 0, http_message->IsOrig(), false,
                                                   true, false);

    // Only signal end-of-data downstream if some body bytes were counted.
    if ( body_length )
        http_message->MyHTTP_Analyzer()->ForwardEndOfData(http_message->IsOrig());

    analyzer::mime::MIME_Entity::EndOfData();
}

// Consumes one unit of input for this entity.
//
// While in_header is set, the input is treated as a header line and passed to
// MIME_Entity::Deliver(). Afterwards it is body data, handled according to how
// the body is framed: multipart/message content, chunked transfer coding, a
// known content length, or none of these.
//
// len/data:       the bytes to process.
// trailing_CRLF:  presumably whether the input was a complete line whose CRLF
//                 is not part of len (DeliverBodyClear() accounts two extra
//                 bytes for it) -- TODO confirm against the caller.
void HTTP_Entity::Deliver(int len, const char* data, bool trailing_CRLF) {
    if ( DEBUG_http ) {
        DEBUG_MSG("%.6f HTTP_Entity::Deliver len=%d, in_header=%d\n", run_state::network_time, len, in_header);
    }

    if ( end_of_data ) {
        // Multipart entities may have trailers
        if ( content_type != analyzer::mime::CONTENT_TYPE_MULTIPART )
            IllegalFormat("data trailing the end of entity");
        return;
    }

    if ( in_header ) {
        if ( ! trailing_CRLF )
            http_message->MyHTTP_Analyzer()->Weird("http_no_crlf_in_header_list");

        header_length += len;
        analyzer::mime::MIME_Entity::Deliver(len, data, trailing_CRLF);
        return;
    }

    // Entity body.
    if ( content_type == analyzer::mime::CONTENT_TYPE_MULTIPART ||
         content_type == analyzer::mime::CONTENT_TYPE_MESSAGE )
        DeliverBody(len, data, trailing_CRLF);

    else if ( chunked_transfer_state != NON_CHUNKED_TRANSFER ) {
        // Chunked transfer coding: alternate between a size line, that many
        // bytes of data, and the CRLF terminating the chunk.
        switch ( chunked_transfer_state ) {
            case EXPECT_CHUNK_SIZE:
                ASSERT(trailing_CRLF);
                // The chunk size is parsed as a base-16 number.
                if ( ! util::atoi_n(len, data, nullptr, 16, expect_data_length) ) {
                    http_message->Weird("HTTP_bad_chunk_size");
                    expect_data_length = 0;
                }

                if ( expect_data_length > 0 ) {
                    chunked_transfer_state = EXPECT_CHUNK_DATA;
                    SetPlainDelivery(expect_data_length);
                }
                else {
                    // This is the last chunk
                    // Go back to header mode so that what follows is handled
                    // by the in_header branch above.
                    in_header = 1;
                    chunked_transfer_state = EXPECT_CHUNK_TRAILER;
                }
                break;

            case EXPECT_CHUNK_DATA:
                ASSERT(! trailing_CRLF);
                ASSERT(len <= expect_data_length);
                expect_data_length -= len;
                if ( expect_data_length <= 0 ) {
                    // Chunk complete: leave plain delivery and expect the
                    // CRLF that terminates the chunk.
                    SetPlainDelivery(0);
                    chunked_transfer_state = EXPECT_CHUNK_DATA_CRLF;
                }
                DeliverBody(len, data, false);
                break;

            case EXPECT_CHUNK_DATA_CRLF:
                ASSERT(trailing_CRLF);
                // Anything before the CRLF means the announced chunk size did
                // not match the data.
                if ( len > 0 )
                    IllegalFormat("inaccurate chunk size: data before ");
                chunked_transfer_state = EXPECT_CHUNK_SIZE;
                break;
        }
    }

    else if ( content_length >= 0 ) {
        // Body framed by a known length: deliver until expect_data_length is
        // used up, then end the entity.
        ASSERT(! trailing_CRLF);
        ASSERT(len <= expect_data_length);

        DeliverBody(len, data, false);

        expect_data_length -= len;
        if ( expect_data_length <= 0 ) {
            SetPlainDelivery(0);
            http_message->SetDeliverySize(-1);
            EndOfData();
        }
    }

    else
        DeliverBody(len, data, trailing_CRLF);
}

// Output handler attached to the ZIP analyzer: whatever the analyzer emits is
// fed back into the owning entity via DeliverBodyClear().
class HTTP_Entity::UncompressedOutput : public analyzer::OutputHandler {
public:
    UncompressedOutput(HTTP_Entity* e) { entity = e; }
    void DeliverStream(int len, const u_char* data, bool orig) override {
        entity->DeliverBodyClear(len, (char*)data, false);
    }

private:
    HTTP_Entity* entity; // entity receiving the analyzer's output
};

// Delivers a piece of body data. For GZIP/DEFLATE encodings the data is routed
// through a ZIP analyzer, which is created on first use and whose output
// arrives at DeliverBodyClear(); otherwise the data goes to DeliverBodyClear()
// directly.
void HTTP_Entity::DeliverBody(int len, const char* data, bool trailing_CRLF) {
    if ( encoding == GZIP || encoding == DEFLATE ) {
        analyzer::zip::ZIP_Analyzer::Method method =
            encoding == GZIP ? analyzer::zip::ZIP_Analyzer::GZIP : analyzer::zip::ZIP_Analyzer::DEFLATE;

        if ( ! zip ) {
            // We don't care about the direction here.
            zip = new analyzer::zip::ZIP_Analyzer(http_message->MyHTTP_Analyzer()->Conn(), false, method);
            zip->SetOutputHandler(new UncompressedOutput(this));
        }

        zip->NextStream(len, (const u_char*)data, false);
    }
    else
        DeliverBodyClear(len, data, trailing_CRLF);
}

// Handles body data in the clear: updates body_length (adding two bytes when
// trailing_CRLF is set), passes the data to the MIME layer unless body
// delivery is turned off, runs the signature matcher on it, and forwards it
// via the analyzer's ForwardStream().
void HTTP_Entity::DeliverBodyClear(int len, const char* data, bool trailing_CRLF) {
    // True only for the first piece of body data of this entity.
    bool new_data = (body_length == 0);

    body_length += len;
    if ( trailing_CRLF )
        body_length += 2;

    if ( deliver_body )
        analyzer::mime::MIME_Entity::Deliver(len, data, trailing_CRLF);

    zeek::detail::Rule::PatternType rule = http_message->IsOrig() ?
zeek::detail::Rule::HTTP_REQUEST_BODY : zeek::detail::Rule::HTTP_REPLY_BODY;

    http_message->MyHTTP_Analyzer()->Conn()->Match(rule, (const u_char*)data, len, http_message->IsOrig(), new_data,
                                                   false, new_data);

    // FIXME: buffer data for forwarding (matcher might match later).
    http_message->MyHTTP_Analyzer()->ForwardStream(len, (const u_char*)data, http_message->IsOrig());
}

// Handles a content gap of `len` bytes.
//
// Returns true if the undelivered bytes are completely within the body,
// otherwise returns false. In the true case the length bookkeeping
// (body_length, expect_data_length, plain-delivery size) is advanced as if
// the bytes had been delivered.
bool HTTP_Entity::Undelivered(int64_t len) {
    if ( DEBUG_http ) {
        DEBUG_MSG("Content gap %" PRId64 ", expect_data_length %" PRId64 "\n", len, expect_data_length);
    }

    // Don't propagate an entity (file) gap if we're still in the headers,
    // or the body length was declared to be zero.
    if ( (end_of_data && in_header) || body_length == 0 )
        return false;

    // Report the gap to the file analysis framework. Both branches issue the
    // same Gap() call; for partial content the file offset is advanced too.
    if ( is_partial_content ) {
        precomputed_file_id =
            file_mgr->Gap(body_length, len, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
                          http_message->MyHTTP_Analyzer()->Conn(), http_message->IsOrig(), precomputed_file_id);

        offset += len;
    }
    else
        precomputed_file_id =
            file_mgr->Gap(body_length, len, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
                          http_message->MyHTTP_Analyzer()->Conn(), http_message->IsOrig(), precomputed_file_id);

    if ( chunked_transfer_state != NON_CHUNKED_TRANSFER ) {
        // The gap is recoverable only if it lies entirely inside the data of
        // the current chunk.
        if ( chunked_transfer_state == EXPECT_CHUNK_DATA && expect_data_length >= len ) {
            body_length += len;
            expect_data_length -= len;

            SetPlainDelivery(expect_data_length);
            if ( expect_data_length == 0 )
                chunked_transfer_state = EXPECT_CHUNK_DATA_CRLF;

            return true;
        }
        else
            return false;
    }

    else if ( content_length >= 0 ) {
        // Same idea for a body framed by a known length.
        if ( expect_data_length >= len ) {
            body_length += len;
            expect_data_length -= len;

            SetPlainDelivery(expect_data_length);

            if ( expect_data_length <= 0 )
                EndOfData();

            return true;
        }
        else
            return false;
    }

    // No known framing: cannot tell whether the gap stayed inside the body.
    return false;
}

// Receives a buffer of decoded body data: passes it on to
// MIME_Entity::SubmitData() unless body delivery is turned off, then feeds it
// to the file analysis framework.
void HTTP_Entity::SubmitData(int len, const char* buf) {
    if ( deliver_body )
        analyzer::mime::MIME_Entity::SubmitData(len, buf);

    if ( send_size && (encoding ==
GZIP || encoding == DEFLATE) )
        // Auto-decompress in DeliverBody invalidates sizes derived from headers
        send_size = false;

    if ( is_partial_content ) {
        // Partial content: the total size is the instance length from the
        // Content-Range header, and data is placed at an explicit offset.
        // NOTE(review): the template argument of reinterpret_cast is missing
        // in this copy of the file (here and in the else branch).
        if ( send_size && instance_length > 0 )
            precomputed_file_id =
                file_mgr->SetSize(instance_length, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
                                  http_message->MyHTTP_Analyzer()->Conn(), http_message->IsOrig(),
                                  precomputed_file_id);

        precomputed_file_id =
            file_mgr->DataIn(reinterpret_cast(buf), len, offset, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
                             http_message->MyHTTP_Analyzer()->Conn(), http_message->IsOrig(), precomputed_file_id);

        offset += len;
    }
    else {
        if ( send_size && content_length > 0 )
            precomputed_file_id =
                file_mgr->SetSize(content_length, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
                                  http_message->MyHTTP_Analyzer()->Conn(), http_message->IsOrig(),
                                  precomputed_file_id);

        precomputed_file_id =
            file_mgr->DataIn(reinterpret_cast(buf), len, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
                             http_message->MyHTTP_Analyzer()->Conn(), http_message->IsOrig(), precomputed_file_id);
    }

    // The size is announced at most once per entity.
    send_size = false;
}

// Requests plain (non line-based) delivery of the next `length` bytes from the
// owning message. A non-zero length is only valid outside of the headers.
void HTTP_Entity::SetPlainDelivery(int64_t length) {
    ASSERT(length >= 0);
    ASSERT(length == 0 || ! in_header);

    http_message->SetPlainDelivery(length);

    // If we skip HTTP data, the skipped part will appear as
    // 'undelivered' data, so we do not need to adjust
    // expect_data_length.
}

// Inspects a single header and records what it implies for body framing and
// decoding, then passes it on to MIME_Entity::SubmitHeader(). Headers looked
// at: Content-Length, Content-Range (only for 206 replies),
// Transfer-Encoding, and Content-Encoding.
//
// Note that the early returns in the Content-Range branch leave the function
// before the MIME_Entity::SubmitHeader() call at its end.
void HTTP_Entity::SubmitHeader(analyzer::mime::MIME_Header* h) {
    if ( analyzer::mime::istrequal(h->get_name(), "content-length") ) {
        data_chunk_t vt = h->get_value_token();
        if ( ! analyzer::mime::is_null_data_chunk(vt) ) {
            int64_t n;
            if ( util::atoi_n(vt.length, vt.data, nullptr, 10, n) ) {
                content_length = n;

                if ( is_partial_content && range_length != content_length ) {
                    // Possible evasion attempt.
                    http_message->Weird("HTTP_range_not_matching_len");

                    // Take the maximum of both lengths to avoid evasions.
                    if ( range_length > content_length )
                        content_length = range_length;
                }
            }
            else
                // Unparsable value: treat as a zero-length body.
                content_length = 0;
        }
    }

    // Figure out content-length for HTTP 206 Partial Content response
    else if ( analyzer::mime::istrequal(h->get_name(), "content-range") &&
              http_message->MyHTTP_Analyzer()->HTTP_ReplyCode() == 206 ) {
        // Expected shape of the value: "<unit> <first>-<last>/<instance-length>",
        // where only the unit "bytes" is accepted and the instance length may
        // be "*".
        data_chunk_t vt = h->get_value_token();
        string byte_unit(vt.data, vt.length);
        vt = h->get_value_after_token();
        string byte_range(vt.data, vt.length);
        // Drop all spaces from the range part before splitting it.
        byte_range.erase(remove(byte_range.begin(), byte_range.end(), ' '), byte_range.end());

        if ( byte_unit != "bytes" ) {
            http_message->Weird("HTTP_content_range_unknown_byte_unit");
            return;
        }

        size_t p = byte_range.find('/');
        if ( p == string::npos ) {
            http_message->Weird("HTTP_content_range_cannot_parse");
            return;
        }

        string byte_range_resp_spec = byte_range.substr(0, p);
        string instance_length_str = byte_range.substr(p + 1);

        p = byte_range_resp_spec.find('-');
        if ( p == string::npos ) {
            http_message->Weird("HTTP_content_range_cannot_parse");
            return;
        }

        string first_byte_pos = byte_range_resp_spec.substr(0, p);
        string last_byte_pos = byte_range_resp_spec.substr(p + 1);

        if ( DEBUG_http )
            DEBUG_MSG("Parsed Content-Range: %s %s-%s/%s\n", byte_unit.c_str(), first_byte_pos.c_str(),
                      last_byte_pos.c_str(), instance_length_str.c_str());

        int64_t f, l;
        int fr = util::atoi_n(first_byte_pos.size(), first_byte_pos.c_str(), nullptr, 10, f);
        int lr = util::atoi_n(last_byte_pos.size(), last_byte_pos.c_str(), nullptr, 10, l);
        if ( fr != 1 || lr != 1 ) {
            http_message->Weird("HTTP_content_range_cannot_parse");
            return;
        }

        // Both positions are inclusive.
        int64_t len = l - f + 1;

        if ( DEBUG_http )
            DEBUG_MSG("Content-Range length = %" PRId64 "\n", len);

        if ( len > 0 ) {
            if ( instance_length_str != "*" ) {
                if ( ! util::atoi_n(instance_length_str.size(), instance_length_str.c_str(), nullptr, 10,
                                    instance_length) )
                    instance_length = 0;
            }

            is_partial_content = true;
            offset = f;
            range_length = len;

            if ( content_length > 0 ) {
                if ( content_length != range_length ) {
                    // Possible evasion attempt.
                    http_message->Weird("HTTP_range_not_matching_len");

                    // Take the maximum of both lengths to avoid evasions.
                    if ( range_length > content_length )
                        content_length = range_length;
                }
            }
            else
                // No usable Content-Length so far: the range length is used.
                content_length = range_length;
        }
        else {
            http_message->Weird("HTTP_non_positive_content_range");
            return;
        }
    }

    else if ( analyzer::mime::istrequal(h->get_name(), "transfer-encoding") ) {
        // Pick the version of whichever message is currently being parsed.
        HTTP_Analyzer::HTTP_VersionNumber http_version;

        if ( http_message->analyzer->GetRequestOngoing() )
            http_version = http_message->analyzer->GetRequestVersionNumber();
        else // reply_ongoing
            http_version = http_message->analyzer->GetReplyVersionNumber();

        // Chunked coding is only honored for HTTP/1.1 messages.
        data_chunk_t vt = h->get_value_token();
        if ( analyzer::mime::istrequal(vt, "chunked") && http_version == HTTP_Analyzer::HTTP_VersionNumber{1, 1} )
            chunked_transfer_state = BEFORE_CHUNK;
    }

    else if ( analyzer::mime::istrequal(h->get_name(), "content-encoding") ) {
        data_chunk_t vt = h->get_value_token();
        if ( analyzer::mime::istrequal(vt, "gzip") || analyzer::mime::istrequal(vt, "x-gzip") )
            encoding = GZIP;
        if ( analyzer::mime::istrequal(vt, "deflate") )
            encoding = DEFLATE;
    }

    analyzer::mime::MIME_Entity::SubmitHeader(h);
}

// Called once the header section is complete. Decides, from what the headers
// announced, how the body (if any) is going to be delivered: as chunks, as a
// fixed number of bytes, as plain data of effectively unbounded length, or
// not at all. When called for the header section following the last chunk
// (EXPECT_CHUNK_TRAILER), it ends the entity instead.
void HTTP_Entity::SubmitAllHeaders() {
    // in_header should be set to false when SubmitAllHeaders() is called.
    ASSERT(! in_header);

    if ( DEBUG_http )
        DEBUG_MSG("%.6f end of headers\n", run_state::network_time);

    if ( Parent() && Parent()->MIMEContentType() == analyzer::mime::CONTENT_TYPE_MULTIPART ) {
        // Don't treat single \r or \n characters in the multipart body content
        // as lines because the MIME_Entity code will implicitly add back a
        // \r\n for each line it receives.  We do this instead of setting
        // plain delivery mode for the content line analyzer because
        // the size of the content to deliver "plainly" may be unknown
        // and just leaving it in that mode indefinitely screws up the
        // detection of multipart boundaries.
        http_message->content_line->SuppressWeirds(true);
        http_message->content_line->SetCRLFAsEOL(0);
    }

    if ( content_length >= 0 )
        http_message->SetDeliverySize(content_length);

    // The presence of a message-body in a request is signaled by
    // the inclusion of a Content-Length or Transfer-Encoding
    // header field in the request's message-headers.
    if ( chunked_transfer_state == EXPECT_CHUNK_TRAILER ) {
        // These were the headers following the last chunk: the entity is
        // complete.
        http_message->SubmitTrailingHeaders(headers);
        chunked_transfer_state = EXPECT_NOTHING;
        EndOfData();
        return;
    }

    analyzer::mime::MIME_Entity::SubmitAllHeaders();

    if ( expect_body == HTTP_BODY_NOT_EXPECTED ) {
        EndOfData();
        return;
    }

    if ( content_type == analyzer::mime::CONTENT_TYPE_MULTIPART ||
         content_type == analyzer::mime::CONTENT_TYPE_MESSAGE ) {
        // Do nothing.
        // Make sure that we check for multiple/message contents first,
        // because we do not have to turn on .
        // NOTE(review): the sentence above is truncated in this copy of the
        // file; presumably it refers to plain delivery, which the other
        // branches below enable -- confirm against upstream.
        if ( chunked_transfer_state != NON_CHUNKED_TRANSFER ) {
            http_message->Weird("HTTP_chunked_transfer_for_multipart_message");
        }
    }

    else if ( chunked_transfer_state != NON_CHUNKED_TRANSFER )
        chunked_transfer_state = EXPECT_CHUNK_SIZE;

    else if ( content_length >= 0 ) {
        if ( content_length > 0 ) {
            expect_data_length = content_length;
            SetPlainDelivery(content_length);
        }
        else
            EndOfData(); // handle the case that content-length = 0
    }

    // Turn plain delivery on permanently for compressed bodies without
    // content-length headers or if connection is to be closed afterwards
    // anyway.
    else if ( http_message->MyHTTP_Analyzer()->IsConnectionClose() || encoding == GZIP || encoding == DEFLATE ) {
        // FIXME: Using INT_MAX is kind of a hack here.  Better
        // would be to make -1 as special value interpreted as
        // "until the end of the connection".
expect_data_length = INT_MAX;
        SetPlainDelivery(INT_MAX);
    }

    else {
        if ( expect_body != HTTP_BODY_EXPECTED )
            // there is no body
            EndOfData();
    }
}

// Sets up a message for one direction of the connection.
//
// arg_analyzer:        the HTTP analyzer owning this message.
// arg_cl:              the content line analyzer whose delivery mode this
//                      message controls.
// arg_is_orig:         stored in is_orig; passed along with every event
//                      raised for this message.
// expect_body:         passed through to the top-level entity.
// init_header_length:  initial value of the header byte counter.
//
// The top-level entity is created here and owned by the message; entering it
// raises http_begin_entity via BeginEntity().
HTTP_Message::HTTP_Message(HTTP_Analyzer* arg_analyzer, analyzer::tcp::ContentLine_Analyzer* arg_cl, bool arg_is_orig,
                           int expect_body, int64_t init_header_length)
    : analyzer::mime::MIME_Message(arg_analyzer) {
    analyzer = arg_analyzer;
    content_line = arg_cl;
    is_orig = arg_is_orig;

    current_entity = nullptr;
    top_level = new HTTP_Entity(this, nullptr, expect_body);
    entity_data_buffer = nullptr; // allocated on demand in RequestBuffer()
    BeginEntity(top_level);

    start_time = run_state::network_time;
    body_length = 0;
    content_gap_length = 0;
    header_length = init_header_length;
}

// Releases the top-level entity and the entity data buffer.
HTTP_Message::~HTTP_Message() {
    delete top_level;
    delete[] entity_data_buffer;
}

// Builds the "http_message_stat" record passed to the http_message_done
// event. Fields are assigned in order: start time, the `interrupted` flag,
// `msg`, then the body, content-gap and header byte counts.
// NOTE(review): the template arguments of id::find_type, make_intrusive and
// static_cast are missing in this copy of the file.
RecordValPtr HTTP_Message::BuildMessageStat(bool interrupted, const char* msg) {
    static auto http_message_stat = id::find_type("http_message_stat");
    auto stat = make_intrusive(http_message_stat);
    int field = 0;
    stat->AssignTime(field++, start_time);
    stat->Assign(field++, interrupted);
    stat->Assign(field++, msg);
    stat->Assign(field++, static_cast(body_length));
    stat->Assign(field++, static_cast(content_gap_length));
    stat->Assign(field++, static_cast(header_length));
    return stat;
}

// Finishes the message: ends the MIME message and the top-level entity,
// signals end-of-file to the file analysis framework (except for the reply
// side of a 206 response), raises http_message_done, and notifies the
// analyzer. Does nothing if the message is already finished.
void HTTP_Message::Done(bool interrupted, const char* detail) {
    if ( finished )
        return;

    analyzer::mime::MIME_Message::Done();

    // DEBUG_MSG("%.6f HTTP message done.\n", run_state::network_time);
    top_level->EndOfData();

    if ( is_orig || MyHTTP_Analyzer()->HTTP_ReplyCode() != 206 ) {
        // multipart/byteranges may span multiple connections, so don't EOF.
        // NOTE(review): the template argument of dynamic_cast is missing in
        // this copy of the file.
        HTTP_Entity* he = dynamic_cast(top_level);

        if ( he && !
he->FileID().empty() )
            // The entity already has a file ID: end exactly that file.
            file_mgr->EndOfFile(he->FileID());
        else
            file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(), MyHTTP_Analyzer()->Conn(), is_orig);
    }

    if ( http_message_done )
        GetAnalyzer()->EnqueueConnEvent(http_message_done, analyzer->ConnVal(), val_mgr->Bool(is_orig),
                                        BuildMessageStat(interrupted, detail));

    MyHTTP_Analyzer()->HTTP_MessageDone(is_orig, this);
}

// Passes a content gap of `len` bytes to the current entity (or the top-level
// one if there is no current entity). Returns true, and adds the gap to
// content_gap_length, if the entity reports that it could absorb the gap.
// NOTE(review): the template argument of static_cast is missing in this copy
// of the file.
bool HTTP_Message::Undelivered(int64_t len) {
    HTTP_Entity* e = current_entity ? current_entity : static_cast(top_level);

    if ( e && e->Undelivered(len) ) {
        content_gap_length += len;
        return true;
    }

    return false;
}

// Makes `entity` the current entity and raises http_begin_entity.
void HTTP_Message::BeginEntity(analyzer::mime::MIME_Entity* entity) {
    if ( DEBUG_http )
        DEBUG_MSG("%.6f: begin entity (%d)\n", run_state::network_time, is_orig);

    current_entity = (HTTP_Entity*)entity;

    if ( http_begin_entity )
        analyzer->EnqueueConnEvent(http_begin_entity, analyzer->ConnVal(), val_mgr->Bool(is_orig));
}

// Leaves `entity`: raises http_end_entity, makes the parent the current
// entity, and either finishes the whole message (for the top-level entity) or
// signals end-of-file for the nested entity.
void HTTP_Message::EndEntity(analyzer::mime::MIME_Entity* entity) {
    if ( DEBUG_http )
        DEBUG_MSG("%.6f: end entity (%d)\n", run_state::network_time, is_orig);

    // Only the top-level entity contributes to the message's byte counters.
    if ( entity == top_level ) {
        body_length += ((HTTP_Entity*)entity)->BodyLength();
        header_length += ((HTTP_Entity*)entity)->HeaderLength();
    }

    if ( http_end_entity )
        analyzer->EnqueueConnEvent(http_end_entity, analyzer->ConnVal(), val_mgr->Bool(is_orig));

    current_entity = (HTTP_Entity*)entity->Parent();

    // Undo the content line settings that HTTP_Entity::SubmitAllHeaders()
    // applies for entities nested in a multipart parent.
    if ( entity->Parent() && entity->Parent()->MIMEContentType() == analyzer::mime::CONTENT_TYPE_MULTIPART ) {
        content_line->SuppressWeirds(false);
        content_line->SetCRLFAsEOL();
    }

    // It is necessary to call Done when EndEntity is triggered by
    // SubmitAllHeaders (through EndOfData).
    if ( entity == top_level )
        Done();

    else if ( is_orig || MyHTTP_Analyzer()->HTTP_ReplyCode() != 206 ) {
        // NOTE(review): the template argument of dynamic_cast is missing in
        // this copy of the file.
        HTTP_Entity* he = dynamic_cast(entity);

        if ( he && ! he->FileID().empty() )
            file_mgr->EndOfFile(he->FileID());
        else
            file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(), MyHTTP_Analyzer()->Conn(), is_orig);
    }
}

// Hands a single parsed header to the analyzer.
void HTTP_Message::SubmitHeader(analyzer::mime::MIME_Header* h) { MyHTTP_Analyzer()->HTTP_Header(is_orig, h); }

// Raises http_all_headers (with the header list converted to a table) and
// http_content_type (with the current entity's type and subtype), each only
// if the respective event handler exists.
void HTTP_Message::SubmitAllHeaders(analyzer::mime::MIME_HeaderList& hlist) {
    if ( http_all_headers )
        analyzer->EnqueueConnEvent(http_all_headers, analyzer->ConnVal(), val_mgr->Bool(is_orig),
                                   ToHeaderTable(hlist));

    if ( http_content_type )
        analyzer->EnqueueConnEvent(http_content_type, analyzer->ConnVal(), val_mgr->Bool(is_orig),
                                   current_entity->GetContentType(), current_entity->GetContentSubType());
}

// Receives the headers following the last chunk of a chunked body. Currently
// a no-op.
void HTTP_Message::SubmitTrailingHeaders(analyzer::mime::MIME_HeaderList& /* hlist */) {
    // Do nothing for now.  Note that if this ever changes do something
    // which relies on the header list argument, that's currently not
    // populated unless the http_all_headers or mime_all_headers events
    // are being used (so you may need to change that, too).
}

// Hands a buffer of entity data to the analyzer, wrapped in a newly allocated
// String, if the http_entity_data event handler exists.
// NOTE(review): the template argument of reinterpret_cast is missing in this
// copy of the file.
void HTTP_Message::SubmitData(int len, const char* buf) {
    if ( http_entity_data )
        MyHTTP_Analyzer()->HTTP_EntityData(is_orig, new String(reinterpret_cast(buf), len, false));
}

// Provides the buffer in which entity data is collected. The buffer is
// allocated on first use, with the size given by
// http_entity_data_delivery_size, and released in the destructor.
// Always returns true.
bool HTTP_Message::RequestBuffer(int* plen, char** pbuf) {
    if ( ! entity_data_buffer )
        entity_data_buffer = new char[zeek::detail::http_entity_data_delivery_size];

    *plen = zeek::detail::http_entity_data_delivery_size;
    *pbuf = entity_data_buffer;
    return true;
}

void HTTP_Message::SubmitAllData() {
    // This marks the end of message
}

// Translates a MIME event type into a category string and reports it, with
// `detail`, through the analyzer's HTTP_Event(). Unknown event types are
// reported as an analyzer error instead.
void HTTP_Message::SubmitEvent(int event_type, const char* detail) {
    const char* category = "";

    switch ( event_type ) {
        case analyzer::mime::MIME_EVENT_ILLEGAL_FORMAT: category = "illegal format"; break;

        case analyzer::mime::MIME_EVENT_ILLEGAL_ENCODING: category = "illegal encoding"; break;

        case analyzer::mime::MIME_EVENT_CONTENT_GAP: category = "content gap"; break;

        default: reporter->AnalyzerError(MyHTTP_Analyzer(), "unrecognized HTTP message event"); return;
    }

    MyHTTP_Analyzer()->HTTP_Event(category, detail);
}

// Switches the content line analyzer to plain delivery for `length` bytes.
// If the skip_http_data option is set, those bytes are additionally skipped.
void HTTP_Message::SetPlainDelivery(int64_t length) {
    content_line->SetPlainDelivery(length);

    if ( length > 0 && BifConst::skip_http_data )
        content_line->SkipBytesAfterThisLine(length);
}

// Forwards the delivery size to the content line analyzer.
void HTTP_Message::SetDeliverySize(int64_t length) { content_line->SetDeliverySize(length); }

// Asks the current entity, if any, to skip its body.
void HTTP_Message::SkipEntityData() {
    if ( current_entity )
        current_entity->SkipBody();
}

// Reports a weird through the owning analyzer.
void HTTP_Message::Weird(const char* msg) { analyzer->Weird(msg); }

// Creates the analyzer in its initial state: nothing seen yet, a request line
// and a reply line expected, and one content line support analyzer attached
// per direction. Only the responder-side one has partial skipping enabled.
HTTP_Analyzer::HTTP_Analyzer(Connection* conn) : analyzer::tcp::TCP_ApplicationAnalyzer("HTTP", conn) {
    num_requests = num_replies = 0;
    num_request_lines = num_reply_lines = 0;

    keep_alive = 0;
    connection_close = 0;

    request_message = reply_message = nullptr;
    request_state = EXPECT_REQUEST_LINE;
    reply_state = EXPECT_REPLY_LINE;

    request_ongoing = 0;
    reply_ongoing = 0;

    reply_code = 0;

    connect_request = false;
    pia = nullptr;
    upgraded = false;
    upgrade_connection = false;
    upgrade_protocol.clear();

    content_line_orig = new analyzer::tcp::ContentLine_Analyzer(conn, true);
    AddSupportAnalyzer(content_line_orig);

    content_line_resp = new analyzer::tcp::ContentLine_Analyzer(conn, false);
    content_line_resp->SetSkipPartial(true);
    AddSupportAnalyzer(content_line_resp);
}

// Finishes the analyzer: closes any message still in progress as interrupted,
// finishes the base class, releases both messages, raises the statistics
// event, and signals end-of-file for the originator side.
void
HTTP_Analyzer::Done() {
    if ( IsFinished() )
        return;

    RequestMade(true, "message interrupted when connection done");
    ReplyMade(true, "message interrupted when connection done");

    // Call Done() on support and child analyzers only after RequestMade()
    // and ReplyMade() completed. These methods may interact with the analyzer's
    // child analyzers (specifically through HTTP_Entity's EndOfData() and
    // ForwardEndOfData()) which isn't valid after a call to Done().
    analyzer::tcp::TCP_ApplicationAnalyzer::Done();

    delete request_message;
    request_message = nullptr;

    delete reply_message;
    reply_message = nullptr;

    GenStats();

    // Drop whatever requests are still waiting for a reply.
    unanswered_requests = {};

    file_mgr->EndOfFile(GetAnalyzerTag(), Conn(), true);

    /* TODO: this might be nice to have, but reply code is cleared by now.
    if ( HTTP_ReplyCode() != 206 )
        // multipart/byteranges may span multiple connections
        file_mgr->EndOfFile(GetAnalyzerTag(), Conn(), false);
    */
}

// Main input routine. Receives data for one direction, as delivered through
// the content line support analyzers (a line at a time, or a plain chunk when
// plain delivery is active), and drives the request/reply state machines.
//
// Special cases, in the order they are checked:
//  - partial TCP connections are ignored;
//  - after an upgrade or a successful CONNECT, data is only forwarded to
//    the child analyzers;
//  - HTTP/0.9 reply data is fed into the reply message as raw body data;
//  - in plain delivery mode, data goes straight to the message in progress.
void HTTP_Analyzer::DeliverStream(int len, const u_char* data, bool is_orig) {
    analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, is_orig);

    if ( TCP() && TCP()->IsPartial() )
        return;

    if ( upgraded || pia ) {
        // There will be a PIA instance if this connection has been identified
        // as a connect proxy, or a child analyzer if there was an upgrade.
        ForwardStream(len, data, is_orig);
        return;
    }

    // NOTE(review): the template argument of reinterpret_cast is missing in
    // this copy of the file.
    const char* line = reinterpret_cast(data);
    const char* end_of_line = line + len;

    // HTTP 0.9 is just raw data directly from the server, special case.
    if ( reply_state == EXPECT_REPLY_HTTP09 && ! is_orig ) {
        if ( ! reply_message ) {
            // First reply data: set up the reply message now, since there is
            // no reply line that would otherwise trigger it.
            SetVersion(&reply_version, {0, 9});

            if ( ! unanswered_requests.empty() ) {
                AnalyzerConfirmation();
                unanswered_requests.pop();
            }

            // Expect the server to close the connection after replying. This is used within
            // HTTP_Message() below to switch the message into plain delivery mode (and
            // the content_line_analyzer, but that's not used anymore).
            connection_close = 1;
            reply_ongoing = 1;

            HTTP_Reply();

            InitHTTPMessage(content_line_resp, reply_message, is_orig, ExpectReplyMessageBody(), 0);

            // Finish header processing right away and switch into plain delivery.
            // Need trailing_CRLF set to avoid a weird.
            reply_message->Deliver(0, "", true);
        }

        reply_message->Deliver(len, line, false);
        return;
    }

    analyzer::tcp::ContentLine_Analyzer* content_line = is_orig ? content_line_orig : content_line_resp;

    if ( content_line->IsPlainDelivery() ) {
        // Body data delivered in bulk rather than as a line.
        if ( is_orig ) {
            if ( request_message )
                request_message->Deliver(len, line, false);
            else
                Weird("unexpected_client_HTTP_data");
        }
        else {
            if ( reply_message )
                reply_message->Deliver(len, line, false);
            else
                Weird("unexpected_server_HTTP_data");
        }
        return;
    }

    // HTTP_Event("HTTP line", to_string_val(length, line));

    if ( is_orig ) {
        ++num_request_lines;

        switch ( request_state ) {
            case EXPECT_REQUEST_LINE: {
                // res < 0: stop processing this line; res > 0: a request line
                // was recognized; otherwise the line is not a request line.
                int res = HTTP_RequestLine(line, end_of_line);

                if ( res < 0 )
                    return;

                else if ( res > 0 ) {
                    ++num_requests;

                    if ( ! keep_alive && num_requests > 1 )
                        Weird("unexpected_multiple_HTTP_requests");

                    request_state = EXPECT_REQUEST_MESSAGE;
                    request_ongoing = 1;
                    unanswered_requests.push(request_method);
                    HTTP_Request();
                    InitHTTPMessage(content_line, request_message, is_orig, HTTP_BODY_MAYBE, len);

                    // For HTTP/0.9, turn off the content_line analyzer for the
                    // responder because we expect raw data.
                    if ( request_version == HTTP_VersionNumber{0, 9} ) {
                        if ( request_method->ToStdString() != "GET" )
                            Weird("invalid_http_09_request_method", request_method->CheckString());

                        // If we already have a reply_message that means we saw
                        // an HTTP response before a request and interpreted
                        // it as HTTP/1.1 already. Reset the state here because
                        // we're removing the ContentLine support analyzer and
                        // any assumptions about expected delivery size state
                        // become invalid.
                        if ( reply_message ) {
                            Weird("http_09_reply_before_request");
                            reply_message->Done();
                            delete reply_message;
                            reply_message = nullptr;
                        }

                        reply_state = EXPECT_REPLY_HTTP09;
                        RemoveSupportAnalyzer(content_line_resp);
                    }
                }

                else {
                    if ( ! RequestExpected() )
                        HTTP_Event("crud_trailing_HTTP_request", analyzer::mime::to_string_val(line, end_of_line));
                    else {
                        // We do see HTTP requests with a
                        // trailing EOL that's not accounted
                        // for by the content-length. This
                        // will lead to a call to this method
                        // with len==0 while we are expecting
                        // a new request. Since HTTP servers
                        // handle such requests gracefully,
                        // we should do so as well.
                        if ( len == 0 )
                            Weird("empty_http_request");
                        else {
                            AnalyzerViolation("not a http request line");
                            request_state = EXPECT_REQUEST_NOTHING;
                        }
                    }
                }
            } break;

            case EXPECT_REQUEST_MESSAGE: request_message->Deliver(len, line, true); break;

            case EXPECT_REQUEST_TRAILER:
            case EXPECT_REQUEST_NOTHING: break;
        }
    }
    else { // HTTP reply
        switch ( reply_state ) {
            case EXPECT_REPLY_LINE:
                if ( HTTP_ReplyLine(line, end_of_line) ) {
                    ++num_replies;

                    if ( ! unanswered_requests.empty() )
                        AnalyzerConfirmation();

                    reply_state = EXPECT_REPLY_MESSAGE;
                    reply_ongoing = 1;

                    HTTP_Reply();

                    if ( connect_request && reply_code != 200 )
                        // Request failed, do not set up tunnel.
                        connect_request = false;

                    InitHTTPMessage(content_line, reply_message, is_orig, ExpectReplyMessageBody(), len);
                }
                else {
                    // An empty line where a reply line is expected is ignored.
                    if ( line != end_of_line ) {
                        AnalyzerViolation("not a http reply line");
                        reply_state = EXPECT_REPLY_NOTHING;
                    }
                }

                break;

            case EXPECT_REPLY_MESSAGE:
                reply_message->Deliver(len, line, true);

                if ( connect_request && len == 0 ) {
                    // End of message header reached, set up
                    // tunnel decapsulation.
                    pia = new analyzer::pia::PIA_TCP(Conn());

                    if ( AddChildAnalyzer(pia) ) {
                        pia->FirstPacket(true, TransportProto::TRANSPORT_TCP);
                        pia->FirstPacket(false, TransportProto::TRANSPORT_TCP);

                        int remaining_in_content_line = content_line_resp->GetDeliverStreamRemainingLength();
                        if ( remaining_in_content_line > 0 ) {
                            // If there's immediately data following the empty line
                            // of a successful CONNECT reply, that's at least curious.
                            // Further, switch the responder's ContentLine analyzer
                            // into plain delivery mode so anything left is sent to
                            // PIA unaltered.
                            const char* addl = zeek::util::fmt("%d", remaining_in_content_line);
                            Weird("protocol_data_with_HTTP_CONNECT_reply", addl);
                            content_line_resp->SetPlainDelivery(remaining_in_content_line);
                        }

                        // This connection has transitioned to no longer
                        // being http and the content line support analyzers
                        // need to be removed.
                        RemoveSupportAnalyzer(content_line_orig);
                        RemoveSupportAnalyzer(content_line_resp);
                    }

                    else
                        // AddChildAnalyzer() will have deleted PIA.
                        pia = nullptr;
                }

                break;

            case EXPECT_REPLY_HTTP09: // unreachable
            case EXPECT_REPLY_TRAILER:
            case EXPECT_REPLY_NOTHING: break;
        }
    }
}

// Handles a content gap of `len` bytes at sequence number `seq` in the given
// direction. Unless the gap is inside data the content line analyzer was
// skipping anyway, a content-gap event is reported on the message in
// progress. If the message can absorb the gap, parsing continues; otherwise
// the affected message(s) are closed as interrupted and further deliveries
// for this direction are skipped.
void HTTP_Analyzer::Undelivered(uint64_t seq, int len, bool is_orig) {
    analyzer::tcp::TCP_ApplicationAnalyzer::Undelivered(seq, len, is_orig);

    // DEBUG_MSG("Undelivered from %"PRIu64": %d bytes\n", seq, length);

    HTTP_Message* msg = is_orig ? request_message : reply_message;

    analyzer::tcp::ContentLine_Analyzer* content_line = is_orig ? content_line_orig : content_line_resp;

    if ( !
content_line->IsSkippedContents(seq, len) ) {
        if ( msg )
            msg->SubmitEvent(analyzer::mime::MIME_EVENT_CONTENT_GAP,
                             util::fmt("seq=%" PRIu64 ", len=%d", seq, len));
    }

    // Check if the content gap falls completely within a message body
    if ( msg && msg->Undelivered(len) )
        // If so, we are safe to skip the content and go on parsing
        return;

    // Otherwise stop parsing the connection
    if ( is_orig ) {
        // Stop parsing reply messages too, because whether a
        // reply contains a body may depend on knowing the
        // request method

        RequestMade(true, "message interrupted by a content gap");
        ReplyMade(true, "message interrupted by a content gap");

        content_line->SetSkipDeliveries(true);
    }
    else {
        ReplyMade(true, "message interrupted by a content gap");
        content_line->SetSkipDeliveries(true);
    }
}

// Swaps originator and responder. Only done while no HTTP traffic has been
// processed yet; otherwise a weird is raised and the analyzer either keeps
// its state (after an upgrade/CONNECT) or stops processing the connection.
void HTTP_Analyzer::FlipRoles() {
    analyzer::tcp::TCP_ApplicationAnalyzer::FlipRoles();

    // If FlipRoles() is invoked after we've upgraded to something,
    // don't do anything. This shouldn't happen as flipping of TCP
    // connections currently happens before any data is transferred,
    // but better safe than sorry.
    if ( upgraded || pia ) {
        Weird("HTTP_late_flip_roles");
        return;
    }

    // If we haven't upgraded but saw request or replies, just bail
    // for the rest of this connection. Again, this should never happen
    // right now, but raise a weird in case it starts to happen.
    if ( num_requests > 0 || num_replies > 0 ) {
        Weird("HTTP_late_flip_roles");
        SetSkip(true);
        return;
    }

    // IsOrig() of the support analyzer has been updated, but we still need
    // to change the analyzer's local state and the partial skipping setting.
    // The skip-partial settings are read before the swap and re-applied
    // afterwards, so each direction keeps the setting it had.
    bool skip_partial_orig = content_line_orig->SkipPartial();
    bool skip_partial_resp = content_line_resp->SkipPartial();
    std::swap(content_line_orig, content_line_resp);
    content_line_orig->SetSkipPartial(skip_partial_orig);
    content_line_resp->SetSkipPartial(skip_partial_resp);
}

// One endpoint has finished sending: close the message of that direction,
// marked as not interrupted.
void HTTP_Analyzer::EndpointEOF(bool is_orig) {
    analyzer::tcp::TCP_ApplicationAnalyzer::EndpointEOF(is_orig);

    // DEBUG_MSG("%.6f eof\n", run_state::network_time);

    if ( is_orig )
        RequestMade(false, "message ends as connection contents are completely delivered");
    else
        ReplyMade(false, "message ends as connection contents are completely delivered");
}

// The connection finished: close both messages, marked as interrupted.
void HTTP_Analyzer::ConnectionFinished(bool half_finished) {
    analyzer::tcp::TCP_ApplicationAnalyzer::ConnectionFinished(half_finished);

    // DEBUG_MSG("%.6f connection finished\n", run_state::network_time);
    RequestMade(true, "message ends as connection is finished");
    ReplyMade(true, "message ends as connection is finished");
}

// The connection was reset: close both messages, marked as interrupted.
void HTTP_Analyzer::ConnectionReset() {
    analyzer::tcp::TCP_ApplicationAnalyzer::ConnectionReset();

    RequestMade(true, "message interrupted by RST");
    ReplyMade(true, "message interrupted by RST");
}

// A packet with RST was seen: close both messages, marked as interrupted.
void HTTP_Analyzer::PacketWithRST() {
    analyzer::tcp::TCP_ApplicationAnalyzer::PacketWithRST();

    RequestMade(true, "message interrupted by RST");
    ReplyMade(true, "message interrupted by RST");
}

// Raises the http_stats event, if a handler exists, with a record holding the
// number of requests and replies and the request and reply versions.
// NOTE(review): the template arguments of id::find_type and make_intrusive
// are missing in this copy of the file.
void HTTP_Analyzer::GenStats() {
    if ( http_stats ) {
        static auto http_stats_rec = id::find_type("http_stats_rec");
        auto r = make_intrusive(http_stats_rec);
        r->Assign(0, num_requests);
        r->Assign(1, num_replies);
        r->Assign(2, request_version.ToDouble());
        r->Assign(3, reply_version.ToDouble());

        // DEBUG_MSG("%.6f http_stats\n", run_state::network_time);
        EnqueueConnEvent(http_stats, ConnVal(), std::move(r));
    }
}

// Checks whether [line, end_of_line) starts with the NUL-terminated string
// `prefix`, optionally ignoring case. Returns a pointer to the first
// character after the matched prefix, or nullptr if the prefix does not
// match completely.
const char* HTTP_Analyzer::PrefixMatch(const char* line, const char* end_of_line, const char* prefix,
                                       bool ignore_case) {
    while ( *prefix && line < end_of_line &&
            ((ignore_case && tolower((unsigned char)*prefix) == tolower((unsigned
char)*line)) || *prefix == *line) ) { ++prefix; ++line; } if ( *prefix ) // It didn't match. return nullptr; return line; } const char* HTTP_Analyzer::PrefixWordMatch(const char* line, const char* end_of_line, const char* prefix, bool ignore_case) { if ( (line = PrefixMatch(line, end_of_line, prefix, ignore_case)) == nullptr ) return nullptr; const char* orig_line = line; line = util::skip_whitespace(line, end_of_line); if ( line == orig_line ) // Word didn't end at prefix. return nullptr; return line; } static bool is_HTTP_token_char(unsigned char c) { return c > 31 && c < 127 && // Exclude non-ascii and DEL/CTL per RFC 2616 c != ' ' && c != '\t' && // Separators. c != '(' && c != ')' && c != '<' && c != '>' && c != '@' && c != ',' && c != ';' && c != ':' && c != '\\' && c != '"' && c != '/' && c != '[' && c != ']' && c != '?' && c != '=' && c != '{' && c != '}'; } static const char* get_HTTP_token(const char* s, const char* e) { while ( s < e && is_HTTP_token_char(*s) ) ++s; return s; } int HTTP_Analyzer::HTTP_RequestLine(const char* line, const char* end_of_line) { const char* rest = nullptr; const char* end_of_method = get_HTTP_token(line, end_of_line); if ( end_of_method == line ) { // something went wrong with get_HTTP_token // perform a weak test to see if the string "HTTP/" // is found at the end of the RequestLine if ( end_of_line - 9 >= line && strncasecmp(end_of_line - 9, " HTTP/", 6) == 0 ) goto bad_http_request_with_version; goto error; } rest = util::skip_whitespace(end_of_method, end_of_line); if ( rest == end_of_method ) goto error; if ( ! ParseRequest(rest, end_of_line) ) { reporter->AnalyzerError(this, "HTTP ParseRequest failed"); return -1; } // If we determined HTTP/0.9 (no HTTP/ in the request line), assert that // minimally we have an URI and a 3 character method (HTTP 0.9 only // supports GET). If that doesn't hold, probably not HTTP or very strange. 
if ( request_version == HTTP_VersionNumber{0, 9} ) { bool maybe_get_method = (end_of_method - line) >= 3; bool has_uri = request_URI && request_URI->Len() > 0; if ( ! maybe_get_method || ! has_uri ) goto error; } request_method = make_intrusive(end_of_method - line, line); Conn()->Match(zeek::detail::Rule::HTTP_REQUEST, (const u_char*)unescaped_URI->AsString()->Bytes(), unescaped_URI->AsString()->Len(), true, true, true, true); return 1; bad_http_request_with_version: Weird("bad_HTTP_request_with_version"); return 0; error: Weird("bad_HTTP_request"); return 0; } bool HTTP_Analyzer::ParseRequest(const char* line, const char* end_of_line) { const char* end_of_uri; const char* version_start; const char* version_end; const char* match; for ( end_of_uri = line; end_of_uri < end_of_line; ++end_of_uri ) { if ( ! is_reserved_URI_char(*end_of_uri) && ! is_unreserved_URI_char(*end_of_uri) && *end_of_uri != '%' ) break; } match = PrefixMatch(line, end_of_line, "HTTP/", false); if ( ! match ) { // If the uppercase version didn't match, try a case-insensitive version, but // send a weird if it matches. match = PrefixMatch(line, end_of_line, "HTTP/", true); if ( match ) Weird("lowercase_HTTP_keyword"); } if ( end_of_uri >= end_of_line && match ) { Weird("missing_HTTP_uri"); end_of_uri = line; // Leave URI empty. } for ( version_start = end_of_uri; version_start < end_of_line; ++version_start ) { end_of_uri = version_start; version_start = util::skip_whitespace(version_start, end_of_line); if ( PrefixMatch(version_start, end_of_line, "HTTP/", false) ) break; // If the uppercase version didn't match, try a case-insensitive version, but // send a weird if it matches. 
if ( PrefixMatch(version_start, end_of_line, "HTTP/", true) ) { Weird("lowercase_HTTP_keyword"); break; } } if ( version_start >= end_of_line ) { // If no version is found SetVersion(&request_version, {0, 9}); } else { if ( version_start + 8 <= end_of_line ) { version_start += 5; // "HTTP/" SetVersion(&request_version, HTTP_Version(end_of_line - version_start, version_start)); version_end = version_start + 3; if ( util::skip_whitespace(version_end, end_of_line) != end_of_line ) HTTP_Event("crud after HTTP version is ignored", analyzer::mime::to_string_val(line, end_of_line)); } else HTTP_Event("bad_HTTP_version", analyzer::mime::to_string_val(line, end_of_line)); } // NormalizeURI(line, end_of_uri); request_URI = make_intrusive(end_of_uri - line, line); unescaped_URI = make_intrusive(unescape_URI((const u_char*)line, (const u_char*)end_of_uri, this)); return true; } // Only recognize [0-9][.][0-9]. HTTP_Analyzer::HTTP_VersionNumber HTTP_Analyzer::HTTP_Version(int len, const char* data) { if ( len >= 3 && data[0] >= '0' && data[0] <= '9' && data[1] == '.' 
&& data[2] >= '0' && data[2] <= '9' ) { uint8_t major = data[0] - '0'; uint8_t minor = data[2] - '0'; return {major, minor}; } else { HTTP_Event("bad_HTTP_version", analyzer::mime::to_string_val(len, data)); return {}; } } void HTTP_Analyzer::SetVersion(HTTP_VersionNumber* version, HTTP_VersionNumber new_version) { if ( *version == HTTP_VersionNumber{} ) *version = new_version; else if ( *version != new_version ) { Weird("HTTP_version_mismatch"); *version = new_version; } if ( version->major > 1 || (version->major == 1 && version->minor > 0) ) keep_alive = 1; } void HTTP_Analyzer::HTTP_Event(const char* category, const char* detail) { HTTP_Event(category, make_intrusive(detail)); } void HTTP_Analyzer::HTTP_Event(const char* category, StringValPtr detail) { if ( http_event ) // DEBUG_MSG("%.6f http_event\n", run_state::network_time); EnqueueConnEvent(http_event, ConnVal(), make_intrusive(category), std::move(detail)); } StringValPtr HTTP_Analyzer::TruncateURI(const StringValPtr& uri) { const String* str = uri->AsString(); if ( zeek::detail::truncate_http_URI >= 0 && str->Len() > zeek::detail::truncate_http_URI ) { u_char* s = new u_char[zeek::detail::truncate_http_URI + 4]; memcpy(s, str->Bytes(), zeek::detail::truncate_http_URI); memcpy(s + zeek::detail::truncate_http_URI, "...", 4); return zeek::make_intrusive(new zeek::String(true, s, zeek::detail::truncate_http_URI + 3)); } else return uri; } void HTTP_Analyzer::HTTP_Request() { AnalyzerConfirmation(); const char* method = (const char*)request_method->AsString()->Bytes(); int method_len = request_method->AsString()->Len(); if ( strncasecmp(method, "CONNECT", method_len) == 0 ) connect_request = true; if ( http_request ) // DEBUG_MSG("%.6f http_request\n", run_state::network_time); EnqueueConnEvent(http_request, ConnVal(), request_method, TruncateURI(request_URI), TruncateURI(unescaped_URI), make_intrusive(util::fmt("%.1f", request_version.ToDouble()))); } void HTTP_Analyzer::HTTP_Reply() { if ( http_reply ) 
EnqueueConnEvent(http_reply, ConnVal(), make_intrusive(util::fmt("%.1f", reply_version.ToDouble())), val_mgr->Count(reply_code), reply_reason_phrase ? reply_reason_phrase : make_intrusive("")); else reply_reason_phrase = nullptr; } void HTTP_Analyzer::RequestMade(bool interrupted, const char* msg) { if ( ! request_ongoing ) return; request_ongoing = 0; if ( request_message ) request_message->Done(interrupted, msg); // DEBUG_MSG("%.6f request made\n", run_state::network_time); request_method = nullptr; unescaped_URI = nullptr; request_URI = nullptr; num_request_lines = 0; if ( interrupted ) request_state = EXPECT_REQUEST_NOTHING; else request_state = EXPECT_REQUEST_LINE; } void HTTP_Analyzer::ReplyMade(bool interrupted, const char* msg) { if ( ! reply_ongoing ) return; reply_ongoing = 0; // DEBUG_MSG("%.6f reply made\n", run_state::network_time); if ( reply_message ) reply_message->Done(interrupted, msg); // 1xx replies do not indicate the final response to a request, // so don't pop an unanswered request in that case. if ( (reply_code < 100 || reply_code >= 200) && ! unanswered_requests.empty() ) unanswered_requests.pop(); if ( reply_reason_phrase ) reply_reason_phrase = nullptr; // unanswered requests = 1 because there is no pop after 101. if ( reply_code == 101 && unanswered_requests.size() == 1 && upgrade_connection && upgrade_protocol.size() ) HTTP_Upgrade(); reply_code = 0; upgrade_connection = false; upgrade_protocol.clear(); if ( interrupted || upgraded ) reply_state = EXPECT_REPLY_NOTHING; else reply_state = EXPECT_REPLY_LINE; } void HTTP_Analyzer::HTTP_Upgrade() { // Upgraded connection that switches immediately - e.g. websocket. int remaining_in_content_line = content_line_resp->GetDeliverStreamRemainingLength(); if ( remaining_in_content_line > 0 ) { // We've seen a complete HTTP response for an upgrade request and there's // more data buffered in the ContentLine analyzer. This means the next // protocol's data is in the same packet as the HTTP reply. 
Log a weird // as this seems not very likely to happen in the wild. const char* addl = zeek::util::fmt("%d", remaining_in_content_line); Weird("protocol_data_with_HTTP_upgrade_reply", addl); // Switch the ContentLine analyzer to deliver anything remaining in // plain mode so it can be forwarded to the upgrade analyzer. content_line_resp->SetPlainDelivery(remaining_in_content_line); } // Lookup an analyzer tag in the HTTP::upgrade_analyzer table. static const auto& upgrade_analyzers = id::find_val("HTTP::upgrade_analyzers"); auto upgrade_protocol_val = make_intrusive(upgrade_protocol); auto v = upgrade_analyzers->Find(upgrade_protocol_val); if ( ! v ) { // If not found, try the all lower version, too. auto lower_upgrade_protocol = util::strtolower(upgrade_protocol); upgrade_protocol_val = make_intrusive(lower_upgrade_protocol); v = upgrade_analyzers->Find(upgrade_protocol_val); } if ( v ) { auto analyzer_tag_val = cast_intrusive(v); DBG_LOG(DBG_ANALYZER, "Found %s in HTTP::upgrade_analyzers for %s", analyzer_tag_val->GetType()->Lookup(analyzer_tag_val->AsEnum()), upgrade_protocol_val->CheckString()); auto analyzer_tag = analyzer_mgr->GetComponentTag(analyzer_tag_val.get()); auto* analyzer = analyzer_mgr->InstantiateAnalyzer(std::move(analyzer_tag), Conn()); if ( analyzer ) { AddChildAnalyzer(analyzer); // The analyzer's Init() may have scheduled an event for analyzer configuration. // Drain the event queue now to process it. This further ensures that other // events already in the event queue (http_reply, http_header, ...) are drained // as well and accessible when the configuration runs. // // Don't just copy this code into a new analyzer, there might be better and more // more general approaches. // // Alternative proposal from Robin: // // Collect all HTTP headers (pattern/names configurable by script land) // and forward the collected headers to the analyzer via a custom // configuration method or some in-band channel. 
event_mgr.Drain(); } } else { DBG_LOG(DBG_ANALYZER, "No mapping for %s in HTTP::upgrade_analyzers, using PIA instead", upgrade_protocol.c_str()); pia = new analyzer::pia::PIA_TCP(Conn()); if ( AddChildAnalyzer(pia) ) { pia->FirstPacket(true, TransportProto::TRANSPORT_TCP); pia->FirstPacket(false, TransportProto::TRANSPORT_TCP); } } upgraded = true; RemoveSupportAnalyzer(content_line_orig); RemoveSupportAnalyzer(content_line_resp); if ( http_connection_upgrade ) EnqueueConnEvent(http_connection_upgrade, ConnVal(), make_intrusive(upgrade_protocol)); } void HTTP_Analyzer::RequestClash(Val* /* clash_val */) { Weird("multiple_HTTP_request_elements"); // Flush out old values. RequestMade(true, "request clash"); } const String* HTTP_Analyzer::UnansweredRequestMethod() { return unanswered_requests.empty() ? nullptr : unanswered_requests.front()->AsString(); } int HTTP_Analyzer::HTTP_ReplyLine(const char* line, const char* end_of_line) { const char* rest; rest = PrefixMatch(line, end_of_line, "HTTP/", false); if ( ! rest ) { // If the uppercase version didn't match, try a case-insensitive version, but // send a weird if it matches. rest = PrefixMatch(line, end_of_line, "HTTP/", true); if ( rest ) Weird("lowercase_HTTP_keyword"); } if ( ! rest ) { // ##TODO: some server replies with an HTML document // without a status line and a MIME header, when the // request is malformed. 
HTTP_Event("bad_HTTP_reply", analyzer::mime::to_string_val(line, end_of_line)); return 0; } SetVersion(&reply_version, HTTP_Version(end_of_line - rest, rest)); for ( ; rest < end_of_line; ++rest ) if ( analyzer::mime::is_lws(*rest) ) break; if ( rest >= end_of_line ) { HTTP_Event("HTTP_reply_code_missing", analyzer::mime::to_string_val(line, end_of_line)); return 0; } rest = util::skip_whitespace(rest, end_of_line); if ( rest + 3 > end_of_line ) { HTTP_Event("HTTP_reply_code_missing", analyzer::mime::to_string_val(line, end_of_line)); return 0; } reply_code = HTTP_ReplyCode(rest); for ( rest += 3; rest < end_of_line; ++rest ) if ( analyzer::mime::is_lws(*rest) ) break; if ( rest >= end_of_line ) { HTTP_Event("HTTP_reply_reason_phrase_missing", analyzer::mime::to_string_val(line, end_of_line)); // Tolerate missing reason phrase? return 1; } rest = util::skip_whitespace(rest, end_of_line); reply_reason_phrase = make_intrusive(end_of_line - rest, (const char*)rest); return 1; } int HTTP_Analyzer::HTTP_ReplyCode(const char* code_str) { if ( isdigit(code_str[0]) && isdigit(code_str[1]) && isdigit(code_str[2]) ) return (code_str[0] - '0') * 100 + (code_str[1] - '0') * 10 + (code_str[2] - '0'); else return 0; } int HTTP_Analyzer::ExpectReplyMessageBody() { // RFC 2616: // // For response messages, whether or not a message-body is included with // a message is dependent on both the request method and the response // status code (section 6.1.1). All responses to the HEAD request method // MUST NOT include a message-body, even though the presence of entity- // header fields might lead one to believe they do. All 1xx // (informational), 204 (no content), and 304 (not modified) responses // MUST NOT include a message-body. All other responses do include a // message-body, although it MAY be of zero length. 
const String* method = UnansweredRequestMethod(); if ( method && strncasecmp((const char*)(method->Bytes()), "HEAD", method->Len()) == 0 ) return HTTP_BODY_NOT_EXPECTED; if ( (reply_code >= 100 && reply_code < 200) || reply_code == 204 || reply_code == 304 ) return HTTP_BODY_NOT_EXPECTED; return HTTP_BODY_EXPECTED; } void HTTP_Analyzer::HTTP_Header(bool is_orig, analyzer::mime::MIME_Header* h) { // To be "liberal", we only look at "keep-alive" on the client // side, and if seen assume the connection to be persistent. // This seems fairly safe - at worst, the client does indeed // send additional requests, and the server ignores them. if ( is_orig && analyzer::mime::istrequal(h->get_name(), "connection") ) { if ( analyzer::mime::istrequal(h->get_value_token(), "keep-alive") ) keep_alive = 1; } if ( ! is_orig && analyzer::mime::istrequal(h->get_name(), "connection") ) { if ( analyzer::mime::istrequal(h->get_value_token(), "close") ) connection_close = 1; else if ( analyzer::mime::istrequal(h->get_value_token(), "upgrade") ) upgrade_connection = true; } if ( ! is_orig && analyzer::mime::istrequal(h->get_name(), "upgrade") ) upgrade_protocol.assign(h->get_value_token().data, h->get_value_token().length); if ( http_header ) { zeek::detail::Rule::PatternType rule = is_orig ? 
zeek::detail::Rule::HTTP_REQUEST_HEADER : zeek::detail::Rule::HTTP_REPLY_HEADER; data_chunk_t hd_name = h->get_name(); data_chunk_t hd_value = h->get_value(); Conn()->Match(rule, (const u_char*)hd_name.data, hd_name.length, is_orig, true, false, true); Conn()->Match(rule, (const u_char*)": ", 2, is_orig, false, false, false); Conn()->Match(rule, (const u_char*)hd_value.data, hd_value.length, is_orig, false, true, false); if ( DEBUG_http ) DEBUG_MSG("%.6f http_header\n", run_state::network_time); auto upper_hn = analyzer::mime::to_string_val(h->get_name()); upper_hn->ToUpper(); EnqueueConnEvent(http_header, ConnVal(), val_mgr->Bool(is_orig), analyzer::mime::to_string_val(h->get_name()), std::move(upper_hn), analyzer::mime::to_string_val(h->get_value())); } } void HTTP_Analyzer::HTTP_EntityData(bool is_orig, String* entity_data) { if ( http_entity_data ) EnqueueConnEvent(http_entity_data, ConnVal(), val_mgr->Bool(is_orig), val_mgr->Count(entity_data->Len()), make_intrusive(entity_data)); else delete entity_data; } // Calls request/reply done void HTTP_Analyzer::HTTP_MessageDone(bool is_orig, HTTP_Message* /* message */) { if ( is_orig ) RequestMade(false, "message ends normally"); else ReplyMade(false, "message ends normally"); } void HTTP_Analyzer::InitHTTPMessage(analyzer::tcp::ContentLine_Analyzer* cl, HTTP_Message*& message, bool is_orig, int expect_body, int64_t init_header_length) { if ( message ) { if ( ! message->Finished() ) Weird("HTTP_overlapping_messages"); delete message; } // DEBUG_MSG("%.6f init http message\n", run_state::network_time); message = new HTTP_Message(this, cl, is_orig, expect_body, init_header_length); } void HTTP_Analyzer::SkipEntityData(bool is_orig) { HTTP_Message* msg = is_orig ? 
request_message : reply_message; if ( msg ) msg->SkipEntityData(); } bool is_reserved_URI_char(unsigned char ch) { // see RFC 3986 (definition of URI) return strchr(":/?#[]@!$&'()*+,;=", ch) != 0; } bool is_unreserved_URI_char(unsigned char ch) { // see RFC 3986 (definition of URI) return isalnum(ch) != 0 || strchr("-_.!~*\'()", ch) != 0; } void escape_URI_char(unsigned char ch, unsigned char*& p) { *p++ = '%'; *p++ = util::encode_hex((ch >> 4) & 0xf); *p++ = util::encode_hex(ch & 0xf); } String* unescape_URI(const u_char* line, const u_char* line_end, analyzer::Analyzer* analyzer) { byte_vec decoded_URI = new u_char[line_end - line + 1]; byte_vec URI_p = decoded_URI; while ( line < line_end ) { if ( *line == '%' ) { ++line; if ( line == line_end ) { *URI_p++ = '%'; if ( analyzer ) analyzer->Weird("illegal_%_at_end_of_URI"); break; } else if ( line + 1 == line_end ) { // % + one character at end of line. Log weird // and just add to unescaped URI. *URI_p++ = '%'; *URI_p++ = *line; if ( analyzer ) analyzer->Weird("partial_escape_at_end_of_URI"); break; } else if ( *line == '%' ) { // Double '%' might be either due to // software bug, or more likely, an // evasion (e.g. used by Nimda). // *URI_p++ = '%'; if ( analyzer ) analyzer->Weird("double_%_in_URI"); --line; // ignore the first '%' } else if ( isxdigit(line[0]) && isxdigit(line[1]) ) { *URI_p++ = (util::decode_hex(line[0]) << 4) + util::decode_hex(line[1]); ++line; // place line at the last hex digit } else if ( line_end - line >= 5 && line[0] == 'u' && isxdigit(line[1]) && isxdigit(line[2]) && isxdigit(line[3]) && isxdigit(line[4]) ) { // Decode escaping like this: %u00AE // The W3C rejected escaping this way, and // there is no RFC that specifies it. // Apparently there is some software doing // this sort of 4 byte unicode encoding anyway. // Likely causing an increase in it's use is // the third edition of the ECMAScript spec // having functions for encoding and decoding // data in this format. 
// If the first byte is null, let's eat it. // It could just be ASCII encoded into this // unicode escaping structure. if ( ! (line[1] == '0' && line[2] == '0') ) *URI_p++ = (util::decode_hex(line[1]) << 4) + util::decode_hex(line[2]); *URI_p++ = (util::decode_hex(line[3]) << 4) + util::decode_hex(line[4]); line += 4; } else { if ( analyzer ) analyzer->Weird("unescaped_%_in_URI"); *URI_p++ = '%'; // put back initial '%' *URI_p++ = *line; // take char w/o interp. } } else *URI_p++ = *line; ++line; } URI_p[0] = 0; return new String(true, decoded_URI, URI_p - decoded_URI); } } // namespace zeek::analyzer::http