From a5632aff4e971b89f216c6a1b0b19fa3ea1a7a73 Mon Sep 17 00:00:00 2001 From: Gregor Maier Date: Wed, 12 Jan 2011 09:38:13 -0800 Subject: [PATCH] TCP Reassembler hotfix for conns > 2GB. The TCP Reassembler does not deliver any data to analyzers after the first 2GB due to signed integer overflow (Actually it will deliver again between 4--6GB, etc.) This happens silently, i.e., without content_gap events or Undelivered calls. See Comments in TCP_Reassembler.cc for more details. As a hotfix that seems to work I disabled the seq_to_skip features. It wasn't used by any analyzer or policy script (Note, that seq_to_skip is different from skip_deliveries). See also ticket #348 --- src/Reassem.cc | 4 ++- src/Reassem.h | 2 +- src/TCP_Reassembler.cc | 70 ++++++++++++++++++++++++++++++++++-------- src/TCP_Reassembler.h | 14 ++++++++- 4 files changed, 74 insertions(+), 16 deletions(-) diff --git a/src/Reassem.cc b/src/Reassem.cc index c6ec6a3420..51e39ce83d 100644 --- a/src/Reassem.cc +++ b/src/Reassem.cc @@ -195,8 +195,10 @@ void Reassembler::Describe(ODesc* d) const d->Add("reassembler"); } -void Reassembler::Undelivered(int /* up_to_seq */) +void Reassembler::Undelivered(int up_to_seq) { + // TrimToSeq() expects this. + last_reassem_seq = up_to_seq; } DataBlock* Reassembler::AddAndCheck(DataBlock* b, int seq, int upper, diff --git a/src/Reassem.h b/src/Reassem.h index 0200f8577b..ee70d70b15 100644 --- a/src/Reassem.h +++ b/src/Reassem.h @@ -11,7 +11,7 @@ class DataBlock { public: DataBlock(const u_char* data, int size, int seq, - DataBlock* next, DataBlock* prev); + DataBlock* prev, DataBlock* next); ~DataBlock(); diff --git a/src/TCP_Reassembler.cc b/src/TCP_Reassembler.cc index c6adec2294..e4fd7077c7 100644 --- a/src/TCP_Reassembler.cc +++ b/src/TCP_Reassembler.cc @@ -9,6 +9,30 @@ // Only needed for gap_report events. #include "Event.h" +// Note, sequence numbers are relative. I.e., they start with 1. + +// The Reassembler uses 32 bit ints for keeping track of sequence numbers. +// This means that the seq numbers will become negative once we exceed +// 2 GB of data. The Reassembler seems to mostly work despite negative +// sequence numbers, since seq_delta() will handle them gracefully. +// However, there are a couple of issues. E.g., seq_to_skip doesn't work +// (which is now disabled with an ifdef, since it wasn't used) +// Also, a check in Undelivered() had a problem with negative sequence +// numbers. +// +// There are numerous counters (e.g., number of total bytes, etc.) that are +// incorrect due to overflow too. However, these seem to be for informative +// purposes only, so we currently ignore them. +// +// There might be other problems hidden somewhere, that I haven't discovered +// yet...... +// +// Ideally the Reassembler should start using 64 bit ints for keeping track +// of sequence numbers +// +// Reassem.{cc|h} and other "Reassemblers" that inherit from it (e.g., Frag) +// need to be updated too. + const bool DEBUG_tcp_contents = false; const bool DEBUG_tcp_connection_close = false; const bool DEBUG_tcp_match_undelivered = false; @@ -35,7 +59,9 @@ TCP_Reassembler::TCP_Reassembler(Analyzer* arg_dst_analyzer, deliver_tcp_contents = 0; skip_deliveries = 0; did_EOF = 0; +#ifdef XXX_USE_SEQ_TO_SKIP seq_to_skip = 0; +#endif in_delivery = false; if ( tcp_contents ) @@ -120,7 +146,7 @@ void TCP_Reassembler::Undelivered(int up_to_seq) TCP_Endpoint* endpoint = endp; TCP_Endpoint* peer = endpoint->peer; - if ( up_to_seq <= 2 && tcp_analyzer->IsPartial() ) + if ( up_to_seq <= 2 && tcp_analyzer->IsPartial() ) { // Since it was a partial connection, we faked up its // initial sequence numbers as though we'd seen a SYN. // We've now received the first ack and are getting a @@ -129,7 +155,17 @@ void TCP_Reassembler::Undelivered(int up_to_seq) // (if up_to_seq is 2). The latter can occur when the // first packet we saw instantiating the partial connection // was a keep-alive. So, in either case, just ignore it. - return; + + // TODO: Don't we need to update last_reassm_seq ???? + + if (up_to_seq >=0 ) + // Since seq are currently only 32 bit signed integers, they + // will become negative if a connection has more than 2GB of + // data..... + // Remove the above if and always return here, + // once we're using 64 bit ints + return; + } #if 0 if ( endpoint->FIN_cnt > 0 ) @@ -144,16 +180,20 @@ void TCP_Reassembler::Undelivered(int up_to_seq) if ( DEBUG_tcp_contents ) { - DEBUG_MSG("%.6f Undelivered: up_to_seq=%d, last_reassm=%d, " + DEBUG_MSG("%.6f Undelivered: is_orig=%d up_to_seq=%d, last_reassm=%d, " "endp: FIN_cnt=%d, RST_cnt=%d, " "peer: FIN_cnt=%d, RST_cnt=%d\n", - network_time, up_to_seq, last_reassem_seq, + network_time, is_orig, up_to_seq, last_reassem_seq, endpoint->FIN_cnt, endpoint->RST_cnt, peer->FIN_cnt, peer->RST_cnt); } if ( seq_delta(up_to_seq, last_reassem_seq) <= 0 ) + { + // This should never happen. + internal_error("Calling Undelivered for data that has already been delivered (or has already been marked as undelivered"); return; + } if ( last_reassem_seq == 1 && (endpoint->FIN_cnt > 0 || endpoint->RST_cnt > 0 || @@ -177,9 +217,9 @@ void TCP_Reassembler::Undelivered(int up_to_seq) { if ( DEBUG_tcp_contents ) { - DEBUG_MSG("%.6f Undelivered: seq=%d, len=%d, " + DEBUG_MSG("%.6f Undelivered: is_orig=%d, seq=%d, len=%d, " "skip_deliveries=%d\n", - network_time, last_reassem_seq, + network_time, is_orig, last_reassem_seq, seq_delta(up_to_seq, last_reassem_seq), skip_deliveries); } @@ -376,7 +416,7 @@ void TCP_Reassembler::BlockInserted(DataBlock* start_block) void TCP_Reassembler::Overlap(const u_char* b1, const u_char* b2, int n) { if ( DEBUG_tcp_contents ) - DEBUG_MSG("%.6f TCP contents overlap: %d\n", network_time, n); + DEBUG_MSG("%.6f TCP contents overlap: %d is_orig=%d\n", network_time, n, is_orig); if ( rexmit_inconsistency && memcmp((const void*) b1, (const void*) b2, n) && @@ -419,8 +459,8 @@ int TCP_Reassembler::DataSent(double t, int seq, int len, if ( DEBUG_tcp_contents ) { - DEBUG_MSG("%.6f DataSent: seq=%d upper=%d ack=%d\n", - network_time, seq, upper_seq, ack); + DEBUG_MSG("%.6f DataSent: is_orig=%d seq=%d upper=%d ack=%d\n", + network_time, is_orig, seq, upper_seq, ack); } if ( skip_deliveries ) @@ -477,8 +517,7 @@ void TCP_Reassembler::AckReceived(int seq) // Zero, or negative in sequence-space terms. Nothing to do. return; - bool test_active = - ! skip_deliveries && ! tcp_analyzer->Skipping() && + bool test_active = ! skip_deliveries && ! tcp_analyzer->Skipping() && endp->state == TCP_ENDPOINT_ESTABLISHED && endp->peer->state == TCP_ENDPOINT_ESTABLISHED; @@ -569,7 +608,8 @@ void TCP_Reassembler::CheckEOF() void TCP_Reassembler::DeliverBlock(int seq, int len, const u_char* data) { - if ( seq_delta(seq + len, seq_to_skip) <= 0 ) +#ifdef XXX_USE_SEQ_TO_SKIP + if ( seq_delta(seq + len, seq_to_skip) <= 0 ) return; if ( seq_delta(seq, seq_to_skip) < 0 ) @@ -579,6 +619,7 @@ void TCP_Reassembler::DeliverBlock(int seq, int len, const u_char* data) data += to_skip; seq = seq_to_skip; } +#endif if ( deliver_tcp_contents ) { @@ -603,11 +644,13 @@ void TCP_Reassembler::DeliverBlock(int seq, int len, const u_char* data) in_delivery = true; Deliver(seq, len, data); in_delivery = false; - +#ifdef XXX_USE_SEQ_TO_SKIP if ( seq_delta(seq + len, seq_to_skip) < 0 ) SkipToSeq(seq_to_skip); +#endif } +#ifdef XXX_USE_SEQ_TO_SKIP void TCP_Reassembler::SkipToSeq(int seq) { if ( seq_delta(seq, seq_to_skip) > 0 ) @@ -617,6 +660,7 @@ void TCP_Reassembler::SkipToSeq(int seq) TrimToSeq(seq); } } +#endif int TCP_Reassembler::DataPending() const { diff --git a/src/TCP_Reassembler.h b/src/TCP_Reassembler.h index 3e8426c9fb..e6c9e35d16 100644 --- a/src/TCP_Reassembler.h +++ b/src/TCP_Reassembler.h @@ -6,6 +6,13 @@ #include "Reassem.h" #include "TCP_Endpoint.h" +// The skip_to_seq feature does not work correctly with +// connections >2GB due to use of 32 bit signed ints (see +// comments in TCP_Reassembler.cc) +// Since it's not used by any analyzer or policy script we disable +// it. Could be added back in once we start using 64bit integers. +// #define XXX_USE_SEQ_TO_SKIP + class BroFile; class Connection; class TCP_Analyzer; @@ -60,9 +67,11 @@ public: void MatchUndelivered(int up_to_seq = -1); +#ifdef XXX_USE_SEQ_TO_SKIP // Skip up to seq, as if there's a content gap. // Can be used to skip HTTP data for performance considerations. void SkipToSeq(int seq); +#endif int DataSent(double t, int seq, int len, const u_char* data, bool replaying=true); @@ -85,9 +94,10 @@ public: const TCP_Endpoint* Endpoint() const { return endp; } int IsOrig() const { return endp->IsOrig(); } - +#ifdef XXX_USE_SEQ_TO_SKIP bool IsSkippedContents(int seq, int length) const { return seq + length <= seq_to_skip; } +#endif private: TCP_Reassembler() { } @@ -110,7 +120,9 @@ private: unsigned int did_EOF:1; unsigned int skip_deliveries:1; +#ifdef XXX_USE_SEQ_TO_SKIP int seq_to_skip; +#endif bool in_delivery; BroFile* record_contents_file; // file on which to reassemble contents