diff --git a/CHANGES b/CHANGES index b144579ecf..24ab8330e2 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,10 @@ +3.1.0-dev.140 | 2019-09-24 09:03:56 +0000 + + * Reorganize reassembly data structures. This replaces the previous + linked list-based implementation with std::map to avoid O(n) worst + case performance. (Jon Siwek, Corelight) + 3.1.0-dev.131 | 2019-09-23 13:07:09 -0700 * Add --build-dir as alias for --builddir (Dominik Charousset, Corelight) diff --git a/VERSION b/VERSION index 3ee6b718bd..ff29cfc4d4 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0-dev.131 +3.1.0-dev.140 diff --git a/src/Frag.cc b/src/Frag.cc index 51f82e84f3..4d1f7ace47 100644 --- a/src/Frag.cc +++ b/src/Frag.cc @@ -185,22 +185,32 @@ void FragReassembler::Overlap(const u_char* b1, const u_char* b2, uint64_t n) Weird("fragment_overlap"); } -void FragReassembler::BlockInserted(DataBlock* /* start_block */) +void FragReassembler::BlockInserted(DataBlockMap::const_iterator /* it */) { - if ( blocks->seq > 0 || ! frag_size ) + auto it = block_list.Begin(); + + if ( it->second.seq > 0 || ! frag_size ) // For sure don't have it all yet. return; + auto next = std::next(it); + // We might have it all - look for contiguous all the way. - DataBlock* b; - for ( b = blocks; b->next; b = b->next ) - if ( b->upper != b->next->seq ) + while ( next != block_list.End() ) + { + if ( it->second.upper != next->second.seq ) break; - if ( b->next ) + ++it; + ++next; + } + + const auto& last = std::prev(block_list.End())->second; + + if ( next != block_list.End() ) { // We have a hole. - if ( b->upper >= frag_size ) + if ( it->second.upper >= frag_size ) { // We're stuck. The point where we stopped is // contiguous up through the expected end of @@ -213,19 +223,19 @@ void FragReassembler::BlockInserted(DataBlock* /* start_block */) // We decide to analyze the contiguous portion now. // Extend the fragment up through the end of what // we have. 
- frag_size = b->upper; + frag_size = it->second.upper; } else return; } - else if ( last_block->upper > frag_size ) + else if ( last.upper > frag_size ) { Weird("fragment_size_inconsistency"); - frag_size = last_block->upper; + frag_size = last.upper; } - else if ( last_block->upper < frag_size ) + else if ( last.upper < frag_size ) // Missing the tail. return; @@ -246,15 +256,22 @@ void FragReassembler::BlockInserted(DataBlock* /* start_block */) pkt += proto_hdr_len; - for ( b = blocks; b; b = b->next ) + for ( it = block_list.Begin(); it != block_list.End(); ++it ) { - // If we're above a hole, stop. This can happen because - // the logic above regarding a hole that's above the - // expected fragment size. - if ( b->prev && b->prev->upper < b->seq ) - break; + const auto& b = it->second; - if ( b->upper > n ) + if ( it != block_list.Begin() ) + { + const auto& prev = std::prev(it)->second; + + // If we're above a hole, stop. This can happen because + // the logic above regarding a hole that's above the + // expected fragment size. 
+ if ( prev.upper < b.seq ) + break; + } + + if ( b.upper > n ) { reporter->InternalWarning("bad fragment reassembly"); DeleteTimer(); @@ -263,8 +280,7 @@ void FragReassembler::BlockInserted(DataBlock* /* start_block */) return; } - memcpy((void*) &pkt[b->seq], (const void*) b->block, - b->upper - b->seq); + memcpy(&pkt[b.seq], b.block, b.upper - b.seq); } delete reassembled_pkt; @@ -299,13 +315,7 @@ void FragReassembler::BlockInserted(DataBlock* /* start_block */) void FragReassembler::Expire(double t) { - while ( blocks ) - { - DataBlock* b = blocks->next; - delete blocks; - blocks = b; - } - + block_list.Clear(); expire_timer->ClearReassembler(); expire_timer = 0; // timer manager will delete it diff --git a/src/Frag.h b/src/Frag.h index 1eccabcb94..1c5d83d89b 100644 --- a/src/Frag.h +++ b/src/Frag.h @@ -36,7 +36,7 @@ public: const FragReassemblerKey& Key() const { return key; } protected: - void BlockInserted(DataBlock* start_block) override; + void BlockInserted(DataBlockMap::const_iterator it) override; void Overlap(const u_char* b1, const u_char* b2, uint64_t n) override; void Weird(const char* name) const; diff --git a/src/Reassem.cc b/src/Reassem.cc index 50cea52153..333452433a 100644 --- a/src/Reassem.cc +++ b/src/Reassem.cc @@ -1,95 +1,312 @@ // See the file "COPYING" in the main distribution directory for copyright. 
#include -#include #include "zeek-config.h" #include "Reassem.h" -static const bool DEBUG_reassem = false; +uint64_t Reassembler::total_size = 0; +uint64_t Reassembler::sizes[REASSEM_NUM]; -DataBlock::DataBlock(Reassembler* reass, const u_char* data, - uint64_t size, uint64_t arg_seq, DataBlock* arg_prev, - DataBlock* arg_next, ReassemblerType reassem_type) +DataBlock::DataBlock(const u_char* data, uint64_t size, uint64_t arg_seq) { seq = arg_seq; upper = seq + size; block = new u_char[size]; - - memcpy((void*) block, (const void*) data, size); - - prev = arg_prev; - next = arg_next; - - if ( prev ) - prev->next = this; - if ( next ) - next->prev = this; - - reassembler = reass; - reassembler->size_of_all_blocks += size; - - rtype = reassem_type; - Reassembler::sizes[rtype] += pad_size(size) + padded_sizeof(DataBlock); - Reassembler::total_size += pad_size(size) + padded_sizeof(DataBlock); + memcpy(block, data, size); } -uint64_t Reassembler::total_size = 0; -uint64_t Reassembler::sizes[REASSEM_NUM]; +void DataBlockList::DataSize(uint64_t seq_cutoff, uint64_t* below, uint64_t* above) const + { + for ( const auto& e : block_map ) + { + const auto& b = e.second; + + if ( b.seq <= seq_cutoff ) + *below += b.Size(); + else + *above += b.Size(); + } + } + +void DataBlockList::Delete(DataBlockMap::const_iterator it) + { + const auto& b = it->second; + auto size = b.Size(); + + block_map.erase(it); + total_data_size -= size; + + Reassembler::total_size -= size + sizeof(DataBlock); + Reassembler::sizes[reassembler->rtype] -= size + sizeof(DataBlock); + } + +DataBlock DataBlockList::Remove(DataBlockMap::const_iterator it) + { + auto b = std::move(it->second); + auto size = b.Size(); + + block_map.erase(it); + total_data_size -= size; + + return b; + } + +void DataBlockList::Clear() + { + auto total_db_size = sizeof(DataBlock) * block_map.size(); + auto total = total_data_size + total_db_size; + Reassembler::total_size -= total; + Reassembler::sizes[reassembler->rtype] -=
total; + total_data_size = 0; + block_map.clear(); + } + +void DataBlockList::Append(DataBlock block, uint64_t limit) + { + total_data_size += block.Size(); + + block_map.emplace_hint(block_map.end(), block.seq, std::move(block)); + + while ( block_map.size() > limit ) + Delete(block_map.begin()); + } + +DataBlockMap::const_iterator DataBlockList::FirstBlockAtOrBefore(uint64_t seq) const + { + // Upper sequence number doesn't matter for the search + auto it = block_map.upper_bound(seq); + + if ( it == block_map.end() ) + return block_map.empty() ? it : std::prev(it); + + if ( it == block_map.begin() ) + return block_map.end(); + + return std::prev(it); + } + +DataBlockMap::const_iterator +DataBlockList::Insert(uint64_t seq, uint64_t upper, const u_char* data, + DataBlockMap::const_iterator hint) + { + auto size = upper - seq; + auto rval = block_map.emplace_hint(hint, seq, DataBlock(data, size, seq)); + + total_data_size += size; + Reassembler::sizes[reassembler->rtype] += size + sizeof(DataBlock); + Reassembler::total_size += size + sizeof(DataBlock); + + return rval; + } + +DataBlockMap::const_iterator +DataBlockList::Insert(uint64_t seq, uint64_t upper, const u_char* data, + DataBlockMap::const_iterator* hint) + { + // Empty list. + if ( block_map.empty() ) + return Insert(seq, upper, data, block_map.end()); + + const auto& last = block_map.rbegin()->second; + + // Special check for the common case of appending to the end. + if ( seq == last.upper ) + return Insert(seq, upper, data, block_map.end()); + + // Find the first block that doesn't come completely before the new data. + DataBlockMap::const_iterator it; + + if ( hint ) + it = *hint; + else + { + it = FirstBlockAtOrBefore(seq); + + if ( it == block_map.end() ) + it = block_map.begin(); + } + + while ( std::next(it) != block_map.end() && it->second.upper <= seq ) + ++it; + + const auto& b = it->second; + + if ( b.upper <= seq ) + // b is the last block, and it comes completely before the new block. 
+ return Insert(seq, upper, data, block_map.end()); + + if ( upper <= b.seq ) + // The new block comes completely before b. + return Insert(seq, upper, data, it); + + DataBlockMap::const_iterator rval; + + // The blocks overlap. + if ( seq < b.seq ) + { + // The new block has a prefix that comes before b. + uint64_t prefix_len = b.seq - seq; + + rval = Insert(seq, seq + prefix_len, data, it); + + data += prefix_len; + seq += prefix_len; + } + else + rval = it; + + uint64_t overlap_start = seq; + uint64_t overlap_offset = overlap_start - b.seq; + uint64_t new_b_len = upper - seq; + uint64_t b_len = b.upper - overlap_start; + uint64_t overlap_len = min(new_b_len, b_len); + + if ( overlap_len < new_b_len ) + { + // Recurse to resolve remainder of the new data. + data += overlap_len; + seq += overlap_len; + + auto r = Insert(seq, upper, data, &it); + + if ( rval == it ) + rval = r; + } + + return rval; + } + +uint64_t DataBlockList::Trim(uint64_t seq, uint64_t max_old, + DataBlockList* old_list) + { + uint64_t num_missing = 0; + + // Do this accounting before looking for Undelivered data, + // since that will alter last_reassem_seq. + + if ( ! block_map.empty() ) + { + const auto& first = block_map.begin()->second; + + if ( first.seq > reassembler->LastReassemSeq() ) + // An initial hole. + num_missing += first.seq - reassembler->LastReassemSeq(); + } + else if ( seq > reassembler->LastReassemSeq() ) + { + // Trimming data we never delivered. + // We won't have any accounting based on blocks for this hole. + num_missing += seq - reassembler->LastReassemSeq(); + } + + if ( seq > reassembler->LastReassemSeq() ) + { + // We're trimming data we never delivered. + reassembler->Undelivered(seq); + } + + while ( ! 
block_map.empty() ) + { + auto first_it = block_map.begin(); + const auto& first = first_it->second; + + if ( first.upper > seq ) + break; + + auto next = std::next(first_it); + + if ( next != block_map.end() && next->second.seq <= seq ) + { + if ( first.upper != next->second.seq ) + num_missing += next->second.seq - first.upper; + } + else + { + // No more blocks - did this one make it to seq? + // Second half of test is for acks of FINs, which + // don't get entered into the sequence space. + if ( first.upper != seq && first.upper != seq - 1 ) + num_missing += seq - first.upper; + } + + if ( max_old ) + old_list->Append(Remove(first_it), max_old); + else + Delete(first_it); + } + + if ( ! block_map.empty() ) + { + auto first_it = block_map.begin(); + const auto& first = first_it->second; + + // If we skipped over some undeliverable data, then + // it's possible that this block is now deliverable. + // Give it a try. + if ( first.seq == reassembler->LastReassemSeq() ) + reassembler->BlockInserted(first_it); + } + + reassembler->SetTrimSeq(seq); + return num_missing; + } Reassembler::Reassembler(uint64_t init_seq, ReassemblerType reassem_type) - : blocks(), last_block(), old_blocks(), last_old_block(), + : block_list(this), old_block_list(this), last_reassem_seq(init_seq), trim_seq(init_seq), - max_old_blocks(0), total_old_blocks(0), size_of_all_blocks(0), - rtype(reassem_type) + max_old_blocks(0), rtype(reassem_type) { } -Reassembler::~Reassembler() +void Reassembler::CheckOverlap(const DataBlockList& list, + uint64_t seq, uint64_t len, + const u_char* data) { - ClearBlocks(); - ClearOldBlocks(); - } - -void Reassembler::CheckOverlap(DataBlock *head, DataBlock *tail, - uint64_t seq, uint64_t len, const u_char* data) - { - if ( ! head || ! tail ) + if ( list.Empty() ) return; - if ( seq == tail->upper ) + const auto& last = list.LastBlock(); + + if ( seq == last.upper ) // Special case check for common case of appending to the end. 
return; uint64_t upper = (seq + len); - for ( DataBlock* b = head; b; b = b->next ) + auto it = list.FirstBlockAtOrBefore(seq); + + if ( it == list.End() ) + it = list.Begin(); + + for ( ; it != list.End(); ++it ) { + const auto& b = it->second; uint64_t nseq = seq; uint64_t nupper = upper; const u_char* ndata = data; - if ( nupper <= b->seq ) + if ( nupper <= b.seq ) + break; + + if ( nseq >= b.upper ) continue; - if ( nseq >= b->upper ) - continue; - - if ( nseq < b->seq ) + if ( nseq < b.seq ) { - ndata += (b->seq - seq); - nseq = b->seq; + ndata += (b.seq - seq); + nseq = b.seq; } - if ( nupper > b->upper ) - nupper = b->upper; + if ( nupper > b.upper ) + nupper = b.upper; - uint64_t overlap_offset = (nseq - b->seq); + uint64_t overlap_offset = (nseq - b.seq); uint64_t overlap_len = (nupper - nseq); if ( overlap_len ) - Overlap(&b->block[overlap_offset], ndata, overlap_len); + Overlap(&b.block[overlap_offset], ndata, overlap_len); } } @@ -100,13 +317,13 @@ void Reassembler::NewBlock(double t, uint64_t seq, uint64_t len, const u_char* d uint64_t upper_seq = seq + len; - CheckOverlap(old_blocks, last_old_block, seq, len, data); + CheckOverlap(old_block_list, seq, len, data); if ( upper_seq <= trim_seq ) // Old data, don't do any work for it. return; - CheckOverlap(blocks, last_block, seq, len, data); + CheckOverlap(block_list, seq, len, data); if ( seq < trim_seq ) { // Partially old data, just keep the good stuff. @@ -117,144 +334,28 @@ void Reassembler::NewBlock(double t, uint64_t seq, uint64_t len, const u_char* d len -= amount_old; } - DataBlock* start_block; - - if ( ! 
blocks ) - blocks = last_block = start_block = - new DataBlock(this, data, len, seq, 0, 0, rtype); - else - start_block = AddAndCheck(blocks, seq, upper_seq, data); - - BlockInserted(start_block); + auto it = block_list.Insert(seq, upper_seq, data); + BlockInserted(it); } uint64_t Reassembler::TrimToSeq(uint64_t seq) { - uint64_t num_missing = 0; - - // Do this accounting before looking for Undelivered data, - // since that will alter last_reassem_seq. - - if ( blocks ) - { - if ( blocks->seq > last_reassem_seq ) - // An initial hole. - num_missing += blocks->seq - last_reassem_seq; - } - - else if ( seq > last_reassem_seq ) - { // Trimming data we never delivered. - if ( ! blocks ) - // We won't have any accounting based on blocks - // for this hole. - num_missing += seq - last_reassem_seq; - } - - if ( seq > last_reassem_seq ) - { - // We're trimming data we never delivered. - Undelivered(seq); - } - - while ( blocks && blocks->upper <= seq ) - { - DataBlock* b = blocks->next; - - if ( b && b->seq <= seq ) - { - if ( blocks->upper != b->seq ) - num_missing += b->seq - blocks->upper; - } - else - { - // No more blocks - did this one make it to seq? - // Second half of test is for acks of FINs, which - // don't get entered into the sequence space.
- blocks->next = 0; - - if ( last_old_block ) - { - blocks->prev = last_old_block; - last_old_block->next = blocks; - } - else - { - blocks->prev = 0; - old_blocks = blocks; - } - - last_old_block = blocks; - total_old_blocks++; - - while ( old_blocks && total_old_blocks > max_old_blocks ) - { - DataBlock* next = old_blocks->next; - delete old_blocks; - old_blocks = next; - total_old_blocks--; - } - } - - else - delete blocks; - - blocks = b; - } - - if ( blocks ) - { - blocks->prev = 0; - - // If we skipped over some undeliverable data, then - // it's possible that this block is now deliverable. - // Give it a try. - if ( blocks->seq == last_reassem_seq ) - BlockInserted(blocks); - } - else - last_block = 0; - - if ( seq > trim_seq ) - // seq is further ahead in the sequence space. - trim_seq = seq; - - return num_missing; + return block_list.Trim(seq, max_old_blocks, &old_block_list); } void Reassembler::ClearBlocks() { - while ( blocks ) - { - DataBlock* b = blocks->next; - delete blocks; - blocks = b; - } - - last_block = 0; + block_list.Clear(); } void Reassembler::ClearOldBlocks() { - while ( old_blocks ) - { - DataBlock* b = old_blocks->next; - delete old_blocks; - old_blocks = b; - } - - last_old_block = 0; + old_block_list.Clear(); } uint64_t Reassembler::TotalSize() const { - return size_of_all_blocks; + return block_list.DataSize() + old_block_list.DataSize(); } void Reassembler::Describe(ODesc* d) const @@ -268,89 +369,6 @@ void Reassembler::Undelivered(uint64_t up_to_seq) last_reassem_seq = up_to_seq; } -DataBlock* Reassembler::AddAndCheck(DataBlock* b, uint64_t seq, uint64_t upper, - const u_char* data) - { - if ( DEBUG_reassem ) - { - DEBUG_MSG("%.6f Reassembler::AddAndCheck seq=%" PRIu64", upper=%" PRIu64"\n", - network_time, seq, upper); - } - - // Special check for the common case of appending to the end. 
- if ( last_block && seq == last_block->upper ) - { - last_block = new DataBlock(this, data, upper - seq, - seq, last_block, 0, rtype); - return last_block; - } - - // Find the first block that doesn't come completely before the - // new data. - while ( b->next && b->upper <= seq ) - b = b->next; - - if ( b->upper <= seq ) - { - // b is the last block, and it comes completely before - // the new block. - last_block = new DataBlock(this, data, upper - seq, - seq, b, 0, rtype); - return last_block; - } - - DataBlock* new_b = 0; - - if ( upper <= b->seq ) - { - // The new block comes completely before b. - new_b = new DataBlock(this, data, upper - seq, seq, - b->prev, b, rtype); - if ( b == blocks ) - blocks = new_b; - return new_b; - } - - // The blocks overlap. - if ( seq < b->seq ) - { - // The new block has a prefix that comes before b. - uint64_t prefix_len = b->seq - seq; - new_b = new DataBlock(this, data, prefix_len, seq, - b->prev, b, rtype); - if ( b == blocks ) - blocks = new_b; - - data += prefix_len; - seq += prefix_len; - } - else - new_b = b; - - uint64_t overlap_start = seq; - uint64_t overlap_offset = overlap_start - b->seq; - uint64_t new_b_len = upper - seq; - uint64_t b_len = b->upper - overlap_start; - uint64_t overlap_len = min(new_b_len, b_len); - - if ( overlap_len < new_b_len ) - { - // Recurse to resolve remainder of the new data. 
- data += overlap_len; - seq += overlap_len; - - if ( new_b == b ) - new_b = AddAndCheck(b, seq, upper, data); - else - (void) AddAndCheck(b, seq, upper, data); - } - - if ( new_b->prev == last_block ) - last_block = new_b; - - return new_b; - } - uint64_t Reassembler::MemoryAllocation(ReassemblerType rtype) { return Reassembler::sizes[rtype]; diff --git a/src/Reassem.h b/src/Reassem.h index 44f00b8806..e0c066149b 100644 --- a/src/Reassem.h +++ b/src/Reassem.h @@ -2,6 +2,8 @@ #pragma once +#include + #include "Obj.h" #include "IPAddr.h" @@ -19,30 +21,235 @@ enum ReassemblerType { class Reassembler; + +/** + * A block/segment of data for use in the reassembly process. + */ class DataBlock { public: - DataBlock(Reassembler* reass, const u_char* data, - uint64_t size, uint64_t seq, - DataBlock* prev, DataBlock* next, - ReassemblerType reassem_type = REASSEM_UNKNOWN); - ~DataBlock(); + /** + * Create a data block/segment with associated sequence numbering. + */ + DataBlock(const u_char* data, uint64_t size, uint64_t seq); - uint64_t Size() const { return upper - seq; } + DataBlock(const DataBlock& other) + { + seq = other.seq; + upper = other.upper; + auto size = other.Size(); + block = new u_char[size]; + memcpy(block, other.block, size); + } - DataBlock* next; // next block with higher seq # - DataBlock* prev; // previous block with lower seq # - uint64_t seq, upper; + DataBlock(DataBlock&& other) + { + seq = other.seq; + upper = other.upper; + block = other.block; + other.block = nullptr; + } + + DataBlock& operator=(const DataBlock& other) + { + if ( this == &other ) + return *this; + + seq = other.seq; + upper = other.upper; + auto size = other.Size(); + delete [] block; + block = new u_char[size]; + memcpy(block, other.block, size); + return *this; + } + + DataBlock& operator=(DataBlock&& other) + { + if ( this == &other ) + return *this; + + seq = other.seq; + upper = other.upper; + delete [] block; + block = other.block; + other.block = nullptr; + return *this; 
+ } + + ~DataBlock() + { delete [] block; } + + /** + * @return length of the data block + */ + uint64_t Size() const + { return upper - seq; } + + uint64_t seq; + uint64_t upper; u_char* block; - ReassemblerType rtype; +}; - Reassembler* reassembler; // Non-owning pointer back to parent. +using DataBlockMap = std::map; + + +/** + * The data structure used for reassembling arbitrary sequences of data + * blocks/segments. It internally uses an ordered map (std::map). + */ +class DataBlockList { +public: + + DataBlockList() + { } + + DataBlockList(Reassembler* r) : reassembler(r) + { } + + ~DataBlockList() + { Clear(); } + + /** + * @return iterator to start of the block list. + */ + DataBlockMap::const_iterator Begin() const + { return block_map.begin(); } + + /** + * @return iterator to end of the block list (one past last element). + */ + DataBlockMap::const_iterator End() const + { return block_map.end(); } + + /** + * @return reference to the first data block in the list. + * Must not be called when the list is empty. + */ + const DataBlock& FirstBlock() const + { assert(block_map.size()); return block_map.begin()->second; } + + /** + * @return reference to the last data block in the list. + * Must not be called when the list is empty. + */ + const DataBlock& LastBlock() const + { assert(block_map.size()); return block_map.rbegin()->second; } + + /** + * @return whether the list is empty. + */ + bool Empty() const + { return block_map.empty(); }; + + /** + * @return the number of blocks in the list. + */ + size_t NumBlocks() const + { return block_map.size(); }; + + /** + * @return the total size, in bytes, of all blocks in the list. + */ + size_t DataSize() const + { return total_data_size; } + + /** + * Counts the total size of all list elements partitioned by some cutoff. + * WARNING: this is an O(n) operation and potentially very slow.
+ * @param seq_cutoff the sequence number used to partition + * element sizes returned via "below" and "above" parameters + * @param below the size in bytes of all elements below "seq_cutoff" + * @param above the size in bytes of all elements above "seq_cutoff" + */ + void DataSize(uint64_t seq_cutoff, uint64_t* below, uint64_t* above) const; + + /** + * Remove all elements from the list + */ + void Clear(); + + /** + * Insert a new data block into the list. + * @param seq lower sequence number of the data block + * @param upper highest sequence number of the data block + * @param data points to the data block contents + * @param hint a suggestion of the node from which to start searching + * for an insertion point or null to search from the beginning of the list + * @return an iterator to the element that was inserted + */ + DataBlockMap::const_iterator + Insert(uint64_t seq, uint64_t upper, const u_char* data, + DataBlockMap::const_iterator* hint = nullptr); + + /** + * Insert a new data block at the end of the list and remove blocks + * from the beginning of the list to keep the list size under a limit. + * @param block the block to append + * @param limit the max number of blocks allowed (list is pruned from + * starting from the beginning after the insertion takes place). + */ + void Append(DataBlock block, uint64_t limit); + + + /** + * Remove all elements below a given sequence number. + * @param seq blocks below this number are discarded (removed/deleted) + * @param max_old if non-zero instead of deleting the underlying block, + * move it to "old_list" + * @param old_list another list to move discarded blocks into + * @return the amount of data (in bytes) that was not part of any + * discarded block (the total size of all bypassed gaps). + */ + uint64_t Trim(uint64_t seq, uint64_t max_old, DataBlockList* old_list); + + /** + * @return an iterator pointing to the first element with a segment whose + * starting sequence number is less than or equal to "seq". 
If no such + * element exists, returns an iterator denoting one-past the end of the + * list. + */ + DataBlockMap::const_iterator FirstBlockAtOrBefore(uint64_t seq) const; + +private: + + /** + * Insert a new data block into the list. + * @param seq lower sequence number of the data block + * @param upper highest sequence number of the data block + * @param data points to the data block contents + * @param hint a suggestion of the node from which to start searching + * for an insertion point + * @return an iterator to the element that was inserted + */ + DataBlockMap::const_iterator + Insert(uint64_t seq, uint64_t upper, const u_char* data, + DataBlockMap::const_iterator hint); + + /** + * Removes a block from the list and updates other state which keeps + * track of total size of blocks. + * @param it the element to remove + */ + void Delete(DataBlockMap::const_iterator it); + + /** + * Removes a block from the list and returns it, assuming it will + * immediately be appended to another list. + * @param it the element to remove + * @return the removed block + */ + DataBlock Remove(DataBlockMap::const_iterator it); + + Reassembler* reassembler = nullptr; + size_t total_data_size = 0; + DataBlockMap block_map; }; class Reassembler : public BroObj { public: Reassembler(uint64_t init_seq, ReassemblerType reassem_type = REASSEM_UNKNOWN); - ~Reassembler() override; + ~Reassembler() override {} void NewBlock(double t, uint64_t seq, uint64_t len, const u_char* data); @@ -54,9 +261,17 @@ public: void ClearBlocks(); void ClearOldBlocks(); - int HasBlocks() const { return blocks != 0; } + int HasBlocks() const + { return ! 
block_list.Empty(); } + uint64_t LastReassemSeq() const { return last_reassem_seq; } + uint64_t TrimSeq() const + { return trim_seq; } + + void SetTrimSeq(uint64_t seq) + { if ( seq > trim_seq ) trim_seq = seq; } + uint64_t TotalSize() const; // number of bytes buffered up void Describe(ODesc* d) const override; @@ -72,30 +287,22 @@ public: protected: Reassembler() { } - friend class DataBlock; + friend class DataBlockList; virtual void Undelivered(uint64_t up_to_seq); - virtual void BlockInserted(DataBlock* b) = 0; + virtual void BlockInserted(DataBlockMap::const_iterator it) = 0; virtual void Overlap(const u_char* b1, const u_char* b2, uint64_t n) = 0; - DataBlock* AddAndCheck(DataBlock* b, uint64_t seq, - uint64_t upper, const u_char* data); - - void CheckOverlap(DataBlock *head, DataBlock *tail, + void CheckOverlap(const DataBlockList& list, uint64_t seq, uint64_t len, const u_char* data); - DataBlock* blocks; - DataBlock* last_block; - - DataBlock* old_blocks; - DataBlock* last_old_block; + DataBlockList block_list; + DataBlockList old_block_list; uint64_t last_reassem_seq; uint64_t trim_seq; // how far we've trimmed uint32_t max_old_blocks; - uint32_t total_old_blocks; - uint64_t size_of_all_blocks; ReassemblerType rtype; diff --git a/src/analyzer/protocol/tcp/TCP_Endpoint.h b/src/analyzer/protocol/tcp/TCP_Endpoint.h index 6cd2d17ea9..160ad335fa 100644 --- a/src/analyzer/protocol/tcp/TCP_Endpoint.h +++ b/src/analyzer/protocol/tcp/TCP_Endpoint.h @@ -161,6 +161,8 @@ public: // // If we're not processing contents, then naturally each of // these is empty. + // + // WARNING: this is an O(n) operation and potentially very slow. 
void SizeBufferedData(uint64_t& waiting_on_hole, uint64_t& waiting_on_ack); int ValidChecksum(const struct tcphdr* tp, int len) const; diff --git a/src/analyzer/protocol/tcp/TCP_Reassembler.cc b/src/analyzer/protocol/tcp/TCP_Reassembler.cc index 9e8e709c97..e5b1250a05 100644 --- a/src/analyzer/protocol/tcp/TCP_Reassembler.cc +++ b/src/analyzer/protocol/tcp/TCP_Reassembler.cc @@ -66,9 +66,14 @@ void TCP_Reassembler::Done() if ( record_contents_file ) { // Record any undelivered data. - if ( blocks && last_reassem_seq < last_block->upper ) - RecordToSeq(last_reassem_seq, last_block->upper, - record_contents_file); + if ( ! block_list.Empty() ) + { + auto last_block = std::prev(block_list.End())->second; + + if ( last_reassem_seq < last_block.upper ) + RecordToSeq(last_reassem_seq, last_block.upper, + record_contents_file); + } record_contents_file->Close(); } @@ -78,15 +83,16 @@ void TCP_Reassembler::SizeBufferedData(uint64_t& waiting_on_hole, uint64_t& waiting_on_ack) const { waiting_on_hole = waiting_on_ack = 0; - for ( DataBlock* b = blocks; b; b = b->next ) - { - if ( b->seq <= last_reassem_seq ) - // We must have delivered this block, but - // haven't yet trimmed it. - waiting_on_ack += b->Size(); - else - waiting_on_hole += b->Size(); - } + block_list.DataSize(last_reassem_seq, &waiting_on_ack, &waiting_on_hole); + } + +uint64_t TCP_Reassembler::NumUndeliveredBytes() const + { + if ( block_list.Empty() ) + return 0; + + auto last_block = std::prev(block_list.End())->second; + return last_block.upper - last_reassem_seq; } void TCP_Reassembler::SetContentsFile(BroFile* f) @@ -102,8 +108,8 @@ void TCP_Reassembler::SetContentsFile(BroFile* f) Unref(record_contents_file); else { - if ( blocks ) - RecordToSeq(blocks->seq, last_reassem_seq, f); + if ( ! block_list.Empty() ) + RecordToSeq(block_list.Begin()->second.seq, last_reassem_seq, f); } Ref(f); @@ -231,29 +237,32 @@ void TCP_Reassembler::Undelivered(uint64_t up_to_seq) if ( ! 
skip_deliveries ) { // If we have blocks that begin below up_to_seq, deliver them. - DataBlock* b = blocks; - while ( b ) + auto it = block_list.Begin(); + + while ( it != block_list.End() ) { - if ( b->seq < last_reassem_seq ) + const auto& b = it->second; + + if ( b.seq < last_reassem_seq ) { // Already delivered this block. - b = b->next; + ++it; continue; } - if ( b->seq >= up_to_seq ) + if ( b.seq >= up_to_seq ) // Block is beyond what we need to process at this point. break; uint64_t gap_at_seq = last_reassem_seq; - uint64_t gap_len = b->seq - last_reassem_seq; + uint64_t gap_len = b.seq - last_reassem_seq; Gap(gap_at_seq, gap_len); last_reassem_seq += gap_len; - BlockInserted(b); + BlockInserted(it); // Inserting a block may cause trimming of what's buffered, // so have to assume 'b' is invalid, hence re-assign to start. - b = blocks; + it = block_list.Begin(); } if ( up_to_seq > last_reassem_seq ) @@ -277,12 +286,13 @@ void TCP_Reassembler::Undelivered(uint64_t up_to_seq) void TCP_Reassembler::MatchUndelivered(uint64_t up_to_seq, bool use_last_upper) { - if ( ! blocks || ! rule_matcher ) + if ( block_list.Empty() || ! rule_matcher ) return; - ASSERT(last_block); + const auto& last_block = std::prev(block_list.End())->second; + if ( use_last_upper ) - up_to_seq = last_block->upper; + up_to_seq = last_block.upper; // ### Note: the original code did not check whether blocks have // already been delivered, but not ACK'ed, and therefore still @@ -292,50 +302,59 @@ void TCP_Reassembler::MatchUndelivered(uint64_t up_to_seq, bool use_last_upper) // min(last_block->upper, up_to_seq). // Is there such data? if ( up_to_seq <= last_reassem_seq || - last_block->upper <= last_reassem_seq ) + last_block.upper <= last_reassem_seq ) return; // Skip blocks that are already delivered (but not ACK'ed). // Question: shall we instead keep a pointer to the first undelivered // block? 
- DataBlock* b; - for ( b = blocks; b && b->upper <= last_reassem_seq; b = b->next ) - tcp_analyzer->Conn()->Match(Rule::PAYLOAD, b->block, b->Size(), - false, false, IsOrig(), false); - ASSERT(b); + for ( auto it = block_list.Begin(); it != block_list.End(); ++it ) + { + const auto& b = it->second; + + if ( b.upper > last_reassem_seq ) + break; + + tcp_analyzer->Conn()->Match(Rule::PAYLOAD, b.block, b.Size(), + false, false, IsOrig(), false); + } } void TCP_Reassembler::RecordToSeq(uint64_t start_seq, uint64_t stop_seq, BroFile* f) { - DataBlock* b = blocks; - // Skip over blocks up to the start seq. - while ( b && b->upper <= start_seq ) - b = b->next; + auto it = block_list.Begin(); - if ( ! b ) + // Skip over blocks up to the start seq. + while ( it != block_list.End() && it->second.upper <= start_seq ) + ++it; + + if ( it == block_list.End() ) return; uint64_t last_seq = start_seq; - while ( b && b->upper <= stop_seq ) + + while ( it != block_list.End() && it->second.upper <= stop_seq ) { - if ( b->seq > last_seq ) - RecordGap(last_seq, b->seq, f); + const auto& b = it->second; + + if ( b.seq > last_seq ) + RecordGap(last_seq, b.seq, f); RecordBlock(b, f); - last_seq = b->upper; - b = b->next; + last_seq = b.upper; + ++it; } - if ( b ) + if ( it != block_list.End() ) // Check for final gap. 
if ( last_seq < stop_seq ) RecordGap(last_seq, stop_seq, f); } -void TCP_Reassembler::RecordBlock(DataBlock* b, BroFile* f) +void TCP_Reassembler::RecordBlock(const DataBlock& b, BroFile* f) { - if ( f->Write((const char*) b->block, b->Size()) ) + if ( f->Write((const char*) b.block, b.Size()) ) return; reporter->Error("TCP_Reassembler contents write failed"); @@ -367,10 +386,12 @@ void TCP_Reassembler::RecordGap(uint64_t start_seq, uint64_t upper_seq, BroFile* } } -void TCP_Reassembler::BlockInserted(DataBlock* start_block) +void TCP_Reassembler::BlockInserted(DataBlockMap::const_iterator it) { - if ( start_block->seq > last_reassem_seq || - start_block->upper <= last_reassem_seq ) + const auto& start_block = it->second; + + if ( start_block.seq > last_reassem_seq || + start_block.upper <= last_reassem_seq ) return; // We've filled a leading hole. Deliver as much as possible. @@ -379,20 +400,26 @@ void TCP_Reassembler::BlockInserted(DataBlock* start_block) // new stuff off into its own block(s), but in the following // loop we have to take care not to deliver already-delivered // data. - for ( DataBlock* b = start_block; - b && b->seq <= last_reassem_seq; b = b->next ) + while ( it != block_list.End() ) { - if ( b->seq == last_reassem_seq ) + const auto& b = it->second; + + if ( b.seq > last_reassem_seq ) + break; + + if ( b.seq == last_reassem_seq ) { // New stuff. 
- uint64_t len = b->Size(); + uint64_t len = b.Size(); uint64_t seq = last_reassem_seq; last_reassem_seq += len; if ( record_contents_file ) RecordBlock(b, record_contents_file); - DeliverBlock(seq, len, b->block); + DeliverBlock(seq, len, b.block); } + + ++it; } TCP_Endpoint* e = endp; @@ -494,7 +521,7 @@ int TCP_Reassembler::DataSent(double t, uint64_t seq, int len, } if ( tcp_excessive_data_without_further_acks && - size_of_all_blocks > static_cast<uint64_t>(tcp_excessive_data_without_further_acks) ) + block_list.DataSize() > static_cast<uint64_t>(tcp_excessive_data_without_further_acks) ) { tcp_analyzer->Weird("excessive_data_without_further_acks"); ClearBlocks(); diff --git a/src/analyzer/protocol/tcp/TCP_Reassembler.h b/src/analyzer/protocol/tcp/TCP_Reassembler.h index 5fc7a7fc8e..37c04c9e37 100644 --- a/src/analyzer/protocol/tcp/TCP_Reassembler.h +++ b/src/analyzer/protocol/tcp/TCP_Reassembler.h @@ -38,18 +38,14 @@ public: // // If we're not processing contents, then naturally each of // these is empty. + // + // WARNING: this is an O(n) operation and potentially very slow. void SizeBufferedData(uint64_t& waiting_on_hole, uint64_t& waiting_on_ack) const; // How much data is pending delivery since it's not yet reassembled. // Includes the data due to holes (so this value is a bit different // from waiting_on_hole above; and is computed in a different fashion). 
- uint64_t NumUndeliveredBytes() const - { - if ( last_block ) - return last_block->upper - last_reassem_seq; - else - return 0; - } + uint64_t NumUndeliveredBytes() const; void SetContentsFile(BroFile* f); BroFile* GetContentsFile() const { return record_contents_file; } @@ -92,10 +88,10 @@ private: void Gap(uint64_t seq, uint64_t len); void RecordToSeq(uint64_t start_seq, uint64_t stop_seq, BroFile* f); - void RecordBlock(DataBlock* b, BroFile* f); + void RecordBlock(const DataBlock& b, BroFile* f); void RecordGap(uint64_t start_seq, uint64_t upper_seq, BroFile* f); - void BlockInserted(DataBlock* b) override; + void BlockInserted(DataBlockMap::const_iterator it) override; void Overlap(const u_char* b1, const u_char* b2, uint64_t n) override; TCP_Endpoint* endp; diff --git a/src/file_analysis/FileReassembler.cc b/src/file_analysis/FileReassembler.cc index e0009de3fe..6eb5453beb 100644 --- a/src/file_analysis/FileReassembler.cc +++ b/src/file_analysis/FileReassembler.cc @@ -26,16 +26,16 @@ uint64_t FileReassembler::Flush() if ( flushing ) return 0; - if ( last_block ) - { - // This is expected to call back into FileReassembler::Undelivered(). - flushing = true; - uint64_t rval = TrimToSeq(last_block->upper); - flushing = false; - return rval; - } + if ( block_list.Empty() ) + return 0; - return 0; + const auto& last_block = std::prev(block_list.End())->second; + + // This is expected to call back into FileReassembler::Undelivered(). 
+ flushing = true; + uint64_t rval = TrimToSeq(last_block.upper); + flushing = false; + return rval; } uint64_t FileReassembler::FlushTo(uint64_t sequence) @@ -50,21 +50,29 @@ uint64_t FileReassembler::FlushTo(uint64_t sequence) return rval; } -void FileReassembler::BlockInserted(DataBlock* start_block) +void FileReassembler::BlockInserted(DataBlockMap::const_iterator it) { - if ( start_block->seq > last_reassem_seq || - start_block->upper <= last_reassem_seq ) + const auto& start_block = it->second; + + if ( start_block.seq > last_reassem_seq || + start_block.upper <= last_reassem_seq ) return; - for ( DataBlock* b = start_block; - b && b->seq <= last_reassem_seq; b = b->next ) + while ( it != block_list.End() ) { - if ( b->seq == last_reassem_seq ) + const auto& b = it->second; + + if ( b.seq > last_reassem_seq ) + break; + + if ( b.seq == last_reassem_seq ) { // New stuff. - uint64_t len = b->Size(); + uint64_t len = b.Size(); last_reassem_seq += len; - the_file->DeliverStream(b->block, len); + the_file->DeliverStream(b.block, len); } + + ++it; } // Throw out forwarded data @@ -74,29 +82,31 @@ void FileReassembler::BlockInserted(DataBlock* start_block) void FileReassembler::Undelivered(uint64_t up_to_seq) { // If we have blocks that begin below up_to_seq, deliver them. - DataBlock* b = blocks; + auto it = block_list.Begin(); - while ( b ) + while ( it != block_list.End() ) { - if ( b->seq < last_reassem_seq ) + const auto& b = it->second; + + if ( b.seq < last_reassem_seq ) { // Already delivered this block. - b = b->next; + ++it; continue; } - if ( b->seq >= up_to_seq ) + if ( b.seq >= up_to_seq ) // Block is beyond what we need to process at this point. 
break; uint64_t gap_at_seq = last_reassem_seq; - uint64_t gap_len = b->seq - last_reassem_seq; + uint64_t gap_len = b.seq - last_reassem_seq; the_file->Gap(gap_at_seq, gap_len); last_reassem_seq += gap_len; - BlockInserted(b); + BlockInserted(it); // Inserting a block may cause trimming of what's buffered, // so have to assume 'b' is invalid, hence re-assign to start. - b = blocks; + it = block_list.Begin(); } if ( up_to_seq > last_reassem_seq ) diff --git a/src/file_analysis/FileReassembler.h b/src/file_analysis/FileReassembler.h index 9788948cb0..287ebd8d22 100644 --- a/src/file_analysis/FileReassembler.h +++ b/src/file_analysis/FileReassembler.h @@ -50,7 +50,7 @@ protected: FileReassembler(); void Undelivered(uint64_t up_to_seq) override; - void BlockInserted(DataBlock* b) override; + void BlockInserted(DataBlockMap::const_iterator it) override; void Overlap(const u_char* b1, const u_char* b2, uint64_t n) override; File* the_file;