Merge remote-tracking branch 'origin/topic/jsiwek/reassembly-improvements-map'

* origin/topic/jsiwek/reassembly-improvements-map:
  Rename a reassembly DataBlockList function
  Add comments to reassembly classes
  Use DataBlock value instead of pointer in reassembly map
  Remove linked list from reassembly data structures
  Use an std::map for reassembly DataBlock searches
  Refactor Reassembler/DataBlock bookkeeping
  Reorganize reassembly data structures
  Remove a superfluous reassembler DataBlock member
This commit is contained in:
Robin Sommer 2019-09-24 09:03:56 +00:00
commit c23764483d
11 changed files with 675 additions and 399 deletions

View file

@ -1,4 +1,10 @@
3.1.0-dev.140 | 2019-09-24 09:03:56 +0000
* Reorganize reassembly data structures. This replaces the previous
linked list-based implementation with std::map to avoid O(n) worst
case performance. (Jon Siwek, Corelight)
3.1.0-dev.131 | 2019-09-23 13:07:09 -0700 3.1.0-dev.131 | 2019-09-23 13:07:09 -0700
* Add --build-dir as alias for --builddir (Dominik Charousset, Corelight) * Add --build-dir as alias for --builddir (Dominik Charousset, Corelight)

View file

@ -1 +1 @@
3.1.0-dev.131 3.1.0-dev.140

View file

@ -185,22 +185,32 @@ void FragReassembler::Overlap(const u_char* b1, const u_char* b2, uint64_t n)
Weird("fragment_overlap"); Weird("fragment_overlap");
} }
void FragReassembler::BlockInserted(DataBlock* /* start_block */) void FragReassembler::BlockInserted(DataBlockMap::const_iterator /* it */)
{ {
if ( blocks->seq > 0 || ! frag_size ) auto it = block_list.Begin();
if ( it->second.seq > 0 || ! frag_size )
// For sure don't have it all yet. // For sure don't have it all yet.
return; return;
auto next = std::next(it);
// We might have it all - look for contiguous all the way. // We might have it all - look for contiguous all the way.
DataBlock* b; while ( next != block_list.End() )
for ( b = blocks; b->next; b = b->next ) {
if ( b->upper != b->next->seq ) if ( it->second.upper != next->second.seq )
break; break;
if ( b->next ) ++it;
++next;
}
const auto& last = std::prev(block_list.End())->second;
if ( next != block_list.End() )
{ {
// We have a hole. // We have a hole.
if ( b->upper >= frag_size ) if ( it->second.upper >= frag_size )
{ {
// We're stuck. The point where we stopped is // We're stuck. The point where we stopped is
// contiguous up through the expected end of // contiguous up through the expected end of
@ -213,19 +223,19 @@ void FragReassembler::BlockInserted(DataBlock* /* start_block */)
// We decide to analyze the contiguous portion now. // We decide to analyze the contiguous portion now.
// Extend the fragment up through the end of what // Extend the fragment up through the end of what
// we have. // we have.
frag_size = b->upper; frag_size = it->second.upper;
} }
else else
return; return;
} }
else if ( last_block->upper > frag_size ) else if ( last.upper > frag_size )
{ {
Weird("fragment_size_inconsistency"); Weird("fragment_size_inconsistency");
frag_size = last_block->upper; frag_size = last.upper;
} }
else if ( last_block->upper < frag_size ) else if ( last.upper < frag_size )
// Missing the tail. // Missing the tail.
return; return;
@ -246,15 +256,22 @@ void FragReassembler::BlockInserted(DataBlock* /* start_block */)
pkt += proto_hdr_len; pkt += proto_hdr_len;
for ( b = blocks; b; b = b->next ) for ( it = block_list.Begin(); it != block_list.End(); ++it )
{ {
// If we're above a hole, stop. This can happen because const auto& b = it->second;
// the logic above regarding a hole that's above the
// expected fragment size.
if ( b->prev && b->prev->upper < b->seq )
break;
if ( b->upper > n ) if ( it != block_list.Begin() )
{
const auto& prev = std::prev(it)->second;
// If we're above a hole, stop. This can happen because
// the logic above regarding a hole that's above the
// expected fragment size.
if ( prev.upper < b.seq )
break;
}
if ( b.upper > n )
{ {
reporter->InternalWarning("bad fragment reassembly"); reporter->InternalWarning("bad fragment reassembly");
DeleteTimer(); DeleteTimer();
@ -263,8 +280,7 @@ void FragReassembler::BlockInserted(DataBlock* /* start_block */)
return; return;
} }
memcpy((void*) &pkt[b->seq], (const void*) b->block, memcpy(&pkt[b.seq], b.block, b.upper - b.seq);
b->upper - b->seq);
} }
delete reassembled_pkt; delete reassembled_pkt;
@ -299,13 +315,7 @@ void FragReassembler::BlockInserted(DataBlock* /* start_block */)
void FragReassembler::Expire(double t) void FragReassembler::Expire(double t)
{ {
while ( blocks ) block_list.Clear();
{
DataBlock* b = blocks->next;
delete blocks;
blocks = b;
}
expire_timer->ClearReassembler(); expire_timer->ClearReassembler();
expire_timer = 0; // timer manager will delete it expire_timer = 0; // timer manager will delete it

View file

@ -36,7 +36,7 @@ public:
const FragReassemblerKey& Key() const { return key; } const FragReassemblerKey& Key() const { return key; }
protected: protected:
void BlockInserted(DataBlock* start_block) override; void BlockInserted(DataBlockMap::const_iterator it) override;
void Overlap(const u_char* b1, const u_char* b2, uint64_t n) override; void Overlap(const u_char* b1, const u_char* b2, uint64_t n) override;
void Weird(const char* name) const; void Weird(const char* name) const;

View file

@ -1,95 +1,312 @@
// See the file "COPYING" in the main distribution directory for copyright. // See the file "COPYING" in the main distribution directory for copyright.
#include <algorithm> #include <algorithm>
#include <vector>
#include "zeek-config.h" #include "zeek-config.h"
#include "Reassem.h" #include "Reassem.h"
static const bool DEBUG_reassem = false; uint64_t Reassembler::total_size = 0;
uint64_t Reassembler::sizes[REASSEM_NUM];
DataBlock::DataBlock(Reassembler* reass, const u_char* data, DataBlock::DataBlock(const u_char* data, uint64_t size, uint64_t arg_seq)
uint64_t size, uint64_t arg_seq, DataBlock* arg_prev,
DataBlock* arg_next, ReassemblerType reassem_type)
{ {
seq = arg_seq; seq = arg_seq;
upper = seq + size; upper = seq + size;
block = new u_char[size]; block = new u_char[size];
memcpy(block, data, size);
memcpy((void*) block, (const void*) data, size);
prev = arg_prev;
next = arg_next;
if ( prev )
prev->next = this;
if ( next )
next->prev = this;
reassembler = reass;
reassembler->size_of_all_blocks += size;
rtype = reassem_type;
Reassembler::sizes[rtype] += pad_size(size) + padded_sizeof(DataBlock);
Reassembler::total_size += pad_size(size) + padded_sizeof(DataBlock);
} }
uint64_t Reassembler::total_size = 0; void DataBlockList::DataSize(uint64_t seq_cutoff, uint64_t* below, uint64_t* above) const
uint64_t Reassembler::sizes[REASSEM_NUM]; {
for ( const auto& e : block_map )
{
const auto& b = e.second;
if ( b.seq <= seq_cutoff )
*above += b.Size();
else
*below += b.Size();
}
}
/**
 * Erases one block and subtracts it from this list's data size and from
 * the global and per-type Reassembler size counters.
 * @param it the element to remove
 */
void DataBlockList::Delete(DataBlockMap::const_iterator it)
{
	// Read the payload length up front: erasing destroys the block.
	const uint64_t payload_len = it->second.Size();
	const auto accounted = payload_len + sizeof(DataBlock);

	block_map.erase(it);

	total_data_size -= payload_len;
	Reassembler::sizes[reassembler->rtype] -= accounted;
	Reassembler::total_size -= accounted;
}
/**
 * Detaches one block from the list and hands it to the caller.
 * Only this list's data size is reduced; the global Reassembler counters
 * are left alone here.
 * @param it the element to remove
 * @return the removed block
 */
DataBlock DataBlockList::Remove(DataBlockMap::const_iterator it)
{
	// "it" is a const_iterator, so it->second is const and std::move() on
	// it would silently pick DataBlock's copy constructor (a full buffer
	// copy).  Erasing the empty range [it, it) removes nothing but returns
	// the equivalent mutable iterator, which allows a real move.
	auto pos = block_map.erase(it, it);

	DataBlock b = std::move(pos->second);
	auto size = b.Size();
	block_map.erase(pos);
	total_data_size -= size;
	return b;
}
/**
 * Removes every block from the list and subtracts their payload plus
 * per-block overhead from the Reassembler size counters.
 */
void DataBlockList::Clear()
{
	auto total_db_size = sizeof(DataBlock) * block_map.size();
	auto total = total_data_size + total_db_size;
	Reassembler::total_size -= total;

	// A default-constructed list has no owning reassembler, yet the
	// destructor still calls Clear(); there is no type to charge then.
	if ( reassembler )
		Reassembler::sizes[reassembler->rtype] -= total;

	total_data_size = 0;
	block_map.clear();
}
/**
 * Adds a block keyed by its starting sequence number, then deletes blocks
 * from the front until at most "limit" remain.
 * @param block the block to take ownership of
 * @param limit the maximum number of blocks kept after the insertion
 */
void DataBlockList::Append(DataBlock block, uint64_t limit)
{
	total_data_size += block.Size();

	// Hint at the end of the map, where an appended block is expected to go.
	block_map.emplace_hint(block_map.end(), block.seq, std::move(block));

	// Each Delete() erases exactly one element, so count down to the limit.
	for ( auto remaining = block_map.size(); remaining > limit; --remaining )
		Delete(block_map.begin());
}
/**
 * Finds the block with the greatest starting sequence number that is less
 * than or equal to "seq".
 * @return an iterator to that block, or the end iterator when the list is
 * empty or every block starts after "seq"
 */
DataBlockMap::const_iterator DataBlockList::FirstBlockAtOrBefore(uint64_t seq) const
{
	// upper_bound() gives the first block starting strictly after "seq";
	// the block wanted, if any, is its immediate predecessor.  Block upper
	// bounds play no part in the search.
	auto after = block_map.upper_bound(seq);

	// No predecessor exists: empty list, or all blocks start after "seq".
	if ( after == block_map.begin() )
		return block_map.end();

	return std::prev(after);
}
/**
 * Creates a block covering [seq, upper) from "data", places it in the map
 * using "hint", and adds its size to the list and Reassembler counters.
 * @return the iterator returned by the map insertion
 */
DataBlockMap::const_iterator
DataBlockList::Insert(uint64_t seq, uint64_t upper, const u_char* data,
                      DataBlockMap::const_iterator hint)
{
	const auto len = upper - seq;
	const auto accounted = len + sizeof(DataBlock);

	auto pos = block_map.emplace_hint(hint, seq, DataBlock(data, len, seq));

	total_data_size += len;
	Reassembler::total_size += accounted;
	Reassembler::sizes[reassembler->rtype] += accounted;

	return pos;
}
/**
 * Inserts the data for [seq, upper) into the list, splitting it around
 * blocks that are already present.  Where the new data overlaps an
 * existing block, the existing block is kept and the overlapping part of
 * the new data is skipped.
 * @param seq lower sequence number of the new data
 * @param upper sequence number one past the end of the new data
 * @param data points to the new data contents
 * @param hint if non-null, the node from which to start searching for the
 * insertion point; if null, the starting node is looked up from "seq"
 * @return an iterator to the block holding the start of the new data:
 * a newly inserted block, or the existing block that already covers it
 */
DataBlockMap::const_iterator
DataBlockList::Insert(uint64_t seq, uint64_t upper, const u_char* data,
                      DataBlockMap::const_iterator* hint)
{
	// Empty list.
	if ( block_map.empty() )
		return Insert(seq, upper, data, block_map.end());

	const auto& last = block_map.rbegin()->second;

	// Special check for the common case of appending to the end.
	if ( seq == last.upper )
		return Insert(seq, upper, data, block_map.end());

	// Find the first block that doesn't come completely before the new data.
	DataBlockMap::const_iterator it;

	if ( hint )
		it = *hint;
	else
	{
		it = FirstBlockAtOrBefore(seq);

		if ( it == block_map.end() )
			it = block_map.begin();
	}

	while ( std::next(it) != block_map.end() && it->second.upper <= seq )
		++it;

	const auto& b = it->second;

	if ( b.upper <= seq )
		// b is the last block, and it comes completely before the new block.
		return Insert(seq, upper, data, block_map.end());

	if ( upper <= b.seq )
		// The new block comes completely before b.
		return Insert(seq, upper, data, it);

	DataBlockMap::const_iterator rval;

	// The blocks overlap.
	if ( seq < b.seq )
	{
		// The new block has a prefix that comes before b.
		uint64_t prefix_len = b.seq - seq;
		rval = Insert(seq, seq + prefix_len, data, it);
		data += prefix_len;
		seq += prefix_len;
	}
	else
		rval = it;

	// From here on "seq" is where the overlap with b begins.
	uint64_t new_b_len = upper - seq;
	uint64_t b_len = b.upper - seq;
	uint64_t overlap_len = min(new_b_len, b_len);

	if ( overlap_len < new_b_len )
	{
		// Recurse to resolve remainder of the new data.
		data += overlap_len;
		seq += overlap_len;
		auto r = Insert(seq, upper, data, &it);

		// No prefix was inserted, so the remainder is the first new block.
		if ( rval == it )
			rval = r;
	}

	return rval;
}
/**
 * Discards every block that ends at or before "seq" and reports how many
 * sequence numbers in the trimmed range were not covered by any block.
 * @param seq blocks whose upper bound is at or below this are discarded
 * @param max_old if non-zero, discarded blocks are appended to "old_list"
 * (capped at this many blocks) instead of being deleted
 * @param old_list the list receiving discarded blocks when "max_old" is set
 * @return the total size of the gaps that were bypassed
 */
uint64_t DataBlockList::Trim(uint64_t seq, uint64_t max_old,
                             DataBlockList* old_list)
{
	uint64_t gap_bytes = 0;

	// Gap accounting for the front of the list has to happen before
	// Undelivered() runs, since that will alter last_reassem_seq.
	const uint64_t delivered_to = reassembler->LastReassemSeq();

	if ( block_map.empty() )
	{
		// Trimming data we never delivered; with no blocks there is
		// nothing else to base the accounting on.
		if ( seq > delivered_to )
			gap_bytes += seq - delivered_to;
	}
	else
	{
		const uint64_t head_seq = block_map.begin()->second.seq;

		// An initial hole.
		if ( head_seq > delivered_to )
			gap_bytes += head_seq - delivered_to;
	}

	// We're trimming data we never delivered.
	if ( seq > delivered_to )
		reassembler->Undelivered(seq);

	for ( auto head = block_map.begin();
	      head != block_map.end() && head->second.upper <= seq;
	      head = block_map.begin() )
	{
		const uint64_t head_upper = head->second.upper;
		auto succ = std::next(head);
		const bool succ_in_range =
			succ != block_map.end() && succ->second.seq <= seq;

		if ( succ_in_range )
		{
			if ( head_upper != succ->second.seq )
				gap_bytes += succ->second.seq - head_upper;
		}
		else
		{
			// No more blocks - did this one make it to seq?
			// The "seq - 1" case is for acks of FINs, which
			// don't get entered into the sequence space.
			const bool reaches_seq =
				head_upper == seq || head_upper == seq - 1;

			if ( ! reaches_seq )
				gap_bytes += seq - head_upper;
		}

		if ( max_old )
			old_list->Append(Remove(head), max_old);
		else
			Delete(head);
	}

	if ( ! block_map.empty() )
	{
		auto head = block_map.begin();

		// If we skipped over some undeliverable data, then
		// it's possible that this block is now deliverable.
		// Give it a try.
		if ( head->second.seq == reassembler->LastReassemSeq() )
			reassembler->BlockInserted(head);
	}

	reassembler->SetTrimSeq(seq);
	return gap_bytes;
}
Reassembler::Reassembler(uint64_t init_seq, ReassemblerType reassem_type) Reassembler::Reassembler(uint64_t init_seq, ReassemblerType reassem_type)
: blocks(), last_block(), old_blocks(), last_old_block(), : block_list(this), old_block_list(this),
last_reassem_seq(init_seq), trim_seq(init_seq), last_reassem_seq(init_seq), trim_seq(init_seq),
max_old_blocks(0), total_old_blocks(0), size_of_all_blocks(0), max_old_blocks(0), rtype(reassem_type)
rtype(reassem_type)
{ {
} }
Reassembler::~Reassembler() void Reassembler::CheckOverlap(const DataBlockList& list,
uint64_t seq, uint64_t len,
const u_char* data)
{ {
ClearBlocks(); if ( list.Empty() )
ClearOldBlocks();
}
void Reassembler::CheckOverlap(DataBlock *head, DataBlock *tail,
uint64_t seq, uint64_t len, const u_char* data)
{
if ( ! head || ! tail )
return; return;
if ( seq == tail->upper ) const auto& last = list.LastBlock();
if ( seq == last.upper )
// Special case check for common case of appending to the end. // Special case check for common case of appending to the end.
return; return;
uint64_t upper = (seq + len); uint64_t upper = (seq + len);
for ( DataBlock* b = head; b; b = b->next ) auto it = list.FirstBlockAtOrBefore(seq);
if ( it == list.End() )
it = list.Begin();
for ( ; it != list.End(); ++it )
{ {
const auto& b = it->second;
uint64_t nseq = seq; uint64_t nseq = seq;
uint64_t nupper = upper; uint64_t nupper = upper;
const u_char* ndata = data; const u_char* ndata = data;
if ( nupper <= b->seq ) if ( nupper <= b.seq )
break;
if ( nseq >= b.upper )
continue; continue;
if ( nseq >= b->upper ) if ( nseq < b.seq )
continue;
if ( nseq < b->seq )
{ {
ndata += (b->seq - seq); ndata += (b.seq - seq);
nseq = b->seq; nseq = b.seq;
} }
if ( nupper > b->upper ) if ( nupper > b.upper )
nupper = b->upper; nupper = b.upper;
uint64_t overlap_offset = (nseq - b->seq); uint64_t overlap_offset = (nseq - b.seq);
uint64_t overlap_len = (nupper - nseq); uint64_t overlap_len = (nupper - nseq);
if ( overlap_len ) if ( overlap_len )
Overlap(&b->block[overlap_offset], ndata, overlap_len); Overlap(&b.block[overlap_offset], ndata, overlap_len);
} }
} }
@ -100,13 +317,13 @@ void Reassembler::NewBlock(double t, uint64_t seq, uint64_t len, const u_char* d
uint64_t upper_seq = seq + len; uint64_t upper_seq = seq + len;
CheckOverlap(old_blocks, last_old_block, seq, len, data); CheckOverlap(old_block_list, seq, len, data);
if ( upper_seq <= trim_seq ) if ( upper_seq <= trim_seq )
// Old data, don't do any work for it. // Old data, don't do any work for it.
return; return;
CheckOverlap(blocks, last_block, seq, len, data); CheckOverlap(block_list, seq, len, data);
if ( seq < trim_seq ) if ( seq < trim_seq )
{ // Partially old data, just keep the good stuff. { // Partially old data, just keep the good stuff.
@ -117,144 +334,28 @@ void Reassembler::NewBlock(double t, uint64_t seq, uint64_t len, const u_char* d
len -= amount_old; len -= amount_old;
} }
DataBlock* start_block; auto it = block_list.Insert(seq, upper_seq, data);;
BlockInserted(it);
if ( ! blocks )
blocks = last_block = start_block =
new DataBlock(this, data, len, seq, 0, 0, rtype);
else
start_block = AddAndCheck(blocks, seq, upper_seq, data);
BlockInserted(start_block);
} }
uint64_t Reassembler::TrimToSeq(uint64_t seq) uint64_t Reassembler::TrimToSeq(uint64_t seq)
{ {
uint64_t num_missing = 0; return block_list.Trim(seq, max_old_blocks, &old_block_list);
// Do this accounting before looking for Undelivered data,
// since that will alter last_reassem_seq.
if ( blocks )
{
if ( blocks->seq > last_reassem_seq )
// An initial hole.
num_missing += blocks->seq - last_reassem_seq;
}
else if ( seq > last_reassem_seq )
{ // Trimming data we never delivered.
if ( ! blocks )
// We won't have any accounting based on blocks
// for this hole.
num_missing += seq - last_reassem_seq;
}
if ( seq > last_reassem_seq )
{
// We're trimming data we never delivered.
Undelivered(seq);
}
while ( blocks && blocks->upper <= seq )
{
DataBlock* b = blocks->next;
if ( b && b->seq <= seq )
{
if ( blocks->upper != b->seq )
num_missing += b->seq - blocks->upper;
}
else
{
// No more blocks - did this one make it to seq?
// Second half of test is for acks of FINs, which
// don't get entered into the sequence space.
if ( blocks->upper != seq && blocks->upper != seq - 1 )
num_missing += seq - blocks->upper;
}
if ( max_old_blocks )
{
// Move block over to old_blocks queue.
blocks->next = 0;
if ( last_old_block )
{
blocks->prev = last_old_block;
last_old_block->next = blocks;
}
else
{
blocks->prev = 0;
old_blocks = blocks;
}
last_old_block = blocks;
total_old_blocks++;
while ( old_blocks && total_old_blocks > max_old_blocks )
{
DataBlock* next = old_blocks->next;
delete old_blocks;
old_blocks = next;
total_old_blocks--;
}
}
else
delete blocks;
blocks = b;
}
if ( blocks )
{
blocks->prev = 0;
// If we skipped over some undeliverable data, then
// it's possible that this block is now deliverable.
// Give it a try.
if ( blocks->seq == last_reassem_seq )
BlockInserted(blocks);
}
else
last_block = 0;
if ( seq > trim_seq )
// seq is further ahead in the sequence space.
trim_seq = seq;
return num_missing;
} }
void Reassembler::ClearBlocks() void Reassembler::ClearBlocks()
{ {
while ( blocks ) block_list.Clear();
{
DataBlock* b = blocks->next;
delete blocks;
blocks = b;
}
last_block = 0;
} }
void Reassembler::ClearOldBlocks() void Reassembler::ClearOldBlocks()
{ {
while ( old_blocks ) old_block_list.Clear();
{
DataBlock* b = old_blocks->next;
delete old_blocks;
old_blocks = b;
}
last_old_block = 0;
} }
uint64_t Reassembler::TotalSize() const uint64_t Reassembler::TotalSize() const
{ {
return size_of_all_blocks; return block_list.DataSize() + old_block_list.DataSize();
} }
void Reassembler::Describe(ODesc* d) const void Reassembler::Describe(ODesc* d) const
@ -268,89 +369,6 @@ void Reassembler::Undelivered(uint64_t up_to_seq)
last_reassem_seq = up_to_seq; last_reassem_seq = up_to_seq;
} }
DataBlock* Reassembler::AddAndCheck(DataBlock* b, uint64_t seq, uint64_t upper,
const u_char* data)
{
if ( DEBUG_reassem )
{
DEBUG_MSG("%.6f Reassembler::AddAndCheck seq=%" PRIu64", upper=%" PRIu64"\n",
network_time, seq, upper);
}
// Special check for the common case of appending to the end.
if ( last_block && seq == last_block->upper )
{
last_block = new DataBlock(this, data, upper - seq,
seq, last_block, 0, rtype);
return last_block;
}
// Find the first block that doesn't come completely before the
// new data.
while ( b->next && b->upper <= seq )
b = b->next;
if ( b->upper <= seq )
{
// b is the last block, and it comes completely before
// the new block.
last_block = new DataBlock(this, data, upper - seq,
seq, b, 0, rtype);
return last_block;
}
DataBlock* new_b = 0;
if ( upper <= b->seq )
{
// The new block comes completely before b.
new_b = new DataBlock(this, data, upper - seq, seq,
b->prev, b, rtype);
if ( b == blocks )
blocks = new_b;
return new_b;
}
// The blocks overlap.
if ( seq < b->seq )
{
// The new block has a prefix that comes before b.
uint64_t prefix_len = b->seq - seq;
new_b = new DataBlock(this, data, prefix_len, seq,
b->prev, b, rtype);
if ( b == blocks )
blocks = new_b;
data += prefix_len;
seq += prefix_len;
}
else
new_b = b;
uint64_t overlap_start = seq;
uint64_t overlap_offset = overlap_start - b->seq;
uint64_t new_b_len = upper - seq;
uint64_t b_len = b->upper - overlap_start;
uint64_t overlap_len = min(new_b_len, b_len);
if ( overlap_len < new_b_len )
{
// Recurse to resolve remainder of the new data.
data += overlap_len;
seq += overlap_len;
if ( new_b == b )
new_b = AddAndCheck(b, seq, upper, data);
else
(void) AddAndCheck(b, seq, upper, data);
}
if ( new_b->prev == last_block )
last_block = new_b;
return new_b;
}
uint64_t Reassembler::MemoryAllocation(ReassemblerType rtype) uint64_t Reassembler::MemoryAllocation(ReassemblerType rtype)
{ {
return Reassembler::sizes[rtype]; return Reassembler::sizes[rtype];

View file

@ -2,6 +2,8 @@
#pragma once #pragma once
#include <map>
#include "Obj.h" #include "Obj.h"
#include "IPAddr.h" #include "IPAddr.h"
@ -19,30 +21,235 @@ enum ReassemblerType {
class Reassembler; class Reassembler;
/**
* A block/segment of data for use in the reassembly process.
*/
class DataBlock { class DataBlock {
public: public:
DataBlock(Reassembler* reass, const u_char* data,
uint64_t size, uint64_t seq,
DataBlock* prev, DataBlock* next,
ReassemblerType reassem_type = REASSEM_UNKNOWN);
~DataBlock(); /**
* Create a data block/segment with associated sequence numbering.
*/
DataBlock(const u_char* data, uint64_t size, uint64_t seq);
uint64_t Size() const { return upper - seq; } DataBlock(const DataBlock& other)
{
seq = other.seq;
upper = other.upper;
auto size = other.Size();
block = new u_char[size];
memcpy(block, other.block, size);
}
DataBlock* next; // next block with higher seq # DataBlock(DataBlock&& other)
DataBlock* prev; // previous block with lower seq # {
uint64_t seq, upper; seq = other.seq;
upper = other.upper;
block = other.block;
other.block = nullptr;
}
/**
 * Copy assignment: replaces this block's contents with a deep copy of
 * "other".
 */
DataBlock& operator=(const DataBlock& other)
	{
	if ( this == &other )
		return *this;

	// Build the new buffer before releasing the old one.  If the
	// allocation throws, this block still owns its original buffer
	// instead of a dangling pointer that the destructor would free again.
	auto size = other.Size();
	u_char* fresh = new u_char[size];
	memcpy(fresh, other.block, size);

	delete [] block;
	block = fresh;
	seq = other.seq;
	upper = other.upper;
	return *this;
	}
/**
 * Move assignment: frees this block's buffer and takes over the buffer of
 * "other", which is left holding a null buffer pointer.
 * Declared noexcept because nothing in the body can throw.
 */
DataBlock& operator=(DataBlock&& other) noexcept
	{
	if ( this == &other )
		return *this;

	seq = other.seq;
	upper = other.upper;
	delete [] block;
	block = other.block;
	other.block = nullptr;
	return *this;
	}
~DataBlock()
{ delete [] block; }
/**
* @return length of the data block
*/
uint64_t Size() const
{ return upper - seq; }
uint64_t seq;
uint64_t upper;
u_char* block; u_char* block;
ReassemblerType rtype; };
Reassembler* reassembler; // Non-owning pointer back to parent. using DataBlockMap = std::map<uint64_t, DataBlock>;
/**
* The data structure used for reassembling arbitrary sequences of data
* blocks/segments. It internally uses an ordered map (std::map).
*/
class DataBlockList {
public:
DataBlockList()
{ }
DataBlockList(Reassembler* r) : reassembler(r)
{ }
~DataBlockList()
{ Clear(); }
/**
* @return iterator to start of the block list.
*/
DataBlockMap::const_iterator Begin() const
{ return block_map.begin(); }
/**
* @return iterator to end of the block list (one past last element).
*/
DataBlockMap::const_iterator End() const
{ return block_map.end(); }
/**
* @return reference to the first data block in the list.
* Must not be called when the list is empty.
*/
const DataBlock& FirstBlock() const
{ assert(block_map.size()); return block_map.begin()->second; }
/**
* @return reference to the last data block in the list.
* Must not be called when the list is empty.
*/
const DataBlock& LastBlock() const
{ assert(block_map.size()); return block_map.rbegin()->second; }
/**
* @return whether the list is empty.
*/
bool Empty() const
{ return block_map.empty(); };
/**
* @return the number of blocks in the list.
*/
size_t NumBlocks() const
{ return block_map.size(); };
/**
* @return the total size, in bytes, of all blocks in the list.
*/
size_t DataSize() const
{ return total_data_size; }
/**
* Counts the total size of all list elements partitioned by some cutoff.
* WARNING: this is an O(n) operation and potentially very slow.
* @param seq_cutoff the sequence number used to partition
* element sizes returned via "below" and "above" parameters
* @param below the size in bytes of all elements below "seq_cutoff"
* @param above the size in bytes of all elements above "seq_cutoff"
*/
void DataSize(uint64_t seq_cutoff, uint64_t* below, uint64_t* above) const;
/**
* Remove all elements from the list
*/
void Clear();
/**
* Insert a new data block into the list.
* @param seq lower sequence number of the data block
* @param upper upper sequence number of the data block (exclusive: the block's length is "upper" minus "seq")
* @param data points to the data block contents
* @param hint a suggestion of the node from which to start searching
* for an insertion point, or null to look up the starting node from "seq"
* @return an iterator to the element that was inserted
*/
DataBlockMap::const_iterator
Insert(uint64_t seq, uint64_t upper, const u_char* data,
DataBlockMap::const_iterator* hint = nullptr);
/**
* Insert a new data block at the end of the list and remove blocks
* from the beginning of the list to keep the list size under a limit.
* @param block the block to append
* @param limit the max number of blocks allowed (the list is pruned
* starting from the beginning after the insertion takes place).
*/
void Append(DataBlock block, uint64_t limit);
/**
* Remove all elements below a given sequence number.
* @param seq blocks below this number are discarded (removed/deleted)
* @param max_old if non-zero instead of deleting the underlying block,
* move it to "old_list"
* @param old_list another list to move discarded blocks into
* @return the amount of data (in bytes) that was not part of any
* discarded block (the total size of all bypassed gaps).
*/
uint64_t Trim(uint64_t seq, uint64_t max_old, DataBlockList* old_list);
/**
* @return an iterator pointing to the element with the highest starting
* sequence number that is less than or equal to "seq" (the closest block
* at or before "seq"). If no such element exists, returns an iterator
* denoting one-past the end of the list.
*/
DataBlockMap::const_iterator FirstBlockAtOrBefore(uint64_t seq) const;
private:
/**
* Insert a new data block into the list.
* @param seq lower sequence number of the data block
* @param upper upper sequence number of the data block (exclusive: the block's length is "upper" minus "seq")
* @param data points to the data block contents
* @param hint a suggestion of the node from which to start searching
* for an insertion point
* @return an iterator to the element that was inserted
*/
DataBlockMap::const_iterator
Insert(uint64_t seq, uint64_t upper, const u_char* data,
DataBlockMap::const_iterator hint);
/**
* Removes a block from the list and updates other state which keeps
* track of total size of blocks.
* @param it the element to remove
*/
void Delete(DataBlockMap::const_iterator it);
/**
* Removes a block from the list and returns it, assuming it will
* immediately be appended to another list.
* @param it the element to remove
* @return the removed block
*/
DataBlock Remove(DataBlockMap::const_iterator it);
Reassembler* reassembler = nullptr;
size_t total_data_size = 0;
DataBlockMap block_map;
}; };
class Reassembler : public BroObj { class Reassembler : public BroObj {
public: public:
Reassembler(uint64_t init_seq, ReassemblerType reassem_type = REASSEM_UNKNOWN); Reassembler(uint64_t init_seq, ReassemblerType reassem_type = REASSEM_UNKNOWN);
~Reassembler() override; ~Reassembler() override {}
void NewBlock(double t, uint64_t seq, uint64_t len, const u_char* data); void NewBlock(double t, uint64_t seq, uint64_t len, const u_char* data);
@ -54,9 +261,17 @@ public:
void ClearBlocks(); void ClearBlocks();
void ClearOldBlocks(); void ClearOldBlocks();
int HasBlocks() const { return blocks != 0; } int HasBlocks() const
{ return ! block_list.Empty(); }
uint64_t LastReassemSeq() const { return last_reassem_seq; } uint64_t LastReassemSeq() const { return last_reassem_seq; }
uint64_t TrimSeq() const
{ return trim_seq; }
void SetTrimSeq(uint64_t seq)
{ if ( seq > trim_seq ) trim_seq = seq; }
uint64_t TotalSize() const; // number of bytes buffered up uint64_t TotalSize() const; // number of bytes buffered up
void Describe(ODesc* d) const override; void Describe(ODesc* d) const override;
@ -72,30 +287,22 @@ public:
protected: protected:
Reassembler() { } Reassembler() { }
friend class DataBlock; friend class DataBlockList;
virtual void Undelivered(uint64_t up_to_seq); virtual void Undelivered(uint64_t up_to_seq);
virtual void BlockInserted(DataBlock* b) = 0; virtual void BlockInserted(DataBlockMap::const_iterator it) = 0;
virtual void Overlap(const u_char* b1, const u_char* b2, uint64_t n) = 0; virtual void Overlap(const u_char* b1, const u_char* b2, uint64_t n) = 0;
DataBlock* AddAndCheck(DataBlock* b, uint64_t seq, void CheckOverlap(const DataBlockList& list,
uint64_t upper, const u_char* data);
void CheckOverlap(DataBlock *head, DataBlock *tail,
uint64_t seq, uint64_t len, const u_char* data); uint64_t seq, uint64_t len, const u_char* data);
DataBlock* blocks; DataBlockList block_list;
DataBlock* last_block; DataBlockList old_block_list;
DataBlock* old_blocks;
DataBlock* last_old_block;
uint64_t last_reassem_seq; uint64_t last_reassem_seq;
uint64_t trim_seq; // how far we've trimmed uint64_t trim_seq; // how far we've trimmed
uint32_t max_old_blocks; uint32_t max_old_blocks;
uint32_t total_old_blocks;
uint64_t size_of_all_blocks;
ReassemblerType rtype; ReassemblerType rtype;

View file

@ -161,6 +161,8 @@ public:
// //
// If we're not processing contents, then naturally each of // If we're not processing contents, then naturally each of
// these is empty. // these is empty.
//
// WARNING: this is an O(n) operation and potentially very slow.
void SizeBufferedData(uint64_t& waiting_on_hole, uint64_t& waiting_on_ack); void SizeBufferedData(uint64_t& waiting_on_hole, uint64_t& waiting_on_ack);
int ValidChecksum(const struct tcphdr* tp, int len) const; int ValidChecksum(const struct tcphdr* tp, int len) const;

View file

@ -66,9 +66,14 @@ void TCP_Reassembler::Done()
if ( record_contents_file ) if ( record_contents_file )
{ // Record any undelivered data. { // Record any undelivered data.
if ( blocks && last_reassem_seq < last_block->upper ) if ( ! block_list.Empty() )
RecordToSeq(last_reassem_seq, last_block->upper, {
record_contents_file); auto last_block = std::prev(block_list.End())->second;
if ( last_reassem_seq < last_block.upper )
RecordToSeq(last_reassem_seq, last_block.upper,
record_contents_file);
}
record_contents_file->Close(); record_contents_file->Close();
} }
@ -78,15 +83,16 @@ void TCP_Reassembler::SizeBufferedData(uint64_t& waiting_on_hole,
uint64_t& waiting_on_ack) const uint64_t& waiting_on_ack) const
{ {
waiting_on_hole = waiting_on_ack = 0; waiting_on_hole = waiting_on_ack = 0;
for ( DataBlock* b = blocks; b; b = b->next ) block_list.DataSize(last_reassem_seq, &waiting_on_ack, &waiting_on_hole);
{ }
if ( b->seq <= last_reassem_seq )
// We must have delivered this block, but uint64_t TCP_Reassembler::NumUndeliveredBytes() const
// haven't yet trimmed it. {
waiting_on_ack += b->Size(); if ( block_list.Empty() )
else return 0;
waiting_on_hole += b->Size();
} auto last_block = std::prev(block_list.End())->second;
return last_block.upper - last_reassem_seq;
} }
void TCP_Reassembler::SetContentsFile(BroFile* f) void TCP_Reassembler::SetContentsFile(BroFile* f)
@ -102,8 +108,8 @@ void TCP_Reassembler::SetContentsFile(BroFile* f)
Unref(record_contents_file); Unref(record_contents_file);
else else
{ {
if ( blocks ) if ( ! block_list.Empty() )
RecordToSeq(blocks->seq, last_reassem_seq, f); RecordToSeq(block_list.Begin()->second.seq, last_reassem_seq, f);
} }
Ref(f); Ref(f);
@ -231,29 +237,32 @@ void TCP_Reassembler::Undelivered(uint64_t up_to_seq)
if ( ! skip_deliveries ) if ( ! skip_deliveries )
{ {
// If we have blocks that begin below up_to_seq, deliver them. // If we have blocks that begin below up_to_seq, deliver them.
DataBlock* b = blocks; auto it = block_list.Begin();
while ( b )
while ( it != block_list.End() )
{ {
if ( b->seq < last_reassem_seq ) const auto& b = it->second;
if ( b.seq < last_reassem_seq )
{ {
// Already delivered this block. // Already delivered this block.
b = b->next; ++it;
continue; continue;
} }
if ( b->seq >= up_to_seq ) if ( b.seq >= up_to_seq )
// Block is beyond what we need to process at this point. // Block is beyond what we need to process at this point.
break; break;
uint64_t gap_at_seq = last_reassem_seq; uint64_t gap_at_seq = last_reassem_seq;
uint64_t gap_len = b->seq - last_reassem_seq; uint64_t gap_len = b.seq - last_reassem_seq;
Gap(gap_at_seq, gap_len); Gap(gap_at_seq, gap_len);
last_reassem_seq += gap_len; last_reassem_seq += gap_len;
BlockInserted(b); BlockInserted(it);
// Inserting a block may cause trimming of what's buffered, // Inserting a block may cause trimming of what's buffered,
// so have to assume 'b' is invalid, hence re-assign to start. // so have to assume 'b' is invalid, hence re-assign to start.
b = blocks; it = block_list.Begin();
} }
if ( up_to_seq > last_reassem_seq ) if ( up_to_seq > last_reassem_seq )
@ -277,12 +286,13 @@ void TCP_Reassembler::Undelivered(uint64_t up_to_seq)
void TCP_Reassembler::MatchUndelivered(uint64_t up_to_seq, bool use_last_upper) void TCP_Reassembler::MatchUndelivered(uint64_t up_to_seq, bool use_last_upper)
{ {
if ( ! blocks || ! rule_matcher ) if ( block_list.Empty() || ! rule_matcher )
return; return;
ASSERT(last_block); const auto& last_block = std::prev(block_list.End())->second;
if ( use_last_upper ) if ( use_last_upper )
up_to_seq = last_block->upper; up_to_seq = last_block.upper;
// ### Note: the original code did not check whether blocks have // ### Note: the original code did not check whether blocks have
// already been delivered, but not ACK'ed, and therefore still // already been delivered, but not ACK'ed, and therefore still
@ -292,50 +302,59 @@ void TCP_Reassembler::MatchUndelivered(uint64_t up_to_seq, bool use_last_upper)
// min(last_block->upper, up_to_seq). // min(last_block->upper, up_to_seq).
// Is there such data? // Is there such data?
if ( up_to_seq <= last_reassem_seq || if ( up_to_seq <= last_reassem_seq ||
last_block->upper <= last_reassem_seq ) last_block.upper <= last_reassem_seq )
return; return;
// Skip blocks that are already delivered (but not ACK'ed). // Skip blocks that are already delivered (but not ACK'ed).
// Question: shall we instead keep a pointer to the first undelivered // Question: shall we instead keep a pointer to the first undelivered
// block? // block?
DataBlock* b;
for ( b = blocks; b && b->upper <= last_reassem_seq; b = b->next )
tcp_analyzer->Conn()->Match(Rule::PAYLOAD, b->block, b->Size(),
false, false, IsOrig(), false);
ASSERT(b); for ( auto it = block_list.Begin(); it != block_list.End(); ++it )
{
const auto& b = it->second;
if ( b.upper > last_reassem_seq )
break;
tcp_analyzer->Conn()->Match(Rule::PAYLOAD, b.block, b.Size(),
false, false, IsOrig(), false);
}
} }
void TCP_Reassembler::RecordToSeq(uint64_t start_seq, uint64_t stop_seq, BroFile* f) void TCP_Reassembler::RecordToSeq(uint64_t start_seq, uint64_t stop_seq, BroFile* f)
{ {
DataBlock* b = blocks; auto it = block_list.Begin();
// Skip over blocks up to the start seq.
while ( b && b->upper <= start_seq )
b = b->next;
if ( ! b ) // Skip over blocks up to the start seq.
while ( it != block_list.End() && it->second.upper <= start_seq )
++it;
if ( it == block_list.End() )
return; return;
uint64_t last_seq = start_seq; uint64_t last_seq = start_seq;
while ( b && b->upper <= stop_seq )
while ( it != block_list.End() && it->second.upper <= stop_seq )
{ {
if ( b->seq > last_seq ) const auto& b = it->second;
RecordGap(last_seq, b->seq, f);
if ( b.seq > last_seq )
RecordGap(last_seq, b.seq, f);
RecordBlock(b, f); RecordBlock(b, f);
last_seq = b->upper; last_seq = b.upper;
b = b->next; ++it;
} }
if ( b ) if ( it != block_list.End() )
// Check for final gap. // Check for final gap.
if ( last_seq < stop_seq ) if ( last_seq < stop_seq )
RecordGap(last_seq, stop_seq, f); RecordGap(last_seq, stop_seq, f);
} }
void TCP_Reassembler::RecordBlock(DataBlock* b, BroFile* f) void TCP_Reassembler::RecordBlock(const DataBlock& b, BroFile* f)
{ {
if ( f->Write((const char*) b->block, b->Size()) ) if ( f->Write((const char*) b.block, b.Size()) )
return; return;
reporter->Error("TCP_Reassembler contents write failed"); reporter->Error("TCP_Reassembler contents write failed");
@ -367,10 +386,12 @@ void TCP_Reassembler::RecordGap(uint64_t start_seq, uint64_t upper_seq, BroFile*
} }
} }
void TCP_Reassembler::BlockInserted(DataBlock* start_block) void TCP_Reassembler::BlockInserted(DataBlockMap::const_iterator it)
{ {
if ( start_block->seq > last_reassem_seq || const auto& start_block = it->second;
start_block->upper <= last_reassem_seq )
if ( start_block.seq > last_reassem_seq ||
start_block.upper <= last_reassem_seq )
return; return;
// We've filled a leading hole. Deliver as much as possible. // We've filled a leading hole. Deliver as much as possible.
@ -379,20 +400,26 @@ void TCP_Reassembler::BlockInserted(DataBlock* start_block)
// new stuff off into its own block(s), but in the following // new stuff off into its own block(s), but in the following
// loop we have to take care not to deliver already-delivered // loop we have to take care not to deliver already-delivered
// data. // data.
for ( DataBlock* b = start_block; while ( it != block_list.End() )
b && b->seq <= last_reassem_seq; b = b->next )
{ {
if ( b->seq == last_reassem_seq ) const auto& b = it->second;
if ( b.seq > last_reassem_seq )
break;
if ( b.seq == last_reassem_seq )
{ // New stuff. { // New stuff.
uint64_t len = b->Size(); uint64_t len = b.Size();
uint64_t seq = last_reassem_seq; uint64_t seq = last_reassem_seq;
last_reassem_seq += len; last_reassem_seq += len;
if ( record_contents_file ) if ( record_contents_file )
RecordBlock(b, record_contents_file); RecordBlock(b, record_contents_file);
DeliverBlock(seq, len, b->block); DeliverBlock(seq, len, b.block);
} }
++it;
} }
TCP_Endpoint* e = endp; TCP_Endpoint* e = endp;
@ -494,7 +521,7 @@ int TCP_Reassembler::DataSent(double t, uint64_t seq, int len,
} }
if ( tcp_excessive_data_without_further_acks && if ( tcp_excessive_data_without_further_acks &&
size_of_all_blocks > static_cast<uint64_t>(tcp_excessive_data_without_further_acks) ) block_list.DataSize() > static_cast<uint64_t>(tcp_excessive_data_without_further_acks) )
{ {
tcp_analyzer->Weird("excessive_data_without_further_acks"); tcp_analyzer->Weird("excessive_data_without_further_acks");
ClearBlocks(); ClearBlocks();

View file

@ -38,18 +38,14 @@ public:
// //
// If we're not processing contents, then naturally each of // If we're not processing contents, then naturally each of
// these is empty. // these is empty.
//
// WARNING: this is an O(n) operation and potentially very slow.
void SizeBufferedData(uint64_t& waiting_on_hole, uint64_t& waiting_on_ack) const; void SizeBufferedData(uint64_t& waiting_on_hole, uint64_t& waiting_on_ack) const;
// How much data is pending delivery since it's not yet reassembled. // How much data is pending delivery since it's not yet reassembled.
// Includes the data due to holes (so this value is a bit different // Includes the data due to holes (so this value is a bit different
// from waiting_on_hole above; and is computed in a different fashion). // from waiting_on_hole above; and is computed in a different fashion).
uint64_t NumUndeliveredBytes() const uint64_t NumUndeliveredBytes() const;
{
if ( last_block )
return last_block->upper - last_reassem_seq;
else
return 0;
}
void SetContentsFile(BroFile* f); void SetContentsFile(BroFile* f);
BroFile* GetContentsFile() const { return record_contents_file; } BroFile* GetContentsFile() const { return record_contents_file; }
@ -92,10 +88,10 @@ private:
void Gap(uint64_t seq, uint64_t len); void Gap(uint64_t seq, uint64_t len);
void RecordToSeq(uint64_t start_seq, uint64_t stop_seq, BroFile* f); void RecordToSeq(uint64_t start_seq, uint64_t stop_seq, BroFile* f);
void RecordBlock(DataBlock* b, BroFile* f); void RecordBlock(const DataBlock& b, BroFile* f);
void RecordGap(uint64_t start_seq, uint64_t upper_seq, BroFile* f); void RecordGap(uint64_t start_seq, uint64_t upper_seq, BroFile* f);
void BlockInserted(DataBlock* b) override; void BlockInserted(DataBlockMap::const_iterator it) override;
void Overlap(const u_char* b1, const u_char* b2, uint64_t n) override; void Overlap(const u_char* b1, const u_char* b2, uint64_t n) override;
TCP_Endpoint* endp; TCP_Endpoint* endp;

View file

@ -26,16 +26,16 @@ uint64_t FileReassembler::Flush()
if ( flushing ) if ( flushing )
return 0; return 0;
if ( last_block ) if ( block_list.Empty() )
{ return 0;
// This is expected to call back into FileReassembler::Undelivered().
flushing = true;
uint64_t rval = TrimToSeq(last_block->upper);
flushing = false;
return rval;
}
return 0; const auto& last_block = std::prev(block_list.End())->second;
// This is expected to call back into FileReassembler::Undelivered().
flushing = true;
uint64_t rval = TrimToSeq(last_block.upper);
flushing = false;
return rval;
} }
uint64_t FileReassembler::FlushTo(uint64_t sequence) uint64_t FileReassembler::FlushTo(uint64_t sequence)
@ -50,21 +50,29 @@ uint64_t FileReassembler::FlushTo(uint64_t sequence)
return rval; return rval;
} }
void FileReassembler::BlockInserted(DataBlock* start_block) void FileReassembler::BlockInserted(DataBlockMap::const_iterator it)
{ {
if ( start_block->seq > last_reassem_seq || const auto& start_block = it->second;
start_block->upper <= last_reassem_seq )
if ( start_block.seq > last_reassem_seq ||
start_block.upper <= last_reassem_seq )
return; return;
for ( DataBlock* b = start_block; while ( it != block_list.End() )
b && b->seq <= last_reassem_seq; b = b->next )
{ {
if ( b->seq == last_reassem_seq ) const auto& b = it->second;
if ( b.seq > last_reassem_seq )
break;
if ( b.seq == last_reassem_seq )
{ // New stuff. { // New stuff.
uint64_t len = b->Size(); uint64_t len = b.Size();
last_reassem_seq += len; last_reassem_seq += len;
the_file->DeliverStream(b->block, len); the_file->DeliverStream(b.block, len);
} }
++it;
} }
// Throw out forwarded data // Throw out forwarded data
@ -74,29 +82,31 @@ void FileReassembler::BlockInserted(DataBlock* start_block)
void FileReassembler::Undelivered(uint64_t up_to_seq) void FileReassembler::Undelivered(uint64_t up_to_seq)
{ {
// If we have blocks that begin below up_to_seq, deliver them. // If we have blocks that begin below up_to_seq, deliver them.
DataBlock* b = blocks; auto it = block_list.Begin();
while ( b ) while ( it != block_list.End() )
{ {
if ( b->seq < last_reassem_seq ) const auto& b = it->second;
if ( b.seq < last_reassem_seq )
{ {
// Already delivered this block. // Already delivered this block.
b = b->next; ++it;
continue; continue;
} }
if ( b->seq >= up_to_seq ) if ( b.seq >= up_to_seq )
// Block is beyond what we need to process at this point. // Block is beyond what we need to process at this point.
break; break;
uint64_t gap_at_seq = last_reassem_seq; uint64_t gap_at_seq = last_reassem_seq;
uint64_t gap_len = b->seq - last_reassem_seq; uint64_t gap_len = b.seq - last_reassem_seq;
the_file->Gap(gap_at_seq, gap_len); the_file->Gap(gap_at_seq, gap_len);
last_reassem_seq += gap_len; last_reassem_seq += gap_len;
BlockInserted(b); BlockInserted(it);
// Inserting a block may cause trimming of what's buffered, // Inserting a block may cause trimming of what's buffered,
// so have to assume 'b' is invalid, hence re-assign to start. // so have to assume 'b' is invalid, hence re-assign to start.
b = blocks; it = block_list.Begin();
} }
if ( up_to_seq > last_reassem_seq ) if ( up_to_seq > last_reassem_seq )

View file

@ -50,7 +50,7 @@ protected:
FileReassembler(); FileReassembler();
void Undelivered(uint64_t up_to_seq) override; void Undelivered(uint64_t up_to_seq) override;
void BlockInserted(DataBlock* b) override; void BlockInserted(DataBlockMap::const_iterator it) override;
void Overlap(const u_char* b1, const u_char* b2, uint64_t n) override; void Overlap(const u_char* b1, const u_char* b2, uint64_t n) override;
File* the_file; File* the_file;