More file reassembly work.

- The reassembly behavior can be modified per-file by enabling or
   disabling the reassembler and/or modifying the size of the reassembly
   buffer.

 - Changed the file extraction analyzer to use the stream to avoid
    issues with the chunk-based approach not immediately triggering
    the file_new event due to the mime-type detection delay.  Early
    chunks were frequently lost before this change.

 - Generally things are working now and I'd consider this in testing.
This commit is contained in:
Seth Hall 2014-01-05 04:58:01 -05:00
parent 0b78f444a1
commit 38dbba7622
23 changed files with 375 additions and 159 deletions

View file

@ -3,7 +3,6 @@
#include <string>
#include "File.h"
#include "FileReassembler.h"
#include "FileTimer.h"
#include "Analyzer.h"
#include "Manager.h"
@ -77,8 +76,8 @@ void File::StaticInit()
File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
bool is_orig)
: id(file_id), val(0), postpone_timeout(false), first_chunk(true),
missed_bof(false), need_reassembly(false), done(false),
: id(file_id), val(0), stream_offset(0), reassembly_max_buffer(0),
reassembly_enabled(false), postpone_timeout(false), done(false),
did_file_new_event(false), analyzers(this)
{
StaticInit();
@ -88,7 +87,6 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
val = new RecordVal(fa_file_type);
val->Assign(id_idx, new StringVal(file_id.c_str()));
forwarded_offset = 0;
file_reassembler = 0;
if ( conn )
{
@ -244,7 +242,7 @@ bool File::IsComplete() const
if ( ! total )
return false;
if ( LookupFieldDefaultCount(seen_bytes_idx) >= total->AsCount() )
if ( stream_offset >= total->AsCount() )
return true;
return false;
@ -302,6 +300,26 @@ bool File::DetectMIME(const u_char* data, uint64 len)
return mime;
}
void File::EnableReassembly()
	{
	// Turn on stream reassembly for this file.  The actual
	// FileReassembler instance is created lazily when out-of-order
	// data arrives (see DeliverChunk).
	reassembly_enabled = true;
	}
void File::DisableReassembly()
	{
	// Stop reassembling this file and discard any buffered
	// out-of-order data.  A new reassembler will not be created
	// until reassembly is enabled again.
	reassembly_enabled = false;

	// Deleting a null pointer is a well-defined no-op, so no guard
	// around the delete is needed.
	delete file_reassembler;
	file_reassembler = NULL;
	}
void File::SetReassemblyBuffer(uint64 max)
	{
	// Record the per-file cap on reassembly buffer memory; enforced
	// against FileReassembler::TotalSize() when chunks are delivered.
	// A value of zero means "no limit".
	reassembly_max_buffer = max;
	}
void File::ReplayBOF()
{
if ( bof_buffer.replayed )
@ -311,141 +329,122 @@ void File::ReplayBOF()
if ( bof_buffer.chunks.empty() )
{
// Since we missed the beginning, try file type detect on next data in.
missed_bof = true;
// We definitely can't do anything if we don't have any chunks.
return;
}
BroString* bs = concatenate(bof_buffer.chunks);
val->Assign(bof_buffer_idx, new StringVal(bs));
DetectMIME(bs->Bytes(), bs->Len());
FileEvent(file_new);
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len());
}
void File::DataIn(const u_char* data, uint64 len, uint64 offset)
void File::DeliverStream(const u_char* data, uint64 len)
{
analyzers.DrainModifications();
// Buffer enough data send to libmagic.
if ( BufferBOF(data, len) )
return;
if ( file_reassembler )
if ( stream_offset == 0 )
{
// If there is a file reassembler we must forward any data there.
// But this only happens if the incoming data doesn't happen
// to align with the current forwarded_offset
file_reassembler->NewBlock(network_time, offset, len, data);
DetectMIME(data, len);
FileEvent(file_new);
}
if ( !file_reassembler->HasBlocks() )
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (a = analyzers.NextEntry(c)) )
{
if ( !a->DeliverStream(data, len) )
{
delete file_reassembler;
file_reassembler = 0;
analyzers.QueueRemove(a->Tag(), a->Args());
}
}
else if ( forwarded_offset == offset )
stream_offset += len;
IncrementByteCount(len, seen_bytes_idx);
}
void File::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
{
// Potentially handle reassembly and deliver to the stream analyzers.
if ( file_reassembler )
{
if ( reassembly_max_buffer > 0 &&
reassembly_max_buffer < file_reassembler->TotalSize() )
{
uint64 first_offset = file_reassembler->GetFirstBlockOffset();
int gap_bytes = file_reassembler->TrimToSeq(first_offset);
if ( FileEventAvailable(file_reassembly_buffer_overflow) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(new Val(stream_offset, TYPE_COUNT));
vl->append(new Val(gap_bytes, TYPE_COUNT));
FileEvent(file_reassembly_buffer_overflow, vl);
}
Gap(stream_offset, gap_bytes);
}
// Forward data to the reassembler.
file_reassembler->NewBlock(network_time, offset, len, data);
}
else if ( stream_offset == offset )
{
// This is the normal case where a file is transferred linearly.
// Nothing should be done here.
// Nothing special should be done here.
DeliverStream(data, len);
}
else if ( forwarded_offset > offset && forwarded_offset < offset+len )
else if ( reassembly_enabled )
{
// This is a segment that begins before the forwarded_offset
// but proceeds past the forwarded_offset. It needs
// trimmed but the reassembler is not enabled.
uint64 adjustment = forwarded_offset - offset;
data = data + adjustment;
len = len - adjustment;
offset = forwarded_offset;
IncrementByteCount(adjustment, overflow_bytes_idx);
}
else if ( forwarded_offset < offset )
{
// This is data past a gap and the reassembler needs to be enabled.
file_reassembler = new FileReassembler(this, forwarded_offset);
// This is data that doesn't match the offset and the reassembler
// needs to be enabled.
file_reassembler = new FileReassembler(this, stream_offset);
file_reassembler->NewBlock(network_time, offset, len, data);
return;
}
else
{
// This is data that was already seen so it can be completely ignored.
// We can't reassemble so we throw out the data for streaming.
IncrementByteCount(len, overflow_bytes_idx);
return;
}
if ( first_chunk )
// Deliver to the chunk analyzers.
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (a = analyzers.NextEntry(c)) )
{
// TODO: this should all really be delayed until we attempt reassembly.
DetectMIME(data, len);
FileEvent(file_new);
first_chunk = false;
if ( !a->DeliverChunk(data, len, offset) )
{
analyzers.QueueRemove(a->Tag(), a->Args());
}
}
if ( IsComplete() )
{
// If the file is complete we can automatically go and close out the file from here.
EndOfFile();
}
else
{
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
}
while ( (a = analyzers.NextEntry(c)) )
{
//if ( ! a->DeliverChunk(data, len, offset) )
// {
// analyzers.QueueRemove(a->Tag(), a->Args());
// }
if ( ! a->DeliverStream(data, len) )
{
analyzers.QueueRemove(a->Tag(), a->Args());
}
}
analyzers.DrainModifications();
forwarded_offset += len;
IncrementByteCount(len, seen_bytes_idx);
}
void File::DataIn(const u_char* data, uint64 len, uint64 offset)
{
analyzers.DrainModifications();
DeliverChunk(data, len, offset);
analyzers.DrainModifications();
}
void File::DataIn(const u_char* data, uint64 len)
{
analyzers.DrainModifications();
if ( BufferBOF(data, len) )
return;
if ( missed_bof )
{
DetectMIME(data, len);
FileEvent(file_new);
missed_bof = false;
}
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! a->DeliverStream(data, len) )
{
analyzers.QueueRemove(a->Tag(), a->Args());
continue;
}
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
LookupFieldDefaultCount(missing_bytes_idx);
if ( ! a->DeliverChunk(data, len, offset) )
analyzers.QueueRemove(a->Tag(), a->Args());
}
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
LookupFieldDefaultCount(missing_bytes_idx);
DeliverChunk(data, len, offset);
analyzers.DrainModifications();
IncrementByteCount(len, seen_bytes_idx);
}
void File::EndOfFile()
@ -501,6 +500,8 @@ void File::Gap(uint64 offset, uint64 len)
}
analyzers.DrainModifications();
stream_offset += len;
IncrementByteCount(len, missing_bytes_idx);
}

View file

@ -169,6 +169,7 @@ public:
protected:
friend class Manager;
friend class FileReassembler;
/**
* Constructor; only file_analysis::Manager should be creating these.
@ -236,6 +237,33 @@ protected:
*/
bool DetectMIME(const u_char* data, uint64 len);
/**
* Enables reassembly on the file.
*/
void EnableReassembly();
/**
* Disables reassembly on the file. If there is an existing reassembler
* for the file, this will cause it to be deleted and won't allow a new
* one to be created until reassembly is reenabled.
*/
void DisableReassembly();
/**
 * Set the maximum number of bytes of memory allowed for reassembling this file.
*/
void SetReassemblyBuffer(uint64 max);
/**
* Perform stream-wise delivery for analyzers that need it.
*/
void DeliverStream(const u_char* data, uint64 len);
/**
* Perform chunk-wise delivery for analyzers that need it.
*/
void DeliverChunk(const u_char* data, uint64 len, uint64 offset);
/**
* Lookup a record field index/offset by name.
* @param field_name the name of the \c fa_file record field.
@ -248,18 +276,17 @@ protected:
*/
static void StaticInit();
private:
protected:
string id; /**< A pretty hash that likely identifies file */
RecordVal* val; /**< \c fa_file from script layer. */
uint64 forwarded_offset; /**< The offset of the file which has been forwarded. */
FileReassembler *file_reassembler; /**< A reassembler for the file if it's needed. */
uint64 stream_offset; /**< The offset of the file which has been forwarded. */
uint64 reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */
bool reassembly_enabled; /**< Whether file stream reassembly is needed. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool first_chunk; /**< Track first non-linear chunk. */
bool missed_bof; /**< Flags that we missed start of file. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */
bool done; /**< If this object is about to be deleted. */
bool did_file_new_event; /**< Whether the file_new event has been done. */
AnalyzerSet analyzers; /**< A set of attached file analyzer. */
AnalyzerSet analyzers; /**< A set of attached file analyzers. */
queue<pair<EventHandlerPtr, val_list*> > fonc_queue;
struct BOF_Buffer {

View file

@ -22,13 +22,6 @@ void FileReassembler::BlockInserted(DataBlock* start_block)
seq_delta(start_block->upper, last_reassem_seq) <= 0 )
return;
// We've filled a leading hole. Deliver as much as possible.
// Note that the new block may include both some old stuff
// and some new stuff. AddAndCheck() will have split the
// new stuff off into its own block(s), but in the following
// loop we have to take care not to deliver already-delivered
// data.
for ( DataBlock* b = start_block;
b && seq_delta(b->seq, last_reassem_seq) <= 0; b = b->next )
{
@ -36,23 +29,34 @@ void FileReassembler::BlockInserted(DataBlock* start_block)
{ // New stuff.
int len = b->Size();
int seq = last_reassem_seq;
the_file->DeliverStream(b->block, len);
last_reassem_seq += len;
the_file->DataIn(b->block, len, seq);
}
}
//CheckEOF();
}
void FileReassembler::Undelivered(int up_to_seq)
	{
	// Intentionally a no-op for now.  NOTE(review): gap accounting
	// appears to be handled at the File level (File::Gap) — confirm
	// nothing needs to be reported from here when data up to
	// up_to_seq can no longer be delivered.
	}
void FileReassembler::Overlap(const u_char* b1, const u_char* b2, int n)
	{
	// Intentionally a no-op: overlapping blocks are currently accepted
	// without comparing their contents.  NOTE(review): consider
	// flagging inconsistent overlaps here in the future.
	}
IMPLEMENT_SERIAL(FileReassembler, SER_FILE_REASSEMBLER);
bool FileReassembler::DoSerialize(SerialInfo* info) const
	{
	// Serializing in-progress file reassembly state is not supported;
	// abort with an internal error if this is ever invoked.
	reporter->InternalError("FileReassembler::DoSerialize not implemented");
	return false; // Cannot be reached.
	}
bool FileReassembler::DoUnserialize(UnserialInfo* info)
	{
	// Unserializing file reassembly state is not supported; abort
	// with an internal error if this is ever invoked.
	reporter->InternalError("FileReassembler::DoUnserialize not implemented");
	return false; // Cannot be reached.
	}
} // end file_analysis

View file

@ -21,14 +21,16 @@ public:
virtual ~FileReassembler();
void Done();
uint64 GetFirstBlockOffset() { return blocks->seq; }
// Checks if we have delivered all contents that we can possibly
// deliver for this endpoint. Calls TCP_Analyzer::EndpointEOF()
// when so.
//void CheckEOF();
// deliver for this endpoint.
void CheckEOF();
private:
//DECLARE_SERIAL(FileReassembler);
protected:
FileReassembler() { }
DECLARE_SERIAL(FileReassembler);
void Undelivered(int up_to_seq);
void BlockInserted(DataBlock* b);

View file

@ -183,6 +183,39 @@ bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
return true;
}
bool Manager::EnableReassembly(const string& file_id)
{
File* file = LookupFile(file_id);
if ( ! file )
return false;
file->EnableReassembly();
return true;
}
bool Manager::DisableReassembly(const string& file_id)
{
File* file = LookupFile(file_id);
if ( ! file )
return false;
file->DisableReassembly();
return true;
}
bool Manager::SetReassemblyBuffer(const string& file_id, uint64 max)
{
File* file = LookupFile(file_id);
if ( ! file )
return false;
file->SetReassemblyBuffer(max);
return true;
}
bool Manager::SetExtractionLimit(const string& file_id, RecordVal* args,
uint64 n) const
{

View file

@ -173,6 +173,21 @@ public:
*/
bool SetTimeoutInterval(const string& file_id, double interval) const;
/**
* Enable the reassembler for a file.
*/
bool EnableReassembly(const string& file_id);
/**
* Disable the reassembler for a file.
*/
bool DisableReassembly(const string& file_id);
/**
 * Set the reassembly buffer size for a file, in bytes.
*/
bool SetReassemblyBuffer(const string& file_id, uint64 max);
/**
* Sets a limit on the maximum size allowed for extracting the file
* to local disk;

View file

@ -14,7 +14,7 @@ Extract::Extract(RecordVal* args, File* file, const string& arg_filename,
: file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), args, file),
filename(arg_filename), limit(arg_limit)
{
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, 0666);
if ( fd < 0 )
{
@ -53,7 +53,7 @@ file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file)
limit->AsCount());
}
static bool check_limit_exceeded(uint64 lim, uint64 off, uint64 len, uint64* n)
static bool check_limit_exceeded(uint64 lim, uint64 len, uint64* n)
{
if ( lim == 0 )
{
@ -61,13 +61,13 @@ static bool check_limit_exceeded(uint64 lim, uint64 off, uint64 len, uint64* n)
return false;
}
if ( off >= lim )
{
*n = 0;
return true;
}
*n = lim - off;
//if ( off >= lim )
// {
// *n = 0;
// return true;
// }
//
//*n = lim - off;
if ( len > *n )
return true;
@ -77,13 +77,13 @@ static bool check_limit_exceeded(uint64 lim, uint64 off, uint64 len, uint64* n)
return false;
}
bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
bool Extract::DeliverStream(const u_char* data, uint64 len)
{
if ( ! fd )
return false;
uint64 towrite = 0;
bool limit_exceeded = check_limit_exceeded(limit, offset, len, &towrite);
bool limit_exceeded = check_limit_exceeded(limit, len, &towrite);
if ( limit_exceeded && file_extraction_limit )
{
@ -92,16 +92,15 @@ bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
vl->append(f->GetVal()->Ref());
vl->append(Args()->Ref());
vl->append(new Val(limit, TYPE_COUNT));
vl->append(new Val(offset, TYPE_COUNT));
vl->append(new Val(len, TYPE_COUNT));
f->FileEvent(file_extraction_limit, vl);
// Limit may have been modified by BIF, re-check it.
limit_exceeded = check_limit_exceeded(limit, offset, len, &towrite);
limit_exceeded = check_limit_exceeded(limit, len, &towrite);
}
if ( towrite > 0 )
safe_pwrite(fd, data, towrite, offset);
safe_write(fd, (const char *) data, towrite);
return ( ! limit_exceeded );
}

View file

@ -28,11 +28,10 @@ public:
* Write a chunk of file data to the local extraction file.
* @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk starts.
* @return false if there was no extraction file open and the data couldn't
* be written, else true.
*/
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
virtual bool DeliverStream(const u_char* data, uint64 len);
/**
* Create a new instance of an Extract analyzer.

View file

@ -11,9 +11,7 @@
##
## limit: The limit, in bytes, the extracted file is about to breach.
##
## offset: The offset at which a file chunk is about to be written.
##
## len: The length of the file chunk about to be written.
##
## .. bro:see:: Files::add_analyzer Files::ANALYZER_EXTRACT
event file_extraction_limit%(f: fa_file, args: any, limit: count, offset: count, len: count%);
event file_extraction_limit%(f: fa_file, args: any, limit: count, len: count%);

View file

@ -15,6 +15,27 @@ function Files::__set_timeout_interval%(file_id: string, t: interval%): bool
return new Val(result, TYPE_BOOL);
%}
## :bro:see:`Files::enable_reassembly`.
function Files::__enable_reassembly%(file_id: string%): bool
	%{
	// Forward to the file manager; result indicates whether the id
	// mapped to a known file.
	return new Val(file_mgr->EnableReassembly(file_id->CheckString()),
	               TYPE_BOOL);
	%}
## :bro:see:`Files::disable_reassembly`.
function Files::__disable_reassembly%(file_id: string%): bool
	%{
	// Forward to the file manager; result indicates whether the id
	// mapped to a known file.
	return new Val(file_mgr->DisableReassembly(file_id->CheckString()),
	               TYPE_BOOL);
	%}
## :bro:see:`Files::set_reassembly_buffer`.
function Files::__set_reassembly_buffer%(file_id: string, max: count%): bool
	%{
	// Forward to the file manager; result indicates whether the id
	// mapped to a known file.
	return new Val(file_mgr->SetReassemblyBuffer(file_id->CheckString(), max),
	               TYPE_BOOL);
	%}
## :bro:see:`Files::add_analyzer`.
function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
%{