Initial commit of file reassembly.

This commit is contained in:
Seth Hall 2013-12-20 00:05:08 -05:00
parent ae9e0d4cb6
commit 0b78f444a1
11 changed files with 189 additions and 19 deletions

View file

@ -356,7 +356,7 @@ type fa_file: record {
missing_bytes: count &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
## were not delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &default=0;
## The amount of time between receiving new data for this file that

View file

@ -28,7 +28,7 @@ void FragTimer::Dispatch(double t, int /* is_expire */)
FragReassembler::FragReassembler(NetSessions* arg_s,
const IP_Hdr* ip, const u_char* pkt,
HashKey* k, double t)
: Reassembler(0, REASSEM_IP)
: Reassembler(0)
{
s = arg_s;
key = k;

View file

@ -40,7 +40,7 @@ DataBlock::DataBlock(const u_char* data, int size, int arg_seq,
unsigned int Reassembler::total_size = 0;
Reassembler::Reassembler(int init_seq, ReassemblerType arg_type)
Reassembler::Reassembler(int init_seq)
{
blocks = last_block = 0;
trim_seq = last_reassem_seq = init_seq;

View file

@ -22,11 +22,10 @@ public:
};
enum ReassemblerType { REASSEM_IP, REASSEM_TCP };
class Reassembler : public BroObj {
public:
Reassembler(int init_seq, ReassemblerType arg_type);
Reassembler(int init_seq);
virtual ~Reassembler();
void NewBlock(double t, int seq, int len, const u_char* data);

View file

@ -33,7 +33,7 @@ TCP_Reassembler::TCP_Reassembler(analyzer::Analyzer* arg_dst_analyzer,
TCP_Analyzer* arg_tcp_analyzer,
TCP_Reassembler::Type arg_type,
bool arg_is_orig, TCP_Endpoint* arg_endp)
: Reassembler(1, REASSEM_TCP)
: Reassembler(1)
{
dst_analyzer = arg_dst_analyzer;
tcp_analyzer = arg_tcp_analyzer;

View file

@ -11,6 +11,7 @@ set(file_analysis_SRCS
Manager.cc
File.cc
FileTimer.cc
FileReassembler.cc
Analyzer.cc
AnalyzerSet.cc
Component.cc

View file

@ -3,6 +3,7 @@
#include <string>
#include "File.h"
#include "FileReassembler.h"
#include "FileTimer.h"
#include "Analyzer.h"
#include "Manager.h"
@ -87,6 +88,8 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
val = new RecordVal(fa_file_type);
val->Assign(id_idx, new StringVal(file_id.c_str()));
forwarded_offset = 0;
file_reassembler = 0;
if ( conn )
{
// add source, connection, is_orig fields
@ -109,6 +112,9 @@ File::~File()
delete_vals(fonc_queue.front().second);
fonc_queue.pop();
}
if ( file_reassembler )
delete file_reassembler;
}
void File::UpdateLastActivityTime()
@ -325,33 +331,86 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
{
analyzers.DrainModifications();
if ( file_reassembler )
{
// If there is a file reassembler we must forward any data there.
// But this only happens if the incoming data doesn't happen
// to align with the current forwarded_offset
file_reassembler->NewBlock(network_time, offset, len, data);
if ( !file_reassembler->HasBlocks() )
{
delete file_reassembler;
file_reassembler = 0;
}
}
else if ( forwarded_offset == offset )
{
// This is the normal case where a file is transferred linearly.
// Nothing should be done here.
}
else if ( forwarded_offset > offset && forwarded_offset < offset+len )
{
// This is a segment that begins before the forwarded_offset
// but extends past it. It needs to be trimmed, but the
// reassembler is not enabled.
uint64 adjustment = forwarded_offset - offset;
data = data + adjustment;
len = len - adjustment;
offset = forwarded_offset;
IncrementByteCount(adjustment, overflow_bytes_idx);
}
else if ( forwarded_offset < offset )
{
// This is data past a gap and the reassembler needs to be enabled.
file_reassembler = new FileReassembler(this, forwarded_offset);
file_reassembler->NewBlock(network_time, offset, len, data);
return;
}
else
{
// This is data that was already seen so it can be completely ignored.
IncrementByteCount(len, overflow_bytes_idx);
return;
}
if ( first_chunk )
{
// TODO: this should all really be delayed until we attempt reassembly
// TODO: this should all really be delayed until we attempt reassembly.
DetectMIME(data, len);
FileEvent(file_new);
first_chunk = false;
}
if ( IsComplete() )
{
EndOfFile();
}
else
{
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! a->DeliverChunk(data, len, offset) )
//if ( ! a->DeliverChunk(data, len, offset) )
// {
// analyzers.QueueRemove(a->Tag(), a->Args());
// }
if ( ! a->DeliverStream(data, len) )
{
analyzers.QueueRemove(a->Tag(), a->Args());
}
}
analyzers.DrainModifications();
// TODO: check reassembly requirement based on buffer size in record
if ( need_reassembly )
reporter->InternalError("file_analyzer::File TODO: reassembly not yet supported");
// TODO: reassembly overflow stuff, increment overflow count, eval trigger
forwarded_offset += len;
IncrementByteCount(len, seen_bytes_idx);
}
}
void File::DataIn(const u_char* data, uint64 len)
{

View file

@ -8,6 +8,7 @@
#include <utility>
#include <vector>
#include "FileReassembler.h"
#include "Conn.h"
#include "Val.h"
#include "Tag.h"
@ -16,6 +17,8 @@
namespace file_analysis {
class FileReassembler;
/**
* Wrapper class around \c fa_file record values from script layer.
*/
@ -248,6 +251,8 @@ protected:
private:
string id; /**< A pretty hash that likely identifies file */
RecordVal* val; /**< \c fa_file from script layer. */
uint64 forwarded_offset; /**< The offset of the file which has been forwarded. */
FileReassembler *file_reassembler; /**< A reassembler for the file if it's needed. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool first_chunk; /**< Track first non-linear chunk. */
bool missed_bof; /**< Flags that we missed start of file. */

View file

@ -0,0 +1,58 @@
#include "FileReassembler.h"
#include "File.h"
namespace file_analysis {
class File;
/**
 * Constructs a reassembler bound to a file.
 *
 * @param f The file whose DataIn() receives in-order blocks from
 *          BlockInserted(). The pointer is stored as-is.
 * @param starting_offset Passed to the Reassembler base class as its
 *          initial sequence number.
 *
 * NOTE(review): File constructs this object with its uint64
 * forwarded_offset, which is narrowed to int here -- confirm that this is
 * acceptable for large offsets.
 */
FileReassembler::FileReassembler(File *f, int starting_offset)
	: Reassembler(starting_offset),
	  // Zero the state flags so they never hold indeterminate values;
	  // listed in declaration order.
	  had_gap(0), did_EOF(0), skip_deliveries(0),
	  the_file(f)
	{
	}
/**
 * Destructor. The body is empty; in particular the File pointer handed to
 * the constructor is not deleted here.
 */
FileReassembler::~FileReassembler()
	{
	// Intentionally empty.
	}
/**
 * Forwards newly contiguous data to the file.
 *
 * Starting at \a start_block, walks the block list and hands every block
 * that begins exactly at last_reassem_seq to File::DataIn(), advancing
 * last_reassem_seq by the block's size each time.
 *
 * @param start_block First block to consider for delivery.
 */
void FileReassembler::BlockInserted(DataBlock* start_block)
	{
	// Nothing to deliver if the block starts beyond the delivery point ...
	if ( seq_delta(start_block->seq, last_reassem_seq) > 0 )
		return;

	// ... or ends at or before it.
	if ( seq_delta(start_block->upper, last_reassem_seq) <= 0 )
		return;

	// A leading hole has been filled, so deliver as much as possible.
	// The inserted block may cover both old and new data; per the original
	// note, AddAndCheck() has already split the new part into its own
	// block(s), so only blocks beginning exactly at last_reassem_seq are
	// forwarded. That keeps already-delivered data from going out twice.
	DataBlock* cur = start_block;

	while ( cur && seq_delta(cur->seq, last_reassem_seq) <= 0 )
		{
		if ( cur->seq == last_reassem_seq )
			{
			const int chunk_len = cur->Size();
			const int chunk_offset = last_reassem_seq;

			// Advance the delivery point before calling into the file.
			last_reassem_seq += chunk_len;
			the_file->DataIn(cur->block, chunk_len, chunk_offset);
			}

		cur = cur->next;
		}

	// End-of-file detection is not wired up yet:
	//CheckEOF();
	}
/**
 * Placeholder that currently does nothing.
 *
 * @param up_to_seq Sequence number supplied by the caller; unused.
 */
void FileReassembler::Undelivered(int up_to_seq)
	{
	// TODO: decide what to do here. A reporter warning
	// ("file reassembler undelivered") was sketched but is disabled.
	}
/**
 * Placeholder that currently does nothing.
 *
 * @param b1 First byte buffer; unused.
 * @param b2 Second byte buffer; unused.
 * @param n Integer supplied by the caller; unused.
 */
void FileReassembler::Overlap(const u_char* b1, const u_char* b2, int n)
	{
	// TODO: decide what to do here. A reporter warning
	// ("file reassembler overlap") was sketched but is disabled.
	}
} // end file_analysis

View file

@ -0,0 +1,45 @@
#ifndef FILE_ANALYSIS_FILEREASSEMBLER_H
#define FILE_ANALYSIS_FILEREASSEMBLER_H
#include "Reassem.h"
#include "File.h"
class BroFile;
class Connection;
namespace file_analysis {
class File;
//const int STOP_ON_GAP = 1;
//const int PUNT_ON_PARTIAL = 1;
/**
 * A Reassembler subclass tied to a single file_analysis::File. Its
 * implementation passes in-order blocks to File::DataIn().
 */
class FileReassembler : public Reassembler {
public:
/**
 * @param f The file that receives reassembled data.
 * @param starting_offset Initial sequence number handed to Reassembler.
 */
FileReassembler(File* f, int starting_offset);
virtual ~FileReassembler();
// NOTE(review): no definition of Done() appears alongside the other
// member definitions in FileReassembler.cc -- confirm it is defined
// before anything calls it.
void Done();
// Disabled EOF check. NOTE(review): the earlier wording here referred to
// "this endpoint" and TCP_Analyzer::EndpointEOF(), which looks carried
// over from a TCP reassembler -- revise when CheckEOF() is implemented
// for files.
//void CheckEOF();
private:
//DECLARE_SERIAL(FileReassembler);
// Currently an empty stub in the implementation.
void Undelivered(int up_to_seq);
// Delivers blocks that begin at the current delivery point to the file.
void BlockInserted(DataBlock* b);
// Currently an empty stub in the implementation.
void Overlap(const u_char* b1, const u_char* b2, int n);
// One-bit state flags. NOTE(review): none of these is read or written by
// the member definitions in FileReassembler.cc -- confirm they are
// needed.
unsigned int had_gap:1;
unsigned int did_EOF:1;
unsigned int skip_deliveries:1;
// File that BlockInserted() delivers to; the destructor does not delete it.
File* the_file;
};
} // namespace file_analysis
#endif

View file

@ -10,3 +10,6 @@ file #0, 555523, 0
[orig_h=10.101.84.70, orig_p=10977/tcp, resp_h=129.174.93.161, resp_p=80/tcp]
total bytes: 555523
source: HTTP
MD5: 5a484ada9c816c0e8b6d2d3978e3f503
SHA1: 54e7d39e99eb9d40d6251c0361a1090a0d278571
SHA256: 61c0718bd534ab55716eba161e91bb49155562ddc7c08f0c20f6359d7b808b66