Updates the files event api and brings file reassembly up to master.

This commit is contained in:
Seth Hall 2014-09-26 00:40:37 -04:00
parent 42b2d56279
commit cafd35e746
47 changed files with 515 additions and 637 deletions

View file

@ -72,10 +72,10 @@ bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args)
return true;
}
bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args)
bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args, file_analysis::Analyzer* a)
{
HashKey* key = GetKey(tag, args);
file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args);
a = InstantiateAnalyzer(tag, args);
if ( ! a )
{

View file

@ -57,9 +57,10 @@ public:
* Queue the attachment of an analyzer to #file.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @param a an analyzer pointer to return the instantiated analyzer to the caller.
* @return true if analyzer was able to be instantiated, else false.
*/
bool QueueAdd(file_analysis::Tag tag, RecordVal* args);
bool QueueAdd(file_analysis::Tag tag, RecordVal* args, file_analysis::Analyzer* a);
/**
* Remove an analyzer from #file immediately.

View file

@ -53,8 +53,6 @@ int File::overflow_bytes_idx = -1;
int File::timeout_interval_idx = -1;
int File::bof_buffer_size_idx = -1;
int File::bof_buffer_idx = -1;
int File::mime_type_idx = -1;
int File::mime_types_idx = -1;
void File::StaticInit()
{
@ -74,15 +72,14 @@ void File::StaticInit()
timeout_interval_idx = Idx("timeout_interval");
bof_buffer_size_idx = Idx("bof_buffer_size");
bof_buffer_idx = Idx("bof_buffer");
mime_type_idx = Idx("mime_type");
mime_types_idx = Idx("mime_types");
}
File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
bool is_orig)
: id(file_id), val(0), stream_offset(0), reassembly_max_buffer(0),
: id(file_id), val(0), file_reassembler(0), stream_offset(0),
reassembly_max_buffer(0), did_mime_type(false),
reassembly_enabled(false), postpone_timeout(false), done(false),
did_file_new_event(false), analyzers(this)
analyzers(this)
{
StaticInit();
@ -91,13 +88,11 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
val = new RecordVal(fa_file_type);
val->Assign(id_idx, new StringVal(file_id.c_str()));
file_reassembler = 0;
if ( conn )
{
// add source, connection, is_orig fields
SetSource(analyzer_mgr->GetComponentName(tag));
val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL));
UpdateConnectionFields(conn, is_orig);
}
UpdateLastActivityTime();
@ -108,14 +103,10 @@ File::~File()
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Destroying File object", id.c_str());
Unref(val);
while ( ! fonc_queue.empty() )
{
delete_vals(fonc_queue.front().second);
fonc_queue.pop();
}
if ( file_reassembler )
{
delete file_reassembler;
}
}
void File::UpdateLastActivityTime()
@ -154,11 +145,7 @@ void File::UpdateConnectionFields(Connection* conn, bool is_orig)
vl->append(conn_val->Ref());
vl->append(new Val(is_orig, TYPE_BOOL));
if ( did_file_new_event )
FileEvent(file_over_new_connection, vl);
else
fonc_queue.push(pair<EventHandlerPtr, val_list*>(
file_over_new_connection, vl));
FileEvent(file_over_new_connection, vl);
}
}
@ -262,7 +249,23 @@ bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args)
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing addition of %s analyzer",
id.c_str(), file_mgr->GetComponentName(tag).c_str());
return done ? false : analyzers.QueueAdd(tag, args);
if ( done )
return false;
file_analysis::Analyzer *a = 0;
bool success = analyzers.QueueAdd(tag, args, a);
if ( success && a )
{
// Catch up this analyzer with the BOF buffer
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
{
if ( ! a->DeliverStream(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len()) )
{
analyzers.QueueRemove(a->Tag(), a->Args());
}
}
}
return success;
}
bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)
@ -273,41 +276,6 @@ bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)
return done ? false : analyzers.QueueRemove(tag, args);
}
bool File::BufferBOF(const u_char* data, uint64 len)
{
if ( bof_buffer.full || bof_buffer.replayed )
return false;
uint64 desired_size = LookupFieldDefaultCount(bof_buffer_size_idx);
bof_buffer.chunks.push_back(new BroString(data, len, 0));
bof_buffer.size += len;
if ( bof_buffer.size >= desired_size )
{
bof_buffer.full = true;
ReplayBOF();
}
return true;
}
bool File::DetectMIME(const u_char* data, uint64 len)
{
RuleMatcher::MIME_Matches matches;
len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
file_mgr->DetectMIME(data, len, &matches);
if ( matches.empty() )
return false;
val->Assign(mime_type_idx,
new StringVal(*(matches.begin()->second.begin())));
val->Assign(mime_types_idx, file_analysis::GenMIMEMatchesVal(matches));
return true;
}
void File::EnableReassembly()
{
reassembly_enabled = true;
@ -328,38 +296,76 @@ void File::SetReassemblyBuffer(uint64 max)
reassembly_max_buffer = max;
}
void File::ReplayBOF()
bool File::DetectMIME()
{
if ( bof_buffer.replayed )
return;
RuleMatcher::MIME_Matches matches;
bof_buffer.replayed = true;
BroString *bs = concatenate(bof_buffer.chunks);
const u_char* data = bs->Bytes();
uint64 len = bs->Len();
len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
file_mgr->DetectMIME(data, len, &matches);
if ( bof_buffer.chunks.empty() )
if ( matches.empty() )
return false;
if ( FileEventAvailable(file_mime_type) )
{
// We definitely can't do anything if we don't have any chunks.
return;
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(new StringVal(*(matches.begin()->second.begin())));
FileEvent(file_mime_type, vl);
}
BroString* bs = concatenate(bof_buffer.chunks);
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len());
if ( FileEventAvailable(file_mime_types) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(file_analysis::GenMIMEMatchesVal(matches));
FileEvent(file_mime_types, vl);
}
return true;
}
bool File::BufferBOF(const u_char* data, uint64 len)
{
if ( bof_buffer.full )
return false;
uint64 desired_size = LookupFieldDefaultCount(bof_buffer_size_idx);
bof_buffer.chunks.push_back(new BroString(data, len, 0));
bof_buffer.size += len;
if ( bof_buffer.size >= desired_size )
{
bof_buffer.full = true;
}
return true;
}
void File::DeliverStream(const u_char* data, uint64 len)
{
// Buffer enough data send to libmagic.
if ( BufferBOF(data, len) )
return;
// Buffer enough data for the BOF buffer
BufferBOF(data, len);
if ( stream_offset == 0 )
// TODO: mime matching size needs defined.
if ( ! did_mime_type &&
bof_buffer.size >= 1024 &&
LookupFieldDefaultCount(missing_bytes_idx) == 0 )
{
DetectMIME(data, len);
FileEvent(file_new);
did_mime_type = true;
DetectMIME();
// TODO: this needs to be done elsewhere. For now it's here.
BroString* bs = concatenate(bof_buffer.chunks);
val->Assign(bof_buffer_idx, new StringVal(bs));
}
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in at offset" PRIu64 "; %s [%s]",
id.c_str(), len, offset,
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in at offset %" PRIu64 "; %s [%s]",
id.c_str(), len, stream_offset,
IsComplete() ? "complete" : "incomplete",
fmt_bytes((const char*) data, min((uint64)40, len)), len > 40 ? "..." : "");
@ -388,13 +394,13 @@ void File::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
uint64 first_offset = file_reassembler->GetFirstBlockOffset();
int gap_bytes = file_reassembler->TrimToSeq(first_offset);
if ( FileEventAvailable(file_reassembly_buffer_overflow) )
if ( FileEventAvailable(file_reassembly_overflow) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(new Val(stream_offset, TYPE_COUNT));
vl->append(new Val(gap_bytes, TYPE_COUNT));
FileEvent(file_reassembly_buffer_overflow, vl);
FileEvent(file_reassembly_overflow, vl);
}
Gap(stream_offset, gap_bytes);
@ -469,10 +475,19 @@ void File::EndOfFile()
if ( done )
return;
analyzers.DrainModifications();
if ( ! did_mime_type )
{
DetectMIME();
// Send along anything that's been buffered, but never flushed.
ReplayBOF();
// TODO: this also needs to be done elsewhere.
if ( bof_buffer.size > 0 )
{
BroString* bs = concatenate(bof_buffer.chunks);
val->Assign(bof_buffer_idx, new StringVal(bs));
}
}
analyzers.DrainModifications();
done = true;
@ -497,10 +512,6 @@ void File::Gap(uint64 offset, uint64 len)
analyzers.DrainModifications();
// If we were buffering the beginning of the file, a gap means we've got
// as much contiguous stuff at the beginning as possible, so work with that.
ReplayBOF();
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
@ -540,30 +551,13 @@ void File::FileEvent(EventHandlerPtr h)
FileEvent(h, vl);
}
static void flush_file_event_queue(queue<pair<EventHandlerPtr, val_list*> >& q)
{
while ( ! q.empty() )
{
pair<EventHandlerPtr, val_list*> p = q.front();
mgr.QueueEvent(p.first, p.second);
q.pop();
}
}
void File::FileEvent(EventHandlerPtr h, val_list* vl)
{
if ( h == file_state_remove )
flush_file_event_queue(fonc_queue);
mgr.QueueEvent(h, vl);
if ( h == file_new )
{
did_file_new_event = true;
flush_file_event_queue(fonc_queue);
}
if ( h == file_new || h == file_timeout || h == file_extraction_limit )
if ( h == file_new || h == file_over_new_connection ||
h == file_mime_type ||
h == file_timeout || h == file_extraction_limit )
{
// immediate feedback is required for these events.
mgr.Drain();

View file

@ -231,12 +231,10 @@ protected:
/**
* Does mime type detection via file magic signatures and assigns
* strongest matching mime type (if available) to \c mime_type
* field in #val.
* @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk.
* field in #val. It uses the data in the BOF buffer
* @return whether a mime type match was found.
*/
bool DetectMIME(const u_char* data, uint64 len);
bool DetectMIME();
/**
* Enables reassembly on the file.
@ -283,20 +281,18 @@ protected:
FileReassembler *file_reassembler; /**< A reassembler for the file if it's needed. */
uint64 stream_offset; /**< The offset of the file which has been forwarded. */
uint64 reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */
bool did_mime_type; /**< Whether the mime type ident has already been attempted. */
bool reassembly_enabled; /**< Whether file stream reassembly is needed. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool done; /**< If this object is about to be deleted. */
bool did_file_new_event; /**< Whether the file_new event has been done. */
AnalyzerSet analyzers; /**< A set of attached file analyzers. */
queue<pair<EventHandlerPtr, val_list*> > fonc_queue;
struct BOF_Buffer {
BOF_Buffer() : full(false), replayed(false), size(0) {}
BOF_Buffer() : full(false), size(0) {}
~BOF_Buffer()
{ for ( size_t i = 0; i < chunks.size(); ++i ) delete chunks[i]; }
bool full;
bool replayed;
uint64 size;
BroString::CVec chunks;
} bof_buffer; /**< Beginning of file buffer. */

View file

@ -18,21 +18,24 @@ FileReassembler::~FileReassembler()
void FileReassembler::BlockInserted(DataBlock* start_block)
{
if ( seq_delta(start_block->seq, last_reassem_seq) > 0 ||
seq_delta(start_block->upper, last_reassem_seq) <= 0 )
if ( start_block->seq > last_reassem_seq ||
start_block->upper <= last_reassem_seq )
return;
for ( DataBlock* b = start_block;
b && seq_delta(b->seq, last_reassem_seq) <= 0; b = b->next )
b && b->seq <= last_reassem_seq; b = b->next )
{
if ( b->seq == last_reassem_seq )
{ // New stuff.
int len = b->Size();
int seq = last_reassem_seq;
the_file->DeliverStream(b->block, len);
uint64 len = b->Size();
uint64 seq = last_reassem_seq;
last_reassem_seq += len;
the_file->DeliverStream(b->block, len);
}
}
// Throw out forwarded data
TrimToSeq(last_reassem_seq);
}
void FileReassembler::Undelivered(uint64 up_to_seq)

View file

@ -287,28 +287,6 @@ bool Manager::AddAnalyzer(const string& file_id, file_analysis::Tag tag,
return file->AddAnalyzer(tag, args);
}
TableVal* Manager::AddAnalyzersForMIMEType(const string& file_id, const string& mtype,
RecordVal* args)
{
if ( ! tag_set_type )
tag_set_type = internal_type("files_tag_set")->AsTableType();
TableVal* sval = new TableVal(tag_set_type);
TagSet* l = LookupMIMEType(mtype, false);
if ( ! l )
return sval;
for ( TagSet::const_iterator i = l->begin(); i != l->end(); i++ )
{
file_analysis::Tag tag = *i;
if ( AddAnalyzer(file_id, tag, args) )
sval->Assign(tag.AsEnumVal(), 0);
}
return sval;
}
bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag,
RecordVal* args) const
{
@ -337,6 +315,12 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
id_map.Insert(file_id.c_str(), rval);
rval->ScheduleInactivityTimer();
// Generate file_new here so the manager knows about the file.
rval->FileEvent(file_new);
// Same for file_over_new_connection which is generated by
// updating the connection fields.
rval->UpdateConnectionFields(conn, is_orig);
if ( IsIgnored(file_id) )
return 0;
}
@ -494,63 +478,6 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
return a;
}
Manager::TagSet* Manager::LookupMIMEType(const string& mtype, bool add_if_not_found)
{
MIMEMap::const_iterator i = mime_types.find(to_upper(mtype));
if ( i != mime_types.end() )
return i->second;
if ( ! add_if_not_found )
return 0;
TagSet* l = new TagSet;
mime_types.insert(std::make_pair(to_upper(mtype), l));
return l;
}
bool Manager::RegisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype)
{
Component* p = Lookup(tag);
if ( ! p )
return false;
return RegisterAnalyzerForMIMEType(p->Tag(), mtype->CheckString());
}
bool Manager::RegisterAnalyzerForMIMEType(Tag tag, const string& mtype)
{
TagSet* l = LookupMIMEType(mtype, true);
DBG_LOG(DBG_FILE_ANALYSIS, "Register analyzer %s for MIME type %s",
GetComponentName(tag).c_str(), mtype.c_str());
l->insert(tag);
return true;
}
bool Manager::UnregisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype)
{
Component* p = Lookup(tag);
if ( ! p )
return false;
return UnregisterAnalyzerForMIMEType(p->Tag(), mtype->CheckString());
}
bool Manager::UnregisterAnalyzerForMIMEType(Tag tag, const string& mtype)
{
TagSet* l = LookupMIMEType(mtype, true);
DBG_LOG(DBG_FILE_ANALYSIS, "Unregister analyzer %s for MIME type %s",
GetComponentName(tag).c_str(), mtype.c_str());
l->erase(tag);
return true;
}
RuleMatcher::MIME_Matches* Manager::DetectMIME(const u_char* data, uint64 len,
RuleMatcher::MIME_Matches* rval) const
{

View file

@ -253,18 +253,6 @@ public:
bool AddAnalyzer(const string& file_id, file_analysis::Tag tag,
RecordVal* args) const;
/**
* Queue attachment of an all analyzers associated with a given MIME
* type to the file identifier.
*
* @param file_id the file identifier/hash.
* @param mtype the MIME type; comparisions will be performanced case-insensitive.
* @param args a \c AnalyzerArgs value which describes a file analyzer.
* @return A ref'ed \c set[Tag] with all added analyzers.
*/
TableVal* AddAnalyzersForMIMEType(const string& file_id, const string& mtype,
RecordVal* args);
/**
* Queue removal of an analyzer for a given file identifier.
* @param file_id the file identifier/hash.
@ -292,62 +280,6 @@ public:
Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const;
/**
* Registers a MIME type for an analyzer. Once registered, files of
* that MIME type will automatically get a corresponding analyzer
* assigned.
*
* @param tag The analyzer's tag as an enum of script type \c
* Files::Tag.
*
* @param mtype The MIME type. It will be matched case-insenistive.
*
* @return True if successful.
*/
bool RegisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype);
/**
* Registers a MIME type for an analyzer. Once registered, files of
* that MIME type will automatically get a corresponding analyzer
* assigned.
*
* @param tag The analyzer's tag as an enum of script type \c
* Files::Tag.
*
* @param mtype The MIME type. It will be matched case-insenistive.
*
* @return True if successful.
*/
bool RegisterAnalyzerForMIMEType(Tag tag, const string& mtype);
/**
* Unregisters a MIME type for an analyzer.
*
* @param tag The analyzer's tag as an enum of script type \c
* Files::Tag.
*
* @param mtype The MIME type. It will be matched case-insenistive.
*
* @return True if successful (incl. when the type wasn't actually
* registered for the analyzer).
*
*/
bool UnregisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype);
/**
* Unregisters a MIME type for an analyzer.
*
* @param tag The analyzer's tag as an enum of script type \c
* Files::Tag.
*
* @param mtype The MIME type. It will be matched case-insenistive.
*
* @return True if successful (incl. when the type wasn't actually
* registered for the analyzer).
*
*/
bool UnregisterAnalyzerForMIMEType(Tag tag, const string& mtype);
/**
* Returns a set of all matching MIME magic signatures for a given
* chunk of data.
* @param data A chunk of bytes to match magic MIME signatures against.

View file

@ -12,7 +12,7 @@ using namespace file_analysis;
Extract::Extract(RecordVal* args, File* file, const string& arg_filename,
uint64 arg_limit)
: file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), args, file),
filename(arg_filename), limit(arg_limit)
filename(arg_filename), limit(arg_limit), depth(0)
{
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, 0666);
@ -53,7 +53,7 @@ file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file)
limit->AsCount());
}
static bool check_limit_exceeded(uint64 lim, uint64 len, uint64* n)
static bool check_limit_exceeded(uint64 lim, uint64 depth, uint64 len, uint64* n)
{
if ( lim == 0 )
{
@ -61,18 +61,21 @@ static bool check_limit_exceeded(uint64 lim, uint64 len, uint64* n)
return false;
}
//if ( off >= lim )
// {
// *n = 0;
// return true;
// }
//
//*n = lim - off;
if ( len > *n )
if ( depth >= lim )
{
*n = 0;
return true;
}
else if ( depth + len > lim )
{
printf("exceeded the maximum extraction lenght depth: %llu len: %llu lim: %llu\n", depth, len, lim);
*n = lim - depth;
return true;
}
else
{
*n = len;
}
return false;
}
@ -83,7 +86,7 @@ bool Extract::DeliverStream(const u_char* data, uint64 len)
return false;
uint64 towrite = 0;
bool limit_exceeded = check_limit_exceeded(limit, len, &towrite);
bool limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite);
if ( limit_exceeded && file_extraction_limit )
{
@ -95,12 +98,21 @@ bool Extract::DeliverStream(const u_char* data, uint64 len)
vl->append(new Val(len, TYPE_COUNT));
f->FileEvent(file_extraction_limit, vl);
// Limit may have been modified by BIF, re-check it.
limit_exceeded = check_limit_exceeded(limit, len, &towrite);
// Limit may have been modified by a BIF, re-check it.
limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite);
}
if ( towrite > 0 )
safe_write(fd, (const char *) data, towrite);
{
safe_pwrite(fd, (const u_char *) data, towrite, depth);
depth += towrite;
}
return ( ! limit_exceeded );
}
bool Extract::Undelivered(uint64 offset, uint64 len)
{
depth += len;
return true;
}

View file

@ -33,6 +33,14 @@ public:
*/
virtual bool DeliverStream(const u_char* data, uint64 len);
/**
* Report undelivered bytes.
* @param offset distance into the file where the gap occurred.
* @param len number of bytes undelivered.
* @return true
*/
virtual bool Undelivered(uint64 offset, uint64 len);
/**
* Create a new instance of an Extract analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
@ -66,6 +74,7 @@ private:
string filename;
int fd;
uint64 limit;
uint64 depth;
};
} // namespace file_analysis

View file

@ -47,16 +47,6 @@ function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): b
return new Val(result, TYPE_BOOL);
%}
## :bro:see:`Files::add_analyzers_for_mime_type`.
function Files::__add_analyzers_for_mime_type%(file_id: string, mtype: string, args: any%): files_tag_set
%{
using BifType::Record::Files::AnalyzerArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
Val* analyzers = file_mgr->AddAnalyzersForMIMEType(file_id->CheckString(), mtype->CheckString(), rv);
Unref(rv);
return analyzers;
%}
## :bro:see:`Files::remove_analyzer`.
function Files::__remove_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
%{
@ -81,13 +71,6 @@ function Files::__analyzer_name%(tag: Files::Tag%) : string
return new StringVal(file_mgr->GetComponentName(tag));
%}
## :bro:see:`Files::register_for_mime_type`.
function Files::__register_for_mime_type%(id: Analyzer::Tag, mt: string%) : bool
%{
bool result = file_mgr->RegisterAnalyzerForMIMEType(id->AsEnumVal(), mt);
return new Val(result, TYPE_BOOL);
%}
module GLOBAL;
## For use within a :bro:see:`get_file_handle` handler to set a unique