FileAnalysis: refactor add/remove/stop BIFs, add BOF triggers/fields.

The add_action, remove_action, and stop BIFs now go through a queue to
ensure that modifications are made at well-defined times and don't end
up invalidating loop iterators.
This commit is contained in:
Jon Siwek 2013-02-28 17:19:16 -06:00
parent 691622b3aa
commit 720858fb36
13 changed files with 517 additions and 169 deletions

View file

@ -18,8 +18,7 @@ export {
const default_reassembly_buffer_size: count = 1024*1024 &redef; const default_reassembly_buffer_size: count = 1024*1024 &redef;
## The default buffer size used for storing the beginning of files. ## The default buffer size used for storing the beginning of files.
# TODO: what's a reasonable default? const default_bof_buffer_size: count = 1024 &redef;
const default_bof_buffer_size: count = 256 &redef;
## The default amount of time file analysis will wait for new file data ## The default amount of time file analysis will wait for new file data
## before giving up. ## before giving up.
@ -27,12 +26,6 @@ export {
#const default_timeout_interval: interval = 2 mins &redef; #const default_timeout_interval: interval = 2 mins &redef;
const default_timeout_interval: interval = 10 sec &redef; const default_timeout_interval: interval = 10 sec &redef;
## The default amount of data that a user is allowed to extract
## from a file to an event with the
## :bro:see:`FileAnalysis::ACTION_DATA_EVENT` action.
## TODO: what's a reasonable default?
const default_data_event_len: count = 1024*1024 &redef;
# Needed a forward declaration for event parameters... # Needed a forward declaration for event parameters...
type Info: record {}; type Info: record {};
@ -88,6 +81,20 @@ export {
## the analysis engine will wait before giving up on it. ## the analysis engine will wait before giving up on it.
timeout_interval: interval &log &default=default_timeout_interval; timeout_interval: interval &log &default=default_timeout_interval;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field of
## :bro:see:`FileAnalysis::ActionResults`.
bof_buffer_size: count &default=default_bof_buffer_size;
## The content of the beginning of a file up to *bof_buffer_size* bytes.
## This is also the buffer that's used for file/mime type detection.
bof_buffer: string &optional;
## An initial guess at file type.
file_type: string &optional;
## An initial guess at mime type.
mime_type: string &optional;
## Actions that have been added to the analysis of this file. ## Actions that have been added to the analysis of this file.
## Not meant to be modified directly by scripts. ## Not meant to be modified directly by scripts.
actions: table[ActionArgs] of ActionResults; actions: table[ActionArgs] of ActionResults;

View file

@ -453,6 +453,7 @@ set(bro_SRCS
file_analysis/InfoTimer.cc file_analysis/InfoTimer.cc
file_analysis/FileID.h file_analysis/FileID.h
file_analysis/Action.h file_analysis/Action.h
file_analysis/ActionSet.cc
file_analysis/Extract.cc file_analysis/Extract.cc
file_analysis/Hash.cc file_analysis/Hash.cc
file_analysis/DataEvent.cc file_analysis/DataEvent.cc

View file

@ -35,13 +35,13 @@ enum Trigger %{
## Raised when the beginning of a file is available and that beginning ## Raised when the beginning of a file is available and that beginning
## is at least the number of bytes indicated by the *bof_buffer_size* ## is at least the number of bytes indicated by the *bof_buffer_size*
## field of :bro:see:`FileAnalysis::Info`. ## field of :bro:see:`FileAnalysis::Info`.
TRIGGER_BOF_BUFFER_AVAIL, TRIGGER_BOF_BUFFER,
## Raised when the mime type of a file is matched based on magic ## Raised when an initial guess at the file/mime type of a file is matched
## numbers. TODO: re-purposing protocols/http/file-ident.sig for ## based on magic numbers. TODO: re-purposing protocols/http/file-ident.sig
## doing this is tricky since the signature engine doesn't expect ## for doing this is tricky since the signature engine doesn't expect
## to be decoupled from connections, so figure out what work needs ## to be decoupled from connections, so figure out what work needs
## done there. ## done there.
TRIGGER_MIME_TYPE, TRIGGER_TYPE,
## Raised when the end of a file is detected. If the file is not ## Raised when the end of a file is detected. If the file is not
## being transferred linearly, then this doesn't have to mean the full ## being transferred linearly, then this doesn't have to mean the full
## file has been transferred. ## file has been transferred.

View file

@ -39,7 +39,9 @@ public:
{ return true; } { return true; }
/** /**
* Subclasses may override this to specifically handle the end of a file. * Subclasses may override this to specifically handle an EOF signal,
* which means no more data is going to be incoming and the action/analyzer
* may be deleted/cleaned up soon.
* @return true if the action is still in a valid state to continue * @return true if the action is still in a valid state to continue
* receiving data/events or false if it's essentially "done". * receiving data/events or false if it's essentially "done".
*/ */
@ -64,6 +66,11 @@ public:
*/ */
RecordVal* Args() const { return args; } RecordVal* Args() const { return args; }
/**
* @return the file_analysis::Info object to which the action is attached.
*/
Info* GetInfo() const { return info; }
/** /**
* @return the action tag equivalent of the 'act' field from the ActionArgs * @return the action tag equivalent of the 'act' field from the ActionArgs
* value \a args. * value \a args.

View file

@ -0,0 +1,186 @@
#include "ActionSet.h"
#include "Info.h"
#include "Action.h"
#include "Extract.h"
#include "DataEvent.h"
#include "Hash.h"
using namespace file_analysis;
// Factory functions used to instantiate each kind of action.  The table is
// indexed by the action tag taken from an ActionArgs record (see
// Action::ArgsTag()), so keep the entries in the same order as the enum
// values declared in file_analysis.bif.
static ActionInstantiator action_factory[] = {
Extract::Instantiate,
MD5::Instantiate,
SHA1::Instantiate,
SHA256::Instantiate,
DataEvent::Instantiate,
};
static void action_del_func(void* v)
{
delete (Action*) v;
}
/**
 * Creates an empty action set for \a arg_info.  Builds the composite hash
 * used to key actions by their ActionArgs record and registers the deletion
 * callback for values stored in the action map.
 */
ActionSet::ActionSet(Info* arg_info) : info(arg_info)
	{
	TypeList* key_types = new TypeList();
	key_types->Append(BifType::Record::FileAnalysis::ActionArgs->Ref());

	action_hash = new CompositeHash(key_types);

	// Drop the local reference now that the hash has been constructed.
	Unref(key_types);

	action_map.SetDeleteFunc(action_del_func);
	}
/**
 * Discards every modification that is still queued (aborting each one so it
 * releases whatever it holds) and then frees the composite hash.
 */
ActionSet::~ActionSet()
	{
	for ( ; ! mod_queue.empty(); mod_queue.pop() )
		{
		Modification* pending = mod_queue.front();
		pending->Abort();
		delete pending;
		}

	delete action_hash;
	}
/**
 * Immediately attaches the action described by \a args, bypassing the
 * modification queue.
 * @param args an ActionArgs record describing the action.
 * @return true if the action was attached or an action with the same
 *         arguments is already present, false if instantiation failed.
 */
bool ActionSet::AddAction(RecordVal* args)
	{
	HashKey* key = GetKey(args);

	if ( ! action_map.Lookup(key) )
		{
		Action* created = InstantiateAction(args);

		if ( created )
			{
			InsertAction(created, key);
			return true;
			}

		delete key;
		return false;
		}

	// An action with identical arguments already exists: nothing to do.
	DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d skipped for file id"
	        " %s: already exists", Action::ArgsTag(args),
	        info->GetFileID().c_str());
	delete key;
	return true;
	}
/**
 * Instantiates the action described by \a args right away, but defers
 * attaching it until the modification queue is flushed.
 * @param args an ActionArgs record describing the action.
 * @return true if the action could be instantiated and was queued, else
 *         false.
 */
bool ActionSet::QueueAddAction(RecordVal* args)
	{
	HashKey* key = GetKey(args);
	Action* created = InstantiateAction(args);

	if ( created )
		{
		// The queued Add now holds both the action and its key.
		mod_queue.push(new Add(created, key));
		return true;
		}

	delete key;
	return false;
	}
/**
 * Carries out a queued addition: inserts the held action into \a set unless
 * an action with the same key is already present, in which case the held
 * action and key are released via Abort().
 * @return always true.
 */
bool ActionSet::Add::Perform(ActionSet* set)
	{
	if ( ! set->action_map.Lookup(key) )
		{
		set->InsertAction(act, key);
		return true;
		}

	// Log before Abort(), which deletes the action being referenced.
	DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d skipped for file id"
	        " %s: already exists", act->Tag(),
	        act->GetInfo()->GetFileID().c_str());
	Abort();
	return true;
	}
bool ActionSet::RemoveAction(const RecordVal* args)
{
return RemoveAction(Action::ArgsTag(args), GetKey(args));
}
/**
 * Removes and deletes the action stored under \a key.  The key itself is
 * always deleted by this call.
 * @param tag action tag, used only for the debug message when nothing is
 *        found.
 * @param key hash key of the action to remove.
 * @return true if an action was found and deleted, else false.
 */
bool ActionSet::RemoveAction(ActionTag tag, HashKey* key)
	{
	Action* removed = static_cast<Action*>(action_map.Remove(key));
	delete key;

	if ( removed )
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s",
		        removed->Tag(), info->GetFileID().c_str());
		delete removed;
		return true;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove action %d for file id %s",
	        tag, info->GetFileID().c_str());
	return false;
	}
/**
 * Queues removal of the action matching \a args; the removal happens when
 * the modification queue is flushed.
 * @param args an ActionArgs record identifying the action.
 * @return true if a matching action is present at the time of the call,
 *         else false.  The removal is queued either way.
 */
bool ActionSet::QueueRemoveAction(const RecordVal* args)
	{
	HashKey* key = GetKey(args);
	ActionTag tag = Action::ArgsTag(args);

	mod_queue.push(new Remove(tag, key));

	// The key is still alive here: it is held by the queued Remove.
	return action_map.Lookup(key) != 0;
	}
/**
 * Carries out a queued removal on \a set.  The held key is deleted by the
 * RemoveAction() call.
 * @return true if an action was removed, else false.
 */
bool ActionSet::Remove::Perform(ActionSet* set)
	{
	const bool removed = set->RemoveAction(tag, key);
	return removed;
	}
/**
 * Computes the hash key under which the action described by \a args is
 * stored.  Reports an internal error if no key can be computed.
 * @param args an ActionArgs record.
 * @return a newly computed key that the caller must dispose of.
 */
HashKey* ActionSet::GetKey(const RecordVal* args) const
	{
	HashKey* computed = action_hash->ComputeHash(args, 1);

	if ( computed )
		return computed;

	reporter->InternalError("ActionArgs type mismatch");
	return computed;
	}
/**
 * Creates a new action for this set's file using the factory function that
 * corresponds to the action tag found in \a args.
 * @param args an ActionArgs record describing the action.
 * @return the new action, or a null pointer if the factory failed.
 */
Action* ActionSet::InstantiateAction(RecordVal* args) const
	{
	Action* act = action_factory[Action::ArgsTag(args)](args, info);

	if ( ! act )
		{
		// The two literals must be adjacent (no comma) so they form a
		// single format string; otherwise " %s" is consumed by %d and the
		// file id is never printed.
		DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d failed for file id"
		        " %s", Action::ArgsTag(args), info->GetFileID().c_str());
		return 0;
		}

	return act;
	}
/**
 * Attaches \a act to this set under \a key and adds an empty ActionResults
 * entry for it to the "actions" table of the file's record value.
 * @param act the action to store in the action map.
 * @param key hash key for \a act; it is deleted by this call and must not
 *        be used by the caller afterwards.
 */
void ActionSet::InsertAction(Action* act, HashKey* key)
	{
	DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->Tag(),
	        info->GetFileID().c_str());

	action_map.Insert(key, act);

	// Dictionary::Insert() only takes over the raw key bytes, so the HashKey
	// object itself still has to be freed here, as every other path in this
	// class does with its key.  Without this each inserted action leaks one
	// HashKey.
	delete key;

	info->GetVal()->Lookup(Info::actions_idx)->AsTableVal()->Assign(act->Args(),
	        new RecordVal(BifType::Record::FileAnalysis::ActionResults));
	}
void ActionSet::FlushQueuedModifications()
{
if ( mod_queue.empty() ) return;
DBG_LOG(DBG_FILE_ANALYSIS, "Start flushing action mod queue of file id %s",
info->GetFileID().c_str());
do
{
Modification* mod = mod_queue.front();
mod->Perform(this);
delete mod;
mod_queue.pop();
} while ( ! mod_queue.empty() );
DBG_LOG(DBG_FILE_ANALYSIS, "End flushing action mod queue of file id %s",
info->GetFileID().c_str());
}

View file

@ -0,0 +1,104 @@
#ifndef FILE_ANALYSIS_ACTIONSET_H
#define FILE_ANALYSIS_ACTIONSET_H
#include <queue>
#include "Action.h"
#include "Dict.h"
#include "CompHash.h"
#include "Val.h"
namespace file_analysis {
class Info;
declare(PDict,Action);
/**
 * The set of actions attached to one file_analysis::Info object, indexed by
 * their ActionArgs record.  Actions can be added/removed immediately, or the
 * change can be queued and applied later by FlushQueuedModifications() so
 * that the set is not modified while it is being iterated.
 */
class ActionSet {
public:

	/**
	 * @param arg_info the file_analysis::Info object the actions belong to.
	 */
	ActionSet(Info* arg_info);

	/**
	 * Aborts any modifications that are still queued.
	 */
	~ActionSet();

	/**
	 * Attach an action immediately.
	 * @return true if the action was instantiated/attached or an action with
	 *         the same arguments is already attached, else false.
	 */
	bool AddAction(RecordVal* args);

	/**
	 * Instantiate an action now and queue it for attachment.
	 * @return true if action was able to be instantiated, else false.
	 */
	bool QueueAddAction(RecordVal* args);

	/**
	 * Remove an action immediately.
	 * @return false if action didn't exist and so wasn't removed, else true.
	 */
	bool RemoveAction(const RecordVal* args);

	/**
	 * Queue the removal of an action.
	 * @return true if action exists at time of call, else false.
	 */
	bool QueueRemoveAction(const RecordVal* args);

	/**
	 * Perform all queued modifications to the currently active actions.
	 */
	void FlushQueuedModifications();

	/**
	 * @return a cookie for iterating over the attached actions with
	 *         NextEntry().
	 */
	IterCookie* InitForIteration() const
		{ return action_map.InitForIteration(); }

	/**
	 * @return the next attached action of the iteration identified by \a c.
	 */
	Action* NextEntry(IterCookie* c)
		{ return action_map.NextEntry(c); }

protected:

	/**
	 * @return the hash key for the ActionArgs record \a args.
	 */
	HashKey* GetKey(const RecordVal* args) const;

	/**
	 * @return a new action created from \a args, or null on failure.
	 */
	Action* InstantiateAction(RecordVal* args) const;

	/**
	 * Store \a act in the action map under \a key.
	 */
	void InsertAction(Action* act, HashKey* key);

	/**
	 * Remove the action stored under \a key; \a key is deleted.
	 * @return false if no action was stored under \a key, else true.
	 */
	bool RemoveAction(ActionTag tag, HashKey* key);

	Info* info;                 /**< File the actions are attached to. */
	CompositeHash* action_hash; /**< Hashes ActionArgs for action map lookup. */
	PDict(Action) action_map;   /**< Actions indexed by ActionArgs. */

	/**
	 * A queued change to the set of attached actions.
	 */
	class Modification {
	public:
		virtual ~Modification() {}

		/** Apply the change to \a set. */
		virtual bool Perform(ActionSet* set) = 0;

		/** Give up on the change, releasing what it holds. */
		virtual void Abort() = 0;
	};

	/**
	 * Queued attachment of an already-instantiated action.
	 */
	class Add : public Modification {
	public:
		Add(Action* arg_act, HashKey* arg_key)
			: Modification(), act(arg_act), key(arg_key) {}
		virtual ~Add() {}
		virtual bool Perform(ActionSet* set);
		virtual void Abort() { delete act; delete key; }

	protected:
		Action* act;
		HashKey* key;
	};

	/**
	 * Queued removal of the action stored under a given key.
	 */
	class Remove : public Modification {
	public:
		Remove(ActionTag arg_tag, HashKey* arg_key)
			: Modification(), tag(arg_tag), key(arg_key) {}
		virtual ~Remove() {}
		virtual bool Perform(ActionSet* set);
		virtual void Abort() { delete key; }

	protected:
		ActionTag tag;
		HashKey* key;
	};

	// Qualified explicitly so this header does not depend on a
	// using-directive leaking in from another header.
	typedef std::queue<Modification*> ModQueue;
	ModQueue mod_queue; /**< Pending modifications, oldest first. */
};
} // namespace file_analysis
#endif

View file

@ -41,8 +41,6 @@ Action* DataEvent::Instantiate(RecordVal* args, Info* info)
bool DataEvent::DeliverChunk(const u_char* data, uint64 len, uint64 offset) bool DataEvent::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
{ {
Action::DeliverChunk(data, len, offset);
if ( ! chunk_event ) return true; if ( ! chunk_event ) return true;
val_list* args = new val_list; val_list* args = new val_list;
@ -56,8 +54,6 @@ bool DataEvent::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
bool DataEvent::DeliverStream(const u_char* data, uint64 len) bool DataEvent::DeliverStream(const u_char* data, uint64 len)
{ {
Action::DeliverStream(data, len);
if ( ! stream_event ) return true; if ( ! stream_event ) return true;
val_list* args = new val_list; val_list* args = new val_list;

View file

@ -38,8 +38,6 @@ Action* Extract::Instantiate(RecordVal* args, Info* info)
bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset) bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
{ {
Action::DeliverChunk(data, len, offset);
if ( ! fd ) return false; if ( ! fd ) return false;
safe_pwrite(fd, data, len, offset); safe_pwrite(fd, data, len, offset);

View file

@ -16,15 +16,11 @@ Hash::Hash(RecordVal* args, Info* info, HashVal* hv, const char* field)
Hash::~Hash() Hash::~Hash()
{ {
// maybe it's all there...
Finalize();
Unref(hash); Unref(hash);
} }
bool Hash::DeliverStream(const u_char* data, uint64 len) bool Hash::DeliverStream(const u_char* data, uint64 len)
{ {
Action::DeliverStream(data, len);
if ( ! hash->IsValid() ) return false; if ( ! hash->IsValid() ) return false;
hash->Feed(data, len); hash->Feed(data, len);
@ -33,7 +29,6 @@ bool Hash::DeliverStream(const u_char* data, uint64 len)
bool Hash::EndOfFile() bool Hash::EndOfFile()
{ {
Action::EndOfFile();
Finalize(); Finalize();
return false; return false;
} }

View file

@ -3,26 +3,13 @@
#include "Info.h" #include "Info.h"
#include "InfoTimer.h" #include "InfoTimer.h"
#include "FileID.h" #include "FileID.h"
#include "Manager.h"
#include "Reporter.h" #include "Reporter.h"
#include "Val.h" #include "Val.h"
#include "Type.h" #include "Type.h"
#include "Action.h"
#include "Extract.h"
#include "Hash.h"
#include "DataEvent.h"
using namespace file_analysis; using namespace file_analysis;
// keep in order w/ declared enum values in file_analysis.bif
static ActionInstantiator action_factory[] = {
Extract::Instantiate,
MD5::Instantiate,
SHA1::Instantiate,
SHA256::Instantiate,
DataEvent::Instantiate,
};
static TableVal* empty_conn_id_set() static TableVal* empty_conn_id_set()
{ {
TypeList* set_index = new TypeList(conn_id); TypeList* set_index = new TypeList(conn_id);
@ -58,6 +45,10 @@ int Info::total_bytes_idx = -1;
int Info::missing_bytes_idx = -1; int Info::missing_bytes_idx = -1;
int Info::overflow_bytes_idx = -1; int Info::overflow_bytes_idx = -1;
int Info::timeout_interval_idx = -1; int Info::timeout_interval_idx = -1;
int Info::bof_buffer_size_idx = -1;
int Info::bof_buffer_idx = -1;
int Info::file_type_idx = -1;
int Info::mime_type_idx = -1;
int Info::actions_idx = -1; int Info::actions_idx = -1;
void Info::InitFieldIndices() void Info::InitFieldIndices()
@ -73,17 +64,17 @@ void Info::InitFieldIndices()
missing_bytes_idx = Idx("missing_bytes"); missing_bytes_idx = Idx("missing_bytes");
overflow_bytes_idx = Idx("overflow_bytes"); overflow_bytes_idx = Idx("overflow_bytes");
timeout_interval_idx = Idx("timeout_interval"); timeout_interval_idx = Idx("timeout_interval");
bof_buffer_size_idx = Idx("bof_buffer_size");
bof_buffer_idx = Idx("bof_buffer");
file_type_idx = Idx("file_type");
mime_type_idx = Idx("mime_type");
actions_idx = Idx("actions"); actions_idx = Idx("actions");
} }
static void action_del_func(void* v)
{
delete (Action*) v;
}
Info::Info(const string& unique, Connection* conn, const string& protocol) Info::Info(const string& unique, Connection* conn, const string& protocol)
: file_id(unique), unique(unique), val(0), last_activity_time(network_time), : file_id(unique), unique(unique), val(0), last_activity_time(network_time),
postpone_timeout(false), need_reassembly(false) postpone_timeout(false), need_reassembly(false), done(false),
actions(this)
{ {
InitFieldIndices(); InitFieldIndices();
@ -96,24 +87,15 @@ Info::Info(const string& unique, Connection* conn, const string& protocol)
val->Assign(file_id_idx, new StringVal(id)); val->Assign(file_id_idx, new StringVal(id));
file_id = FileID(id); file_id = FileID(id);
TypeList* t = new TypeList();
t->Append(BifType::Record::FileAnalysis::ActionArgs->Ref());
action_hash = new CompositeHash(t);
Unref(t);
action_map.SetDeleteFunc(action_del_func);
UpdateConnectionFields(conn); UpdateConnectionFields(conn);
if ( protocol != "" ) if ( protocol != "" )
val->Assign(protocol_idx, new StringVal(protocol.c_str())); val->Assign(protocol_idx, new StringVal(protocol.c_str()));
ScheduleInactivityTimer();
} }
Info::~Info() Info::~Info()
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s", file_id.c_str()); DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s", file_id.c_str());
delete action_hash;
Unref(val); Unref(val);
} }
@ -203,89 +185,90 @@ void Info::ScheduleInactivityTimer() const
bool Info::AddAction(RecordVal* args) bool Info::AddAction(RecordVal* args)
{ {
HashKey* key = action_hash->ComputeHash(args, 1); return done ? false : actions.QueueAddAction(args);
if ( ! key )
reporter->InternalError("ActionArgs type mismatch in add_action");
Action* act = action_map.Lookup(key);
if ( act )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d skipped for already active"
" action on file id %s", act->Tag(), file_id.c_str());
delete key;
return false;
}
act = action_factory[Action::ArgsTag(args)](args, this);
if ( ! act )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Failed to instantiate action %d"
" on file id %s", Action::ArgsTag(args), file_id.c_str());
delete key;
return false;
}
DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->Tag(),
file_id.c_str());
action_map.Insert(key, act);
val->Lookup(actions_idx)->AsTableVal()->Assign(args,
new RecordVal(BifType::Record::FileAnalysis::ActionResults));
return true;
}
void Info::ScheduleRemoval(const Action* act)
{
removing.push_back(act->Args());
}
void Info::DoActionRemoval()
{
ActionArgList::iterator it;
for ( it = removing.begin(); it != removing.end(); ++it )
RemoveAction(*it);
removing.clear();
} }
bool Info::RemoveAction(const RecordVal* args) bool Info::RemoveAction(const RecordVal* args)
{ {
HashKey* key = action_hash->ComputeHash(args, 1); return done ? false : actions.QueueRemoveAction(args);
}
if ( ! key ) bool Info::BufferBOF(const u_char* data, uint64 len)
reporter->InternalError("ActionArgs type mismatch in remove_action");
Action* act = (Action*) action_map.Remove(key);
delete key;
if ( ! act )
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove action %d for file id %s", if ( bof_buffer.full || bof_buffer.replayed ) return false;
Action::ArgsTag(args), file_id.c_str());
using BifEnum::FileAnalysis::TRIGGER_BOF;
using BifEnum::FileAnalysis::TRIGGER_BOF_BUFFER;
if ( bof_buffer.chunks.size() == 0 )
Manager::EvaluatePolicy(TRIGGER_BOF, this);
if ( ! data )
{
// A gap means we're done seeing as much as the start of the file
// as possible, replay anything that we have
bof_buffer.full = true;
ReplayBOF();
// TODO: libmagic stuff
return false; return false;
} }
DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", act->Tag(), uint64 desired_size = LookupFieldDefaultCount(bof_buffer_size_idx);
file_id.c_str());
delete act; // If no buffer is desired or if the first chunk satisfies desired size,
// just do everything we need with the first chunk without copying.
if ( desired_size == 0 ||
(bof_buffer.chunks.empty() && len >= desired_size) )
{
bof_buffer.full = bof_buffer.replayed = true;
val->Assign(bof_buffer_idx, new StringVal(new BroString(data, len, 0)));
Manager::EvaluatePolicy(TRIGGER_BOF_BUFFER, this);
// TODO: libmagic stuff
return false;
}
bof_buffer.chunks.push_back(new BroString(data, len, 0));
bof_buffer.size += len;
if ( bof_buffer.size >= desired_size )
{
bof_buffer.full = true;
// TODO: libmagic stuff
ReplayBOF();
}
return true; return true;
} }
void Info::ReplayBOF()
{
if ( bof_buffer.replayed ) return;
bof_buffer.replayed = true;
val->Assign(bof_buffer_idx, new StringVal(concatenate(bof_buffer.chunks)));
using BifEnum::FileAnalysis::TRIGGER_BOF_BUFFER;
Manager::EvaluatePolicy(TRIGGER_BOF_BUFFER, this);
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len());
}
void Info::DataIn(const u_char* data, uint64 len, uint64 offset) void Info::DataIn(const u_char* data, uint64 len, uint64 offset)
{ {
Action* act = 0; actions.FlushQueuedModifications();
IterCookie* c = action_map.InitForIteration(); // TODO: attempt libmagic stuff here before doing reassembly?
while ( (act = action_map.NextEntry(c)) ) Action* act = 0;
IterCookie* c = actions.InitForIteration();
while ( (act = actions.NextEntry(c)) )
{ {
if ( ! act->DeliverChunk(data, len, offset) ) if ( ! act->DeliverChunk(data, len, offset) )
ScheduleRemoval(act); actions.QueueRemoveAction(act->Args());
} }
DoActionRemoval(); actions.FlushQueuedModifications();
// TODO: check reassembly requirement based on buffer size in record // TODO: check reassembly requirement based on buffer size in record
if ( need_reassembly ) if ( need_reassembly )
@ -293,62 +276,73 @@ void Info::DataIn(const u_char* data, uint64 len, uint64 offset)
// TODO // TODO
} }
// TODO: reassembly stuff, possibly having to deliver chunks if buffer full // TODO: reassembly overflow stuff, increment overflow count, eval trigger
// and incrememt overflow bytes
IncrementByteCount(len, seen_bytes_idx); IncrementByteCount(len, seen_bytes_idx);
} }
void Info::DataIn(const u_char* data, uint64 len) void Info::DataIn(const u_char* data, uint64 len)
{ {
Action* act = 0; actions.FlushQueuedModifications();
IterCookie* c = action_map.InitForIteration(); if ( BufferBOF(data, len) ) return;
while ( (act = action_map.NextEntry(c)) ) Action* act = 0;
IterCookie* c = actions.InitForIteration();
while ( (act = actions.NextEntry(c)) )
{ {
if ( ! act->DeliverStream(data, len) ) if ( ! act->DeliverStream(data, len) )
{ {
ScheduleRemoval(act); actions.QueueRemoveAction(act->Args());
continue; continue;
} }
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) + uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
LookupFieldDefaultCount(missing_bytes_idx); LookupFieldDefaultCount(missing_bytes_idx);
if ( ! act->DeliverChunk(data, len, offset) ) if ( ! act->DeliverChunk(data, len, offset) )
ScheduleRemoval(act); actions.QueueRemoveAction(act->Args());
} }
DoActionRemoval(); actions.FlushQueuedModifications();
IncrementByteCount(len, seen_bytes_idx); IncrementByteCount(len, seen_bytes_idx);
} }
void Info::EndOfFile() void Info::EndOfFile()
{ {
Action* act = 0; if ( done ) return;
IterCookie* c = action_map.InitForIteration(); done = true;
actions.FlushQueuedModifications();
while ( (act = action_map.NextEntry(c)) ) // send along anything that's been buffered, but never flushed
ReplayBOF();
Action* act = 0;
IterCookie* c = actions.InitForIteration();
while ( (act = actions.NextEntry(c)) )
{ {
if ( ! act->EndOfFile() ) if ( ! act->EndOfFile() )
ScheduleRemoval(act); actions.QueueRemoveAction(act->Args());
} }
DoActionRemoval(); actions.FlushQueuedModifications();
} }
void Info::Gap(uint64 offset, uint64 len) void Info::Gap(uint64 offset, uint64 len)
{ {
Action* act = 0; actions.FlushQueuedModifications();
IterCookie* c = action_map.InitForIteration(); if ( BufferBOF(0, len) ) return;
while ( (act = action_map.NextEntry(c)) ) Action* act = 0;
IterCookie* c = actions.InitForIteration();
while ( (act = actions.NextEntry(c)) )
{ {
if ( ! act->Undelivered(offset, len) ) if ( ! act->Undelivered(offset, len) )
ScheduleRemoval(act); actions.QueueRemoveAction(act->Args());
} }
DoActionRemoval(); actions.FlushQueuedModifications();
IncrementByteCount(len, missing_bytes_idx); IncrementByteCount(len, missing_bytes_idx);
} }

View file

@ -2,19 +2,16 @@
#define FILE_ANALYSIS_INFO_H #define FILE_ANALYSIS_INFO_H
#include <string> #include <string>
#include <list> #include <vector>
#include "CompHash.h"
#include "Dict.h"
#include "Conn.h" #include "Conn.h"
#include "Val.h" #include "Val.h"
#include "Action.h" #include "ActionSet.h"
#include "FileID.h" #include "FileID.h"
#include "BroString.h"
namespace file_analysis { namespace file_analysis {
declare(PDict,Action);
/** /**
* Wrapper class around \c FileAnalysis::Info record values from script layer. * Wrapper class around \c FileAnalysis::Info record values from script layer.
*/ */
@ -80,15 +77,15 @@ public:
void ScheduleInactivityTimer() const; void ScheduleInactivityTimer() const;
/** /**
* Attaches an action. Only one action per type can be attached at a time, * Queues attaching an action. Only one action per type can be attached at
* unless the arguments differ. * a time unless the arguments differ.
* @return true if the action was attached, else false. * @return false if action can't be instantiated, else true.
*/ */
bool AddAction(RecordVal* args); bool AddAction(RecordVal* args);
/** /**
* Removes an action. * Queues removal of an action.
* @return true if the action was removed, else false. * @return true if action was active at time of call, else false.
*/ */
bool RemoveAction(const RecordVal* args); bool RemoveAction(const RecordVal* args);
@ -146,15 +143,16 @@ protected:
double LookupFieldDefaultInterval(int idx) const; double LookupFieldDefaultInterval(int idx) const;
/** /**
* Adds file_analysis::Action associated with \a args to list of actions * Buffers incoming data at the beginning of a file. If \a data is a null
* to remove, #removing. * pointer, that signifies a gap and the buffering cannot continue.
* @return true if buffering is still required, else false
*/ */
void ScheduleRemoval(const Action* act); bool BufferBOF(const u_char* data, uint64 len);
/** /**
* Deletes/removes all actions in #removing. * Forward any beginning-of-file buffered data on to DataIn stream.
*/ */
void DoActionRemoval(); void ReplayBOF();
FileID file_id; /**< A pretty hash that likely identifies file*/ FileID file_id; /**< A pretty hash that likely identifies file*/
string unique; /**< A string that uniquely identifies file */ string unique; /**< A string that uniquely identifies file */
@ -162,10 +160,19 @@ protected:
double last_activity_time; /**< Time of last activity. */ double last_activity_time; /**< Time of last activity. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */ bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */ bool need_reassembly; /**< Whether file stream reassembly is needed. */
CompositeHash* action_hash;/**< ActionArgs hashes Action map lookup. */ bool done; /**< If this object is about to be deleted. */
PDict(Action) action_map; /**< Actions indexed by ActionArgs. */ ActionSet actions;
typedef list<const RecordVal*> ActionArgList;
ActionArgList removing; /**< Actions pending removal. */ struct BOF_Buffer {
BOF_Buffer() : full(false), replayed(false), size(0) {}
~BOF_Buffer()
{ for ( size_t i = 0; i < chunks.size(); ++i ) delete chunks[i]; }
bool full;
bool replayed;
uint64 size;
BroString::CVec chunks;
} bof_buffer; /**< Beginning of file buffer. */
/** /**
* @return the field offset in #val record corresponding to \a field_name. * @return the field offset in #val record corresponding to \a field_name.
@ -177,6 +184,7 @@ protected:
*/ */
static void InitFieldIndices(); static void InitFieldIndices();
public:
static int file_id_idx; static int file_id_idx;
static int parent_file_id_idx; static int parent_file_id_idx;
static int protocol_idx; static int protocol_idx;
@ -187,6 +195,10 @@ protected:
static int missing_bytes_idx; static int missing_bytes_idx;
static int overflow_bytes_idx; static int overflow_bytes_idx;
static int timeout_interval_idx; static int timeout_interval_idx;
static int bof_buffer_size_idx;
static int bof_buffer_idx;
static int file_type_idx;
static int mime_type_idx;
static int actions_idx; static int actions_idx;
}; };

View file

@ -29,8 +29,8 @@ static void check_file_done(Info* info)
{ {
if ( info->IsComplete() ) if ( info->IsComplete() )
{ {
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_DONE, info);
file_mgr->RemoveFile(info->GetFileID()); file_mgr->RemoveFile(info->GetFileID());
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_DONE, info);
} }
} }
@ -40,6 +40,7 @@ void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
Info* info = GetInfo(unique, conn, protocol); Info* info = GetInfo(unique, conn, protocol);
info->DataIn(data, len, offset); info->DataIn(data, len, offset);
check_file_done(info); check_file_done(info);
DoRemoveFiles();
} }
void Manager::DataIn(const string& unique, const u_char* data, uint64 len, void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
@ -48,6 +49,7 @@ void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
Info* info = GetInfo(unique, conn, protocol); Info* info = GetInfo(unique, conn, protocol);
info->DataIn(data, len); info->DataIn(data, len);
check_file_done(info); check_file_done(info);
DoRemoveFiles();
} }
void Manager::EndOfFile(const string& unique, Connection* conn, void Manager::EndOfFile(const string& unique, Connection* conn,
@ -56,6 +58,7 @@ void Manager::EndOfFile(const string& unique, Connection* conn,
Info* info = GetInfo(unique, conn, protocol); Info* info = GetInfo(unique, conn, protocol);
info->EndOfFile(); info->EndOfFile();
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_EOF, info); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_EOF, info);
DoRemoveFiles();
} }
void Manager::Gap(const string& unique, uint64 offset, uint64 len, void Manager::Gap(const string& unique, uint64 offset, uint64 len,
@ -64,6 +67,7 @@ void Manager::Gap(const string& unique, uint64 offset, uint64 len,
Info* info = GetInfo(unique, conn, protocol); Info* info = GetInfo(unique, conn, protocol);
info->Gap(offset, len); info->Gap(offset, len);
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_GAP, info); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_GAP, info);
DoRemoveFiles();
} }
void Manager::SetSize(const string& unique, uint64 size, void Manager::SetSize(const string& unique, uint64 size,
@ -72,6 +76,7 @@ void Manager::SetSize(const string& unique, uint64 size,
Info* info = GetInfo(unique, conn, protocol); Info* info = GetInfo(unique, conn, protocol);
info->SetTotalBytes(size); info->SetTotalBytes(size);
check_file_done(info); check_file_done(info);
DoRemoveFiles();
} }
void Manager::EvaluatePolicy(BifEnum::FileAnalysis::Trigger t, Info* info) void Manager::EvaluatePolicy(BifEnum::FileAnalysis::Trigger t, Info* info)
@ -131,11 +136,12 @@ Info* Manager::GetInfo(const string& unique, Connection* conn,
if ( id_map[id] ) if ( id_map[id] )
{ {
reporter->Error("Evicted duplicate file ID: %s", id.c_str()); reporter->Error("Evicted duplicate file ID: %s", id.c_str());
RemoveFile(id); DoRemoveFile(id);
} }
id_map[id] = rval; id_map[id] = rval;
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_NEW, rval); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_NEW, rval);
rval->ScheduleInactivityTimer();
} }
else else
{ {
@ -175,7 +181,24 @@ void Manager::Timeout(const FileID& file_id, bool is_terminating)
DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s", DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s",
info->GetFileID().c_str()); info->GetFileID().c_str());
RemoveFile(file_id); DoRemoveFile(file_id);
}
bool Manager::DoRemoveFile(const FileID& file_id)
{
IDMap::iterator it = id_map.find(file_id);
if ( it == id_map.end() ) return false;
if ( ! str_map.erase(it->second->GetUnique()) )
reporter->Error("No string mapping for file ID %s", file_id.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", it->first.c_str());
it->second->EndOfFile();
delete it->second;
id_map.erase(it);
return true;
} }
bool Manager::RemoveFile(const FileID& file_id) bool Manager::RemoveFile(const FileID& file_id)
@ -184,9 +207,18 @@ bool Manager::RemoveFile(const FileID& file_id)
if ( it == id_map.end() ) return false; if ( it == id_map.end() ) return false;
if ( ! str_map.erase(it->second->GetUnique()) ) DBG_LOG(DBG_FILE_ANALYSIS, "Queue removal of FileID %s",
reporter->Error("No string mapping for file ID %s", file_id.c_str()); it->first.c_str());
delete it->second;
id_map.erase(it); it->second->EndOfFile();
removing.push_back(it->first);
return true; return true;
} }
void Manager::DoRemoveFiles()
{
IDList::iterator it;
for ( it = removing.begin(); it != removing.end(); ++it )
DoRemoveFile(*it);
removing.clear();
}

View file

@ -3,6 +3,7 @@
#include <string> #include <string>
#include <map> #include <map>
#include <list>
#include "Net.h" #include "Net.h"
#include "Conn.h" #include "Conn.h"
@ -61,7 +62,9 @@ public:
const string& protocol = ""); const string& protocol = "");
/** /**
* Discard the file_analysis::Info object associated with \a file_id. * Queue the file_analysis::Info object associated with \a file_id to
* be discarded. It will be discarded at the end of DataIn, EndOfFile, Gap,
* or SetSize functions.
* @return false if file identifier did not map to anything, else true. * @return false if file identifier did not map to anything, else true.
*/ */
bool RemoveFile(const FileID& file_id); bool RemoveFile(const FileID& file_id);
@ -73,16 +76,16 @@ public:
bool PostponeTimeout(const FileID& file_id) const; bool PostponeTimeout(const FileID& file_id) const;
/** /**
* Attaches an action to the file identifier. Multiple actions of a given * Queue attachment of an action to the file identifier. Multiple actions
* type can be attached per file identifier at a time as long as the * of a given type can be attached per file identifier at a time as long as
* arguments differ. * the arguments differ.
* @return true if the action was attached, else false. * @return false if the action failed to be instantiated, else true.
*/ */
bool AddAction(const FileID& file_id, RecordVal* args) const; bool AddAction(const FileID& file_id, RecordVal* args) const;
/** /**
* Removes an action for a given file identifier. * Queue removal of an action for a given file identifier.
* @return true if the action was removed, else false. * @return true if the action is active at the time of call, else false.
*/ */
bool RemoveAction(const FileID& file_id, const RecordVal* args) const; bool RemoveAction(const FileID& file_id, const RecordVal* args) const;
@ -97,6 +100,7 @@ protected:
typedef map<string, Info*> StrMap; typedef map<string, Info*> StrMap;
typedef map<FileID, Info*> IDMap; typedef map<FileID, Info*> IDMap;
typedef list<FileID> IDList;
/** /**
* @return the Info object mapped to \a unique. One is created if mapping * @return the Info object mapped to \a unique. One is created if mapping
@ -118,8 +122,20 @@ protected:
*/ */
void Timeout(const FileID& file_id, bool is_terminating = ::terminating); void Timeout(const FileID& file_id, bool is_terminating = ::terminating);
/**
* Immediately remove file_analysis::Info object associated with \a file_id.
* @return false if file identifier did not map to anything, else true.
*/
bool DoRemoveFile(const FileID& file_id);
/**
* Clean up all pending file analysis for file IDs in #removing.
*/
void DoRemoveFiles();
StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */ StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */
IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. */ IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. */
IDList removing;/**< File IDs that are about to be removed. */
}; };
} // namespace file_analysis } // namespace file_analysis