Refactor how file analysis actions are tracked.

The Info record now uses a "table[ActionArgs] of ActionResults", which
allows for simultaneous actions of a given type as long as other args
(fields in the ActionArgs record) are different.
This commit is contained in:
Jon Siwek 2013-02-25 16:35:42 -06:00
parent 4b30cc2e24
commit 691622b3aa
13 changed files with 233 additions and 158 deletions

View file

@ -37,6 +37,7 @@ export {
type Info: record {};
type ActionArgs: record {
act: Action;
extract_filename: string &optional;
chunk_event: event(info: Info, data: string, off: count) &optional;
stream_event: event(info: Info, data: string) &optional;
@ -88,13 +89,8 @@ export {
timeout_interval: interval &log &default=default_timeout_interval;
## Actions that have been added to the analysis of this file.
actions: vector of Action &default=vector();
## The corresponding arguments supplied to each element of *actions*.
action_args: vector of ActionArgs &default=vector();
## Some actions may directly yield results in this record.
action_results: ActionResults;
## Not meant to be modified directly by scripts.
actions: table[ActionArgs] of ActionResults;
} &redef;
## TODO: document

View file

@ -71,25 +71,23 @@ function FileAnalysis::postpone_timeout%(file_id: string%): bool
return new Val(result, TYPE_BOOL);
%}
function FileAnalysis::add_action%(file_id: string,
action: FileAnalysis::Action,
args: any%): bool
function FileAnalysis::add_action%(file_id: string, args: any%): bool
%{
using file_analysis::FileID;
using BifType::Record::FileAnalysis::ActionArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(ActionArgs);
bool result = file_mgr->AddAction(FileID(file_id->CheckString()),
action->AsEnumVal(), rv);
bool result = file_mgr->AddAction(FileID(file_id->CheckString()), rv);
Unref(rv);
return new Val(result, TYPE_BOOL);
%}
function FileAnalysis::remove_action%(file_id: string,
action: FileAnalysis::Action%): bool
function FileAnalysis::remove_action%(file_id: string, args: any%): bool
%{
using file_analysis::FileID;
bool result = file_mgr->RemoveAction(FileID(file_id->CheckString()),
action->AsEnumVal());
using BifType::Record::FileAnalysis::ActionArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(ActionArgs);
bool result = file_mgr->RemoveAction(FileID(file_id->CheckString()), rv);
Unref(rv);
return new Val(result, TYPE_BOOL);
%}

View file

@ -16,7 +16,11 @@ class Info;
class Action {
public:
virtual ~Action() {}
virtual ~Action()
{
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy action %d", tag);
Unref(args);
}
/**
* Subclasses may override this to receive file data non-sequentially.
@ -50,17 +54,40 @@ public:
virtual bool Undelivered(uint64 offset, uint64 len)
{ return true; }
/**
* @return the action type enum value.
*/
ActionTag Tag() const { return tag; }
/**
* @return the ActionArgs associated with the action.
*/
RecordVal* Args() const { return args; }
/**
* @return the action tag equivalent of the 'act' field from the ActionArgs
* value \a args.
*/
static ActionTag ArgsTag(const RecordVal* args)
	{
	// Locate the 'act' field within the ActionArgs record type, then
	// convert the enum value stored there into an ActionTag.
	const int act_offset =
		BifType::Record::FileAnalysis::ActionArgs->FieldOffset("act");

	return static_cast<ActionTag>(args->Lookup(act_offset)->AsEnum());
	}
protected:
Action(Info* arg_info, ActionTag arg_tag) : info(arg_info), tag(arg_tag) {}
Action(RecordVal* arg_args, Info* arg_info)
: tag(Action::ArgsTag(arg_args)), args(arg_args->Ref()->AsRecordVal()),
info(arg_info)
{}
Info* info;
ActionTag tag;
RecordVal* args;
Info* info;
};
typedef Action* (*ActionInstantiator)(const RecordVal* args, Info* info);
typedef Action* (*ActionInstantiator)(RecordVal* args, Info* info);
} // namespace file_analysis

View file

@ -7,13 +7,13 @@
using namespace file_analysis;
DataEvent::DataEvent(Info* arg_info, EventHandlerPtr ce, EventHandlerPtr se)
: Action(arg_info, BifEnum::FileAnalysis::ACTION_DATA_EVENT),
chunk_event(ce), stream_event(se)
DataEvent::DataEvent(RecordVal* args, Info* info,
EventHandlerPtr ce, EventHandlerPtr se)
: Action(args, info), chunk_event(ce), stream_event(se)
{
}
Action* DataEvent::Instantiate(const RecordVal* args, Info* info)
Action* DataEvent::Instantiate(RecordVal* args, Info* info)
{
using BifType::Record::FileAnalysis::ActionArgs;
@ -36,7 +36,7 @@ Action* DataEvent::Instantiate(const RecordVal* args, Info* info)
if ( stream_val )
stream = event_registry->Lookup(stream_val->AsFunc()->GetID()->Name());
return new DataEvent(info, chunk, stream);
return new DataEvent(args, info, chunk, stream);
}
bool DataEvent::DeliverChunk(const u_char* data, uint64 len, uint64 offset)

View file

@ -15,7 +15,7 @@ namespace file_analysis {
class DataEvent : public Action {
public:
static Action* Instantiate(const RecordVal* args, Info* info);
static Action* Instantiate(RecordVal* args, Info* info);
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
@ -23,7 +23,8 @@ public:
protected:
DataEvent(Info* arg_info, EventHandlerPtr ce, EventHandlerPtr se);
DataEvent(RecordVal* args, Info* info,
EventHandlerPtr ce, EventHandlerPtr se);
EventHandlerPtr chunk_event;
EventHandlerPtr stream_event;

View file

@ -5,9 +5,8 @@
using namespace file_analysis;
Extract::Extract(Info* arg_info, const string& arg_filename)
: Action(arg_info, BifEnum::FileAnalysis::ACTION_EXTRACT),
filename(arg_filename)
Extract::Extract(RecordVal* args, Info* info, const string& arg_filename)
: Action(args, info), filename(arg_filename)
{
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
@ -26,15 +25,15 @@ Extract::~Extract()
safe_close(fd);
}
Action* Extract::Instantiate(const RecordVal* args, Info* info)
Action* Extract::Instantiate(RecordVal* args, Info* info)
{
using BifType::Record::FileAnalysis::ActionArgs;
const char* field = "extract_filename";
int off = BifType::Record::FileAnalysis::ActionArgs->FieldOffset(field);
Val* v = args->Lookup(off);
Val* v = args->Lookup(ActionArgs->FieldOffset(field));
if ( ! v ) return 0;
return new Extract(info, v->AsString()->CheckString());
return new Extract(args, info, v->AsString()->CheckString());
}
bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset)

View file

@ -15,7 +15,7 @@ namespace file_analysis {
class Extract : public Action {
public:
static Action* Instantiate(const RecordVal* args, Info* info);
static Action* Instantiate(RecordVal* args, Info* info);
virtual ~Extract();
@ -23,7 +23,7 @@ public:
protected:
Extract(Info* arg_info, const string& arg_filename);
Extract(RecordVal* args, Info* info, const string& arg_filename);
string filename;
int fd;

View file

@ -5,9 +5,12 @@
using namespace file_analysis;
Hash::Hash(Info* arg_info, ActionTag tag, HashVal* hv)
: Action(arg_info, tag), hash(hv)
// Builds a hashing action around the hash state \a hv.  The offset of
// \a field within the ActionResults record type is resolved once here and
// cached in result_field_idx; a missing field is reported as an internal
// error.  The hash state is initialized before the constructor returns.
Hash::Hash(RecordVal* args, Info* info, HashVal* hv, const char* field)
	: Action(args, info), hash(hv)
	{
	result_field_idx =
		BifType::Record::FileAnalysis::ActionResults->FieldOffset(field);

	if ( result_field_idx < 0 )
		reporter->InternalError("Missing ActionResults field: %s", field);

	hash->Init();
	}
@ -15,7 +18,7 @@ Hash::~Hash()
{
// maybe it's all there...
Finalize();
delete hash;
Unref(hash);
}
bool Hash::DeliverStream(const u_char* data, uint64 len)
@ -45,10 +48,5 @@ void Hash::Finalize()
if ( ! hash->IsValid() ) return;
StringVal* sv = hash->Get();
int i = GetResultFieldOffset();
if ( i < 0 )
reporter->InternalError("Hash Action result field not found");
info->GetResults()->Assign(i, sv);
info->GetResults(args)->Assign(result_field_idx, sv);
}

View file

@ -26,61 +26,51 @@ public:
protected:
Hash(Info* arg_info, ActionTag arg_tag, HashVal* hv);
Hash(RecordVal* args, Info* info, HashVal* hv, const char* field);
void Finalize();
virtual int GetResultFieldOffset() const = 0;
HashVal* hash;
int result_field_idx;
};
class MD5 : public Hash {
public:
static Action* Instantiate(const RecordVal* args, Info* info)
{ return new MD5(info); }
static Action* Instantiate(RecordVal* args, Info* info)
{ return new MD5(args, info); }
protected:
MD5(Info* arg_info)
: Hash(arg_info, BifEnum::FileAnalysis::ACTION_MD5, new MD5Val()) {}
virtual int GetResultFieldOffset() const
{ return BifType::Record::FileAnalysis::ActionResults->
FieldOffset("md5"); }
MD5(RecordVal* args, Info* info)
: Hash(args, info, new MD5Val(), "md5")
{}
};
class SHA1 : public Hash {
public:
static Action* Instantiate(const RecordVal* args, Info* info)
{ return new SHA1(info); }
static Action* Instantiate(RecordVal* args, Info* info)
{ return new SHA1(args, info); }
protected:
SHA1(Info* arg_info)
: Hash(arg_info, BifEnum::FileAnalysis::ACTION_SHA1, new SHA1Val()) {}
virtual int GetResultFieldOffset() const
{ return BifType::Record::FileAnalysis::ActionResults->
FieldOffset("sha1"); }
SHA1(RecordVal* args, Info* info)
: Hash(args, info, new SHA1Val(), "sha1")
{}
};
class SHA256 : public Hash {
public:
static Action* Instantiate(const RecordVal* args, Info* info)
{ return new SHA256(info); }
static Action* Instantiate(RecordVal* args, Info* info)
{ return new SHA256(args, info); }
protected:
SHA256(Info* arg_info)
: Hash(arg_info, BifEnum::FileAnalysis::ACTION_SHA256, new SHA256Val()) {}
virtual int GetResultFieldOffset() const
{ return BifType::Record::FileAnalysis::ActionResults->
FieldOffset("sha256"); }
SHA256(RecordVal* args, Info* info)
: Hash(args, info, new SHA256Val(), "sha256")
{}
};
} // namespace file_analysis

View file

@ -5,6 +5,7 @@
#include "FileID.h"
#include "Reporter.h"
#include "Val.h"
#include "Type.h"
#include "Action.h"
#include "Extract.h"
@ -58,8 +59,6 @@ int Info::missing_bytes_idx = -1;
int Info::overflow_bytes_idx = -1;
int Info::timeout_interval_idx = -1;
int Info::actions_idx = -1;
int Info::action_args_idx = -1;
int Info::action_results_idx = -1;
void Info::InitFieldIndices()
{
@ -75,8 +74,11 @@ void Info::InitFieldIndices()
overflow_bytes_idx = Idx("overflow_bytes");
timeout_interval_idx = Idx("timeout_interval");
actions_idx = Idx("actions");
action_args_idx = Idx("action_args");
action_results_idx = Idx("action_results");
}
static void action_del_func(void* v)
{
delete (Action*) v;
}
Info::Info(const string& unique, Connection* conn, const string& protocol)
@ -94,6 +96,12 @@ Info::Info(const string& unique, Connection* conn, const string& protocol)
val->Assign(file_id_idx, new StringVal(id));
file_id = FileID(id);
TypeList* t = new TypeList();
t->Append(BifType::Record::FileAnalysis::ActionArgs->Ref());
action_hash = new CompositeHash(t);
Unref(t);
action_map.SetDeleteFunc(action_del_func);
UpdateConnectionFields(conn);
if ( protocol != "" )
@ -104,11 +112,8 @@ Info::Info(const string& unique, Connection* conn, const string& protocol)
Info::~Info()
{
ActionMap::const_iterator it;
for ( it = actions.begin(); it != actions.end(); ++it )
delete it->second;
DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s", file_id.c_str());
delete action_hash;
Unref(val);
}
@ -157,9 +162,18 @@ double Info::GetTimeoutInterval() const
return LookupFieldDefaultInterval(timeout_interval_idx);
}
RecordVal* Info::GetResults() const
// Returns the ActionResults record stored in this file's "actions" table
// under the index \a args.  If the table has no entry for that index, a
// fresh ActionResults record is created, inserted, and returned.
RecordVal* Info::GetResults(RecordVal* args) const
	{
	TableVal* actions_table = val->Lookup(actions_idx)->AsTableVal();

	// Test the raw lookup result before converting it: calling
	// AsRecordVal() through a null pointer would be undefined behavior and
	// would make the "create if missing" path below unreachable.
	Val* existing = actions_table->Lookup(args);

	if ( existing )
		return existing->AsRecordVal();

	RecordVal* rval =
		new RecordVal(BifType::Record::FileAnalysis::ActionResults);
	actions_table->Assign(args, rval);

	return rval;
	}
void Info::IncrementByteCount(uint64 size, int field_idx)
@ -187,59 +201,91 @@ void Info::ScheduleInactivityTimer() const
timer_mgr->Add(new InfoTimer(network_time, file_id, GetTimeoutInterval()));
}
bool Info::AddAction(ActionTag act, RecordVal* args)
// Attaches the action described by the ActionArgs record \a args to this
// file.  Returns false if an action is already registered under the same
// ActionArgs hash key or if the action could not be instantiated; returns
// true once the new action has been registered.
bool Info::AddAction(RecordVal* args)
{
if ( actions.find(act) != actions.end() ) return false;
// The hash of the whole ActionArgs record is the key into action_map.
HashKey* key = action_hash->ComputeHash(args, 1);
ActionTag tag = static_cast<ActionTag>(act);
if ( ! key )
reporter->InternalError("ActionArgs type mismatch in add_action");
Action* a = action_factory[act](args, this);
// Refuse to add a second action under an identical key.
Action* act = action_map.Lookup(key);
if ( ! a ) return false;
if ( act )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d skipped for already active"
" action on file id %s", act->Tag(), file_id.c_str());
delete key;
return false;
}
DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act,
act = action_factory[Action::ArgsTag(args)](args, this);
if ( ! act )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Failed to instantiate action %d"
" on file id %s", Action::ArgsTag(args), file_id.c_str());
delete key;
return false;
}
DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->Tag(),
file_id.c_str());
actions[act] = a;
VectorVal* av = val->LookupWithDefault(actions_idx)->AsVectorVal();
VectorVal* aav = val->LookupWithDefault(action_args_idx)->AsVectorVal();
EnumVal* ev = new EnumVal(act, BifType::Enum::FileAnalysis::Action);
av->Assign(av->Size(), ev, 0);
aav->Assign(aav->Size(), args->Ref(), 0);
Unref(av);
Unref(aav);
// NOTE(review): unlike both early-return paths above, key is not deleted
// after this call -- confirm whether Insert() takes ownership of the
// HashKey object itself; if not, it is leaked here.
action_map.Insert(key, act);
// Seed the record's "actions" table with an empty ActionResults entry
// indexed by these args.
val->Lookup(actions_idx)->AsTableVal()->Assign(args,
new RecordVal(BifType::Record::FileAnalysis::ActionResults));
return true;
}
bool Info::RemoveAction(ActionTag act)
void Info::ScheduleRemoval(const Action* act)
{
ActionMap::iterator it = actions.find(act);
if ( it == actions.end() ) return false;
return RemoveAction(it);
removing.push_back(act->Args());
}
bool Info::RemoveAction(const ActionMap::iterator& it)
void Info::DoActionRemoval()
{
DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", it->first,
ActionArgList::iterator it;
for ( it = removing.begin(); it != removing.end(); ++it )
RemoveAction(*it);
removing.clear();
}
// Detaches and destroys the action registered in action_map under the hash
// of the ActionArgs record \a args.  Returns false if no action is
// registered under that key, true if one was removed and deleted.
bool Info::RemoveAction(const RecordVal* args)
{
HashKey* key = action_hash->ComputeHash(args, 1);
if ( ! key )
reporter->InternalError("ActionArgs type mismatch in remove_action");
Action* act = (Action*) action_map.Remove(key);
// The key was only needed for the removal lookup above.
delete key;
if ( ! act )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove action %d for file id %s",
Action::ArgsTag(args), file_id.c_str());
return false;
}
DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", act->Tag(),
file_id.c_str());
delete it->second;
actions.erase(it);
// The entry has already been taken out of action_map by Remove(), so the
// Action object is destroyed explicitly here.
delete act;
return true;
}
void Info::DataIn(const u_char* data, uint64 len, uint64 offset)
{
ActionMap::iterator it = actions.begin();
while ( it != actions.end() )
if ( ! it->second->DeliverChunk(data, len, offset) )
RemoveAction(it++);
else
++it;
Action* act = 0;
IterCookie* c = action_map.InitForIteration();
while ( (act = action_map.NextEntry(c)) )
{
if ( ! act->DeliverChunk(data, len, offset) )
ScheduleRemoval(act);
}
DoActionRemoval();
// TODO: check reassembly requirement based on buffer size in record
if ( need_reassembly )
@ -255,45 +301,54 @@ void Info::DataIn(const u_char* data, uint64 len, uint64 offset)
void Info::DataIn(const u_char* data, uint64 len)
{
ActionMap::iterator it = actions.begin();
while ( it != actions.end() )
Action* act = 0;
IterCookie* c = action_map.InitForIteration();
while ( (act = action_map.NextEntry(c)) )
{
if ( ! it->second->DeliverStream(data, len) )
if ( ! act->DeliverStream(data, len) )
{
RemoveAction(it++);
ScheduleRemoval(act);
continue;
}
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
LookupFieldDefaultCount(missing_bytes_idx);
if ( ! it->second->DeliverChunk(data, len, offset) )
RemoveAction(it++);
else
++it;
if ( ! act->DeliverChunk(data, len, offset) )
ScheduleRemoval(act);
}
DoActionRemoval();
IncrementByteCount(len, seen_bytes_idx);
}
void Info::EndOfFile()
{
ActionMap::iterator it = actions.begin();
while ( it != actions.end() )
if ( ! it->second->EndOfFile() )
RemoveAction(it++);
else
++it;
Action* act = 0;
IterCookie* c = action_map.InitForIteration();
while ( (act = action_map.NextEntry(c)) )
{
if ( ! act->EndOfFile() )
ScheduleRemoval(act);
}
DoActionRemoval();
}
void Info::Gap(uint64 offset, uint64 len)
{
ActionMap::iterator it = actions.begin();
while ( it != actions.end() )
if ( ! it->second->Undelivered(offset, len) )
RemoveAction(it++);
else
++it;
Action* act = 0;
IterCookie* c = action_map.InitForIteration();
while ( (act = action_map.NextEntry(c)) )
{
if ( ! act->Undelivered(offset, len) )
ScheduleRemoval(act);
}
DoActionRemoval();
IncrementByteCount(len, missing_bytes_idx);
}

View file

@ -2,8 +2,10 @@
#define FILE_ANALYSIS_INFO_H
#include <string>
#include <map>
#include <list>
#include "CompHash.h"
#include "Dict.h"
#include "Conn.h"
#include "Val.h"
#include "Action.h"
@ -11,6 +13,8 @@
namespace file_analysis {
declare(PDict,Action);
/**
* Wrapper class around \c FileAnalysis::Info record values from script layer.
*/
@ -35,9 +39,11 @@ public:
FileID GetFileID() const { return file_id; }
/**
* @return record val of the "action_results" field from #val record.
* Looks up the value of the "actions" field in the #val record at the
* index corresponding to \a args. If there was no value at the index,
* it is created.
* @return the record value stored at that index.
*/
RecordVal* GetResults() const;
RecordVal* GetResults(RecordVal* args) const;
/**
* @return the string which uniquely identifies the file.
@ -74,16 +80,17 @@ public:
void ScheduleInactivityTimer() const;
/**
* Attaches an action. Only one action per type can be attached at a time.
* Attaches an action. Only one action per type can be attached at a time,
* unless the arguments differ.
* @return true if the action was attached, else false.
*/
bool AddAction(ActionTag act, RecordVal* args);
bool AddAction(RecordVal* args);
/**
* Removes an action.
* @return true if the action was removed, else false.
*/
bool RemoveAction(ActionTag act);
bool RemoveAction(const RecordVal* args);
/**
* Pass in non-sequential data and deliver to attached actions/analyzers.
@ -109,8 +116,6 @@ protected:
friend class Manager;
typedef map<ActionTag, Action*> ActionMap;
/**
* Constructor; only file_analysis::Manager should be creating these.
*/
@ -141,10 +146,15 @@ protected:
double LookupFieldDefaultInterval(int idx) const;
/**
* Removes an action.
* @return true if the action was removed, else false.
* Adds the ActionArgs of the file_analysis::Action \a act to the list of
* actions pending removal, #removing.
*/
bool RemoveAction(const ActionMap::iterator& it);
void ScheduleRemoval(const Action* act);
/**
* Deletes/removes all actions in #removing.
*/
void DoActionRemoval();
FileID file_id; /**< A pretty hash that likely identifies file*/
string unique; /**< A string that uniquely identifies file */
@ -152,7 +162,10 @@ protected:
double last_activity_time; /**< Time of last activity. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */
ActionMap actions; /**< Actions/analysis to perform on file. */
CompositeHash* action_hash;/**< ActionArgs hashes Action map lookup. */
PDict(Action) action_map; /**< Actions indexed by ActionArgs. */
typedef list<const RecordVal*> ActionArgList;
ActionArgList removing; /**< Actions pending removal. */
/**
* @return the field offset in #val record corresponding to \a field_name.
@ -175,8 +188,6 @@ protected:
static int overflow_bytes_idx;
static int timeout_interval_idx;
static int actions_idx;
static int action_args_idx;
static int action_results_idx;
};
} // namespace file_analysis

View file

@ -100,23 +100,22 @@ bool Manager::PostponeTimeout(const FileID& file_id) const
return true;
}
bool Manager::AddAction(const FileID& file_id, EnumVal* act,
RecordVal* args) const
bool Manager::AddAction(const FileID& file_id, RecordVal* args) const
{
Info* info = Lookup(file_id);
if ( ! info ) return false;
return info->AddAction(static_cast<ActionTag>(act->AsEnum()), args);
return info->AddAction(args);
}
bool Manager::RemoveAction(const FileID& file_id, EnumVal* act) const
bool Manager::RemoveAction(const FileID& file_id, const RecordVal* args) const
{
Info* info = Lookup(file_id);
if ( ! info ) return false;
return info->RemoveAction(static_cast<ActionTag>(act->AsEnum()));
return info->RemoveAction(args);
}
Info* Manager::GetInfo(const string& unique, Connection* conn,

View file

@ -73,17 +73,18 @@ public:
bool PostponeTimeout(const FileID& file_id) const;
/**
* Attaches an action to the file identifier. Only one action of a given
* type can be attached per file identifier at a time.
* Attaches an action to the file identifier. Multiple actions of a given
* type can be attached per file identifier at a time as long as the
* arguments differ.
* @return true if the action was attached, else false.
*/
bool AddAction(const FileID& file_id, EnumVal* act, RecordVal* args) const;
bool AddAction(const FileID& file_id, RecordVal* args) const;
/**
* Removes an action for a given file identifier.
* @return true if the action was removed, else false.
*/
bool RemoveAction(const FileID& file_id, EnumVal* act) const;
bool RemoveAction(const FileID& file_id, const RecordVal* args) const;
/**
* Calls the \c FileAnalysis::policy hook.