Merge remote-tracking branch 'origin/topic/jsiwek/file-analysis' into topic/seth/file-analysis-exe-analyzer

Conflicts:
	src/CMakeLists.txt
	src/file_analysis.bif
	src/file_analysis/Info.cc
This commit is contained in:
Seth Hall 2013-02-22 02:38:29 -05:00
commit ff2c2c7e52
16 changed files with 404 additions and 116 deletions

View file

@ -37,6 +37,12 @@ export {
extract_filename: string &optional; extract_filename: string &optional;
}; };
type ActionResults: record {
md5: string &optional;
sha1: string &optional;
sha256: string &optional;
};
## Contains all metadata related to the analysis of a given file, some ## Contains all metadata related to the analysis of a given file, some
## of which is logged. ## of which is logged.
type Info: record { type Info: record {
@ -81,6 +87,9 @@ export {
## The corresponding arguments supplied to each element of *actions*. ## The corresponding arguments supplied to each element of *actions*.
action_args: vector of ActionArgs &default=vector(); action_args: vector of ActionArgs &default=vector();
## Some actions may directly yield results in this record.
action_results: ActionResults;
} &redef; } &redef;
## TODO: document ## TODO: document

View file

@ -455,8 +455,10 @@ set(bro_SRCS
file_analysis/Manager.cc file_analysis/Manager.cc
file_analysis/Info.cc file_analysis/Info.cc
file_analysis/InfoTimer.cc file_analysis/InfoTimer.cc
file_analysis/FileID.h
file_analysis/Action.h file_analysis/Action.h
file_analysis/Extract.cc file_analysis/Extract.cc
file_analysis/Hash.cc
file_analysis/analyzers/PE.cc file_analysis/analyzers/PE.cc
nb_dns.c nb_dns.c

View file

@ -21,7 +21,7 @@ File_Analyzer::File_Analyzer(Connection* conn)
char op[256], rp[256]; char op[256], rp[256];
modp_ulitoa10(ntohs(conn->OrigPort()), op); modp_ulitoa10(ntohs(conn->OrigPort()), op);
modp_ulitoa10(ntohs(conn->RespPort()), rp); modp_ulitoa10(ntohs(conn->RespPort()), rp);
file_id = "TCPFile " + conn->OrigAddr().AsString() + ":" + op + "->" + unique_file = "TCPFile " + conn->OrigAddr().AsString() + ":" + op + "->" +
conn->RespAddr().AsString() + ":" + rp; conn->RespAddr().AsString() + ":" + rp;
} }
@ -29,7 +29,7 @@ void File_Analyzer::DeliverStream(int len, const u_char* data, bool orig)
{ {
TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); TCP_ApplicationAnalyzer::DeliverStream(len, data, orig);
file_mgr->DataIn(file_id, data, len, Conn()); file_mgr->DataIn(unique_file, data, len, Conn());
int n = min(len, BUFFER_SIZE - buffer_len); int n = min(len, BUFFER_SIZE - buffer_len);
@ -48,14 +48,14 @@ void File_Analyzer::Undelivered(int seq, int len, bool orig)
{ {
TCP_ApplicationAnalyzer::Undelivered(seq, len, orig); TCP_ApplicationAnalyzer::Undelivered(seq, len, orig);
file_mgr->Gap(file_id, seq, len); file_mgr->Gap(unique_file, seq, len);
} }
void File_Analyzer::Done() void File_Analyzer::Done()
{ {
TCP_ApplicationAnalyzer::Done(); TCP_ApplicationAnalyzer::Done();
file_mgr->EndOfFile(file_id, Conn()); file_mgr->EndOfFile(unique_file, Conn());
if ( buffer_len && buffer_len != BUFFER_SIZE ) if ( buffer_len && buffer_len != BUFFER_SIZE )
Identify(); Identify();

View file

@ -37,7 +37,7 @@ protected:
static magic_t magic; static magic_t magic;
static magic_t magic_mime; static magic_t magic_mime;
string file_id; string unique_file;
}; };
#endif #endif

View file

@ -8,6 +8,7 @@ module FileAnalysis;
type Info: record; type Info: record;
type ActionArgs: record; type ActionArgs: record;
type ActionResults: record;
## An enumeration of possibly-interesting "events" that can occur over ## An enumeration of possibly-interesting "events" that can occur over
## the course of analyzing files. The :bro:see:`FileAnalysis::policy` ## the course of analyzing files. The :bro:see:`FileAnalysis::policy`
@ -57,12 +58,16 @@ enum Trigger %{
enum Action %{ enum Action %{
ACTION_EXTRACT, ACTION_EXTRACT,
ACTION_MD5,
ACTION_SHA1,
ACTION_SHA256,
ACTION_PE_ANALYZER, ACTION_PE_ANALYZER,
%} %}
function FileAnalysis::postpone_timeout%(file_id: string%): bool function FileAnalysis::postpone_timeout%(file_id: string%): bool
%{ %{
bool result = file_mgr->PostponeTimeout(file_id->CheckString()); using namespace file_analysis;
bool result = file_mgr->PostponeTimeout(FileID(file_id->CheckString()));
return new Val(result, TYPE_BOOL); return new Val(result, TYPE_BOOL);
%} %}
@ -70,9 +75,10 @@ function FileAnalysis::add_action%(file_id: string,
action: FileAnalysis::Action, action: FileAnalysis::Action,
args: any%): bool args: any%): bool
%{ %{
using namespace file_analysis;
RecordVal* rv = args->AsRecordVal()->CoerceTo( RecordVal* rv = args->AsRecordVal()->CoerceTo(
BifType::Record::FileAnalysis::ActionArgs); BifType::Record::FileAnalysis::ActionArgs);
bool result = file_mgr->AddAction(file_id->CheckString(), bool result = file_mgr->AddAction(FileID(file_id->CheckString()),
action->AsEnumVal(), rv); action->AsEnumVal(), rv);
Unref(rv); Unref(rv);
return new Val(result, TYPE_BOOL); return new Val(result, TYPE_BOOL);
@ -81,13 +87,15 @@ function FileAnalysis::add_action%(file_id: string,
function FileAnalysis::remove_action%(file_id: string, function FileAnalysis::remove_action%(file_id: string,
action: FileAnalysis::Action%): bool action: FileAnalysis::Action%): bool
%{ %{
bool result = file_mgr->RemoveAction(file_id->CheckString(), using namespace file_analysis;
bool result = file_mgr->RemoveAction(FileID(file_id->CheckString()),
action->AsEnumVal()); action->AsEnumVal());
return new Val(result, TYPE_BOOL); return new Val(result, TYPE_BOOL);
%} %}
function FileAnalysis::stop%(file_id: string%): bool function FileAnalysis::stop%(file_id: string%): bool
%{ %{
bool result = file_mgr->RemoveFile(file_id->CheckString()); using namespace file_analysis;
bool result = file_mgr->RemoveFile(FileID(file_id->CheckString()));
return new Val(result, TYPE_BOOL); return new Val(result, TYPE_BOOL);
%} %}

View file

@ -2,9 +2,12 @@
#define FILE_ANALYSIS_ACTION_H #define FILE_ANALYSIS_ACTION_H
#include "Val.h" #include "Val.h"
#include "NetVar.h"
namespace file_analysis { namespace file_analysis {
typedef BifEnum::FileAnalysis::Action ActionTag;
class Info; class Info;
/** /**
@ -17,29 +20,44 @@ public:
/** /**
* Subclasses may override this to receive file data non-sequentially. * Subclasses may override this to receive file data non-sequentially.
* @return true if the action is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/ */
virtual void DeliverChunk(const u_char* data, uint64 len, uint64 offset) {} virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset)
{ return true; }
/** /**
* Subclasses may override this to receive file sequentially. * Subclasses may override this to receive file sequentially.
* @return true if the action is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/ */
virtual void DeliverStream(const u_char* data, uint64 len) {} virtual bool DeliverStream(const u_char* data, uint64 len)
{ return true; }
/** /**
* Subclasses may override this to specifically handle the end of a file. * Subclasses may override this to specifically handle the end of a file.
* @return true if the action is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/ */
virtual void EndOfFile() {} virtual bool EndOfFile()
{ return true; }
/** /**
* Subclasses may override this to handle missing data in a file stream. * Subclasses may override this to handle missing data in a file stream.
* @return true if the action is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/ */
virtual void Undelivered(uint64 offset, uint64 len) {} virtual bool Undelivered(uint64 offset, uint64 len)
{ return true; }
ActionTag Tag() const { return tag; }
protected: protected:
Action(Info* arg_info) {} Action(Info* arg_info, ActionTag arg_tag) : info(arg_info), tag(arg_tag) {}
Info* info; Info* info;
ActionTag tag;
}; };
typedef Action* (*ActionInstantiator)(const RecordVal* args, Info* info); typedef Action* (*ActionInstantiator)(const RecordVal* args, Info* info);

View file

@ -6,7 +6,8 @@
using namespace file_analysis; using namespace file_analysis;
Extract::Extract(Info* arg_info, const string& arg_filename) Extract::Extract(Info* arg_info, const string& arg_filename)
: Action(arg_info), filename(arg_filename) : Action(arg_info, BifEnum::FileAnalysis::ACTION_EXTRACT),
filename(arg_filename)
{ {
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
@ -36,11 +37,12 @@ Action* Extract::Instantiate(const RecordVal* args, Info* info)
return new Extract(info, v->AsString()->CheckString()); return new Extract(info, v->AsString()->CheckString());
} }
void Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset) bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
{ {
Action::DeliverChunk(data, len, offset); Action::DeliverChunk(data, len, offset);
if ( ! fd ) return; if ( ! fd ) return false;
safe_pwrite(fd, data, len, offset); safe_pwrite(fd, data, len, offset);
return true;
} }

View file

@ -5,20 +5,21 @@
#include "Val.h" #include "Val.h"
#include "Info.h" #include "Info.h"
#include "Action.h"
namespace file_analysis { namespace file_analysis {
/** /**
* An action to simply extract files to disk. * An action to simply extract files to disk.
*/ */
class Extract : Action { class Extract : public Action {
public: public:
static Action* Instantiate(const RecordVal* args, Info* info); static Action* Instantiate(const RecordVal* args, Info* info);
~Extract(); virtual ~Extract();
virtual void DeliverChunk(const u_char* data, uint64 len, uint64 offset); virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
protected: protected:

View file

@ -0,0 +1,32 @@
#ifndef FILE_ANALYSIS_FILEID_H
#define FILE_ANALYSIS_FILEID_H
namespace file_analysis {
/**
 * A simple string wrapper class to help enforce some type safety between
 * methods of FileAnalysis::Manager, some of which use a unique string to
 * identify files, and others which use a pretty hash (the FileID) to identify
 * files. A FileID is primarily used in methods which interface with the
 * script-layer, while the unique strings are used for methods which interface
 * with protocol analyzers (to better accommodate the possibility that a file
 * can be distributed over different connections and thus analyzer instances).
 */
struct FileID {
	string id; /**< The wrapped identifier text. */

	/**
	 * Wraps \a arg_id. The constructor is explicit so that a plain string
	 * is never silently converted into a FileID. The argument is taken by
	 * const reference to avoid copying the string twice.
	 */
	explicit FileID(const string& arg_id) : id(arg_id) {}
	FileID(const FileID& other) : id(other.id) {}

	/** @return the identifier as a NUL-terminated C string. */
	const char* c_str() const { return id.c_str(); }

	/** Equality and ordering simply compare the wrapped strings. */
	bool operator==(const FileID& rhs) const { return id == rhs.id; }
	bool operator<(const FileID& rhs) const { return id < rhs.id; }

	FileID& operator=(const FileID& rhs) { id = rhs.id; return *this; }
	/** Replaces the wrapped identifier with \a rhs. */
	FileID& operator=(const string& rhs) { id = rhs; return *this; }
};
} // namespace file_analysis
#endif

54
src/file_analysis/Hash.cc Normal file
View file

@ -0,0 +1,54 @@
#include <string>
#include "Hash.h"
#include "util.h"
using namespace file_analysis;
/**
 * Sets up a hashing action for \a arg_info, identified by \a tag.
 * The hash state \a hv is stored in #hash (the destructor deletes it) and is
 * initialized here so that it is ready to be fed data.
 */
Hash::Hash(Info* arg_info, ActionTag tag, HashVal* hv)
	: Action(arg_info, tag),
	  hash(hv)
	{
	hash->Init();
	}
/**
 * Releases the owned hash state.
 *
 * Finalize() is deliberately NOT called from here. It invokes the pure
 * virtual GetResultFieldOffset(), and while this destructor runs the derived
 * part of the object has already been destroyed, so the call cannot reach a
 * subclass override. That is undefined behavior (commonly an abort with
 * "pure virtual method called") whenever the hash is still valid, i.e. when
 * the action is destroyed before the end of the file was seen. A result is
 * therefore only published through EndOfFile().
 */
Hash::~Hash()
	{
	// NOTE(review): if HashVal is reference-counted like the other Vals
	// handled in this code base, Unref(hash) may be the more appropriate
	// way to release it -- confirm against OpaqueVal.h.
	delete hash;
	}
/**
 * Feeds the next sequential piece of the file into the hash state.
 * @param data pointer to the bytes to hash.
 * @param len number of bytes available at \a data.
 * @return true if the data was fed into the hash, or false if the hash
 *         state is no longer valid and nothing was consumed.
 */
bool Hash::DeliverStream(const u_char* data, uint64 len)
	{
	Action::DeliverStream(data, len);

	if ( hash->IsValid() )
		{
		hash->Feed(data, len);
		return true;
		}

	return false;
	}
/**
 * Handles the end of the file by publishing the hash result via Finalize().
 * @return always false: once the result has been produced this action has
 *         nothing further to do.
 */
bool Hash::EndOfFile()
{
Action::EndOfFile();
Finalize();
return false;
}
/**
 * Handles a gap (missing data) in the file stream.
 * @return always false, i.e. the action reports itself as done as soon as
 *         any part of the stream is missing. No result is published here.
 */
bool Hash::Undelivered(uint64 offset, uint64 len)
{
return false;
}
void Hash::Finalize()
{
if ( ! hash->IsValid() ) return;
StringVal* sv = hash->Get();
int i = GetResultFieldOffset();
if ( i < 0 )
reporter->InternalError("Hash Action result field not found");
info->Results()->Assign(i, sv);
}

88
src/file_analysis/Hash.h Normal file
View file

@ -0,0 +1,88 @@
#ifndef FILE_ANALYSIS_HASH_H
#define FILE_ANALYSIS_HASH_H
#include <string>
#include "Val.h"
#include "OpaqueVal.h"
#include "Info.h"
#include "Action.h"
namespace file_analysis {
/**
* An action to produce a hash of file contents.
*
* This is the common base of the concrete hash actions: a subclass passes the
* hash state to the constructor and names, through GetResultFieldOffset(),
* the result-record field that receives the hash value.
*/
class Hash : public Action {
public:
// Releases the hash state held in #hash.
virtual ~Hash();
// Feeds sequential file data into the hash; returns false (without
// consuming anything) once the hash state is no longer valid.
virtual bool DeliverStream(const u_char* data, uint64 len);
// Publishes the result via Finalize() and returns false.
virtual bool EndOfFile();
// Always returns false, i.e. a gap in the stream ends the action.
virtual bool Undelivered(uint64 offset, uint64 len);
protected:
// Stores hv in #hash and initializes it; arg_info and arg_tag are
// forwarded to the Action base class.
Hash(Info* arg_info, ActionTag arg_tag, HashVal* hv);
// If the hash state is still valid, assigns its value to the field of the
// file's results record selected by GetResultFieldOffset().
void Finalize();
// Returns the offset of this hash's field within the results record;
// Finalize() treats a negative value as an internal error.
virtual int GetResultFieldOffset() const = 0;
// Hash state that the file data is fed into; deleted by the destructor.
HashVal* hash;
};
/**
 * Hash action backed by an MD5Val; its result goes into the "md5" field of
 * the FileAnalysis::ActionResults record.
 */
class MD5 : public Hash {
public:
	/**
	 * Factory function for this action.
	 * @param args action arguments (not used by this action).
	 * @param info the file the new action is attached to.
	 * @return a newly allocated MD5 action.
	 */
	static Action* Instantiate(const RecordVal* args, Info* info)
		{
		return new MD5(info);
		}

protected:
	MD5(Info* arg_info)
		: Hash(arg_info, BifEnum::FileAnalysis::ACTION_MD5, new MD5Val())
		{
		}

	/** @return offset of the "md5" field in the ActionResults record. */
	virtual int GetResultFieldOffset() const
		{
		const char* field_name = "md5";
		return BifType::Record::FileAnalysis::ActionResults->
			FieldOffset(field_name);
		}
};
/**
 * Hash action backed by a SHA1Val; its result goes into the "sha1" field of
 * the FileAnalysis::ActionResults record.
 */
class SHA1 : public Hash {
public:
	/**
	 * Factory function for this action.
	 * @param args action arguments (not used by this action).
	 * @param info the file the new action is attached to.
	 * @return a newly allocated SHA1 action.
	 */
	static Action* Instantiate(const RecordVal* args, Info* info)
		{
		return new SHA1(info);
		}

protected:
	SHA1(Info* arg_info)
		: Hash(arg_info, BifEnum::FileAnalysis::ACTION_SHA1, new SHA1Val())
		{
		}

	/** @return offset of the "sha1" field in the ActionResults record. */
	virtual int GetResultFieldOffset() const
		{
		const char* field_name = "sha1";
		return BifType::Record::FileAnalysis::ActionResults->
			FieldOffset(field_name);
		}
};
/**
 * Hash action backed by a SHA256Val; its result goes into the "sha256" field
 * of the FileAnalysis::ActionResults record.
 */
class SHA256 : public Hash {
public:
	/**
	 * Factory function for this action.
	 * @param args action arguments (not used by this action).
	 * @param info the file the new action is attached to.
	 * @return a newly allocated SHA256 action.
	 */
	static Action* Instantiate(const RecordVal* args, Info* info)
		{
		return new SHA256(info);
		}

protected:
	SHA256(Info* arg_info)
		: Hash(arg_info, BifEnum::FileAnalysis::ACTION_SHA256, new SHA256Val())
		{
		}

	/** @return offset of the "sha256" field in the ActionResults record. */
	virtual int GetResultFieldOffset() const
		{
		const char* field_name = "sha256";
		return BifType::Record::FileAnalysis::ActionResults->
			FieldOffset(field_name);
		}
};
} // namespace file_analysis
#endif

View file

@ -2,11 +2,13 @@
#include "Info.h" #include "Info.h"
#include "InfoTimer.h" #include "InfoTimer.h"
#include "FileID.h"
#include "Reporter.h" #include "Reporter.h"
#include "Val.h" #include "Val.h"
#include "Action.h" #include "Action.h"
#include "Extract.h" #include "Extract.h"
#include "Hash.h"
#include "analyzers/PE.h" #include "analyzers/PE.h"
using namespace file_analysis; using namespace file_analysis;
@ -14,6 +16,9 @@ using namespace file_analysis;
// keep in order w/ declared enum values in file_analysis.bif // keep in order w/ declared enum values in file_analysis.bif
static ActionInstantiator action_factory[] = { static ActionInstantiator action_factory[] = {
Extract::Instantiate, Extract::Instantiate,
MD5::Instantiate,
SHA1::Instantiate,
SHA256::Instantiate,
PE_Analyzer::Instantiate, PE_Analyzer::Instantiate,
}; };
@ -54,6 +59,7 @@ int Info::overflow_bytes_idx = -1;
int Info::timeout_interval_idx = -1; int Info::timeout_interval_idx = -1;
int Info::actions_idx = -1; int Info::actions_idx = -1;
int Info::action_args_idx = -1; int Info::action_args_idx = -1;
int Info::action_results_idx = -1;
void Info::InitFieldIndices() void Info::InitFieldIndices()
{ {
@ -70,19 +76,23 @@ void Info::InitFieldIndices()
timeout_interval_idx = Idx("timeout_interval"); timeout_interval_idx = Idx("timeout_interval");
actions_idx = Idx("actions"); actions_idx = Idx("actions");
action_args_idx = Idx("action_args"); action_args_idx = Idx("action_args");
action_results_idx = Idx("action_results");
} }
Info::Info(const string& file_id, Connection* conn, const string& protocol) Info::Info(const string& unique, Connection* conn, const string& protocol)
: val(0), last_activity_time(network_time), postpone_timeout(false), : file_id(unique), unique(unique), val(0), last_activity_time(network_time),
need_reassembly(false) postpone_timeout(false), need_reassembly(false)
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new Info object %s", file_id.c_str());
InitFieldIndices(); InitFieldIndices();
char id[20];
uitoa_n(calculate_unique_id(), id, sizeof(id), 62);
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new Info object %s", id);
val = new RecordVal(BifType::Record::FileAnalysis::Info); val = new RecordVal(BifType::Record::FileAnalysis::Info);
// TODO: hash/prettify file_id for script layer presentation val->Assign(file_id_idx, new StringVal(id));
val->Assign(file_id_idx, new StringVal(file_id.c_str())); file_id = FileID(id);
UpdateConnectionFields(conn); UpdateConnectionFields(conn);
@ -98,7 +108,7 @@ Info::~Info()
for ( it = actions.begin(); it != actions.end(); ++it ) for ( it = actions.begin(); it != actions.end(); ++it )
delete it->second; delete it->second;
DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s", FileID().c_str()); DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s",file_id.c_str());
Unref(val); Unref(val);
} }
@ -147,9 +157,9 @@ double Info::TimeoutInterval() const
return LookupFieldDefaultInterval(timeout_interval_idx); return LookupFieldDefaultInterval(timeout_interval_idx);
} }
string Info::FileID() const RecordVal* Info::Results() const
{ {
return val->Lookup(file_id_idx)->AsString()->CheckString(); return val->Lookup(action_results_idx)->AsRecordVal();
} }
void Info::IncrementByteCount(uint64 size, int field_idx) void Info::IncrementByteCount(uint64 size, int field_idx)
@ -174,25 +184,28 @@ bool Info::IsComplete() const
void Info::ScheduleInactivityTimer() const void Info::ScheduleInactivityTimer() const
{ {
timer_mgr->Add(new InfoTimer(network_time, FileID(), TimeoutInterval())); timer_mgr->Add(new InfoTimer(network_time, file_id, TimeoutInterval()));
} }
bool Info::AddAction(EnumVal* act, RecordVal* args) bool Info::AddAction(ActionTag act, RecordVal* args)
{ {
if ( actions.find(act->AsEnum()) != actions.end() ) return false; if ( actions.find(act) != actions.end() ) return false;
Action* a = action_factory[act->AsEnum()](args, this); ActionTag tag = static_cast<ActionTag>(act);
Action* a = action_factory[act](args, this);
if ( ! a ) return false; if ( ! a ) return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->AsEnum(), DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act,
FileID().c_str()); file_id.c_str());
actions[act->AsEnum()] = a; actions[act] = a;
VectorVal* av = val->LookupWithDefault(actions_idx)->AsVectorVal(); VectorVal* av = val->LookupWithDefault(actions_idx)->AsVectorVal();
VectorVal* aav = val->LookupWithDefault(action_args_idx)->AsVectorVal(); VectorVal* aav = val->LookupWithDefault(action_args_idx)->AsVectorVal();
av->Assign(av->Size(), act->Ref(), 0); EnumVal* ev = new EnumVal(act, BifType::Enum::FileAnalysis::Action);
av->Assign(av->Size(), ev, 0);
aav->Assign(aav->Size(), args->Ref(), 0); aav->Assign(aav->Size(), args->Ref(), 0);
Unref(av); Unref(av);
@ -201,14 +214,19 @@ bool Info::AddAction(EnumVal* act, RecordVal* args)
return true; return true;
} }
bool Info::RemoveAction(EnumVal* act) bool Info::RemoveAction(ActionTag act)
{ {
ActionMap::iterator it = actions.find(act->AsEnum()); ActionMap::iterator it = actions.find(act);
if ( it == actions.end() ) return false; if ( it == actions.end() ) return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", act->AsEnum(), return RemoveAction(it);
FileID().c_str()); }
bool Info::RemoveAction(const ActionMap::iterator& it)
{
DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", it->first,
file_id.c_str());
delete it->second; delete it->second;
actions.erase(it); actions.erase(it);
return true; return true;
@ -216,12 +234,18 @@ bool Info::RemoveAction(EnumVal* act)
void Info::DataIn(const u_char* data, uint64 len, uint64 offset) void Info::DataIn(const u_char* data, uint64 len, uint64 offset)
{ {
ActionMap::const_iterator it; ActionMap::iterator it = actions.begin();
for ( it = actions.begin(); it != actions.end(); ++it ) while ( it != actions.end() )
it->second->DeliverChunk(data, len, offset); if ( ! it->second->DeliverChunk(data, len, offset) )
RemoveAction(it++);
else
++it;
// TODO: check reassembly requirement based on buffer size in record // TODO: check reassembly requirement based on buffer size in record
if ( ! need_reassembly ) return; if ( need_reassembly )
{
// TODO
}
// TODO: reassembly stuff, possibly having to deliver chunks if buffer full // TODO: reassembly stuff, possibly having to deliver chunks if buffer full
// and incrememt overflow bytes // and incrememt overflow bytes
@ -231,13 +255,22 @@ void Info::DataIn(const u_char* data, uint64 len, uint64 offset)
void Info::DataIn(const u_char* data, uint64 len) void Info::DataIn(const u_char* data, uint64 len)
{ {
ActionMap::const_iterator it; ActionMap::iterator it = actions.begin();
for ( it = actions.begin(); it != actions.end(); ++it ) while ( it != actions.end() )
{ {
it->second->DeliverStream(data, len); if ( ! it->second->DeliverStream(data, len) )
{
RemoveAction(it++);
continue;
}
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) + uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
LookupFieldDefaultCount(missing_bytes_idx); LookupFieldDefaultCount(missing_bytes_idx);
it->second->DeliverChunk(data, len, offset);
if ( ! it->second->DeliverChunk(data, len, offset) )
RemoveAction(it++);
else
++it;
} }
IncrementByteCount(len, seen_bytes_idx); IncrementByteCount(len, seen_bytes_idx);
@ -245,16 +278,22 @@ void Info::DataIn(const u_char* data, uint64 len)
void Info::EndOfFile() void Info::EndOfFile()
{ {
ActionMap::const_iterator it; ActionMap::iterator it = actions.begin();
for ( it = actions.begin(); it != actions.end(); ++it ) while ( it != actions.end() )
it->second->EndOfFile(); if ( ! it->second->EndOfFile() )
RemoveAction(it++);
else
++it;
} }
void Info::Gap(uint64 offset, uint64 len) void Info::Gap(uint64 offset, uint64 len)
{ {
ActionMap::const_iterator it; ActionMap::iterator it = actions.begin();
for ( it = actions.begin(); it != actions.end(); ++it ) while ( it != actions.end() )
it->second->Undelivered(offset, len); if ( ! it->second->Undelivered(offset, len) )
RemoveAction(it++);
else
++it;
IncrementByteCount(len, missing_bytes_idx); IncrementByteCount(len, missing_bytes_idx);
} }

View file

@ -7,6 +7,7 @@
#include "Conn.h" #include "Conn.h"
#include "Val.h" #include "Val.h"
#include "Action.h" #include "Action.h"
#include "FileID.h"
namespace file_analysis { namespace file_analysis {
@ -26,7 +27,17 @@ public:
/** /**
* @return value of the "file_id" field from #val record. * @return value of the "file_id" field from #val record.
*/ */
string FileID() const; FileID GetFileID() const { return file_id; }
/**
* @return record val of the "action_results" field from #val record.
*/
RecordVal* Results() const;
/**
* @return the string which uniquely identifies the file.
*/
string Unique() const { return unique; }
/** /**
* @return #last_activity_time * @return #last_activity_time
@ -61,13 +72,13 @@ public:
* Attaches an action. Only one action per type can be attached at a time. * Attaches an action. Only one action per type can be attached at a time.
* @return true if the action was attached, else false. * @return true if the action was attached, else false.
*/ */
bool AddAction(EnumVal* act, RecordVal* args); bool AddAction(ActionTag act, RecordVal* args);
/** /**
* Removes an action. * Removes an action.
* @return true if the action was removed, else false. * @return true if the action was removed, else false.
*/ */
bool RemoveAction(EnumVal* act); bool RemoveAction(ActionTag act);
/** /**
* Pass in non-sequential data and deliver to attached actions/analyzers. * Pass in non-sequential data and deliver to attached actions/analyzers.
@ -93,10 +104,12 @@ protected:
friend class Manager; friend class Manager;
typedef map<ActionTag, Action*> ActionMap;
/** /**
* Constructor; only file_analysis::Manager should be creating these. * Constructor; only file_analysis::Manager should be creating these.
*/ */
Info(const string& file_id, Connection* conn = 0, Info(const string& unique, Connection* conn = 0,
const string& protocol = ""); const string& protocol = "");
/** /**
@ -122,14 +135,19 @@ protected:
*/ */
double LookupFieldDefaultInterval(int idx) const; double LookupFieldDefaultInterval(int idx) const;
/**
* Removes an action.
* @return true if the action was removed, else false.
*/
bool RemoveAction(const ActionMap::iterator& it);
FileID file_id; /**< A pretty hash that likely identifies file*/
string unique; /**< A string that uniquely identifies file */
RecordVal* val; /**< \c FileAnalysis::Info from script layer. */ RecordVal* val; /**< \c FileAnalysis::Info from script layer. */
double last_activity_time; /**< Time of last activity. */ double last_activity_time; /**< Time of last activity. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */ bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */ bool need_reassembly; /**< Whether file stream reassembly is needed. */
ActionMap actions; /**< Actions/analysis to perform on file. */
typedef map<int, Action*> ActionMap;
ActionMap actions;
/** /**
* @return the field offset in #val record corresponding to \a field_name. * @return the field offset in #val record corresponding to \a field_name.
@ -153,6 +171,7 @@ protected:
static int timeout_interval_idx; static int timeout_interval_idx;
static int actions_idx; static int actions_idx;
static int action_args_idx; static int action_args_idx;
static int action_results_idx;
}; };
} // namespace file_analysis } // namespace file_analysis

View file

@ -1,8 +1,9 @@
#ifndef FILE_ANALYSIS_INFOTIMER_H #ifndef FILE_ANALYSIS_INFOTIMER_H
#define FILE_ANALYSIS_INFOTIMER_H #define FILE_ANALYSIS_INFOTIMER_H
#include "Timer.h"
#include <string> #include <string>
#include "Timer.h"
#include "FileID.h"
namespace file_analysis { namespace file_analysis {
@ -12,7 +13,7 @@ namespace file_analysis {
class InfoTimer : public Timer { class InfoTimer : public Timer {
public: public:
InfoTimer(double t, const string& id, double interval) InfoTimer(double t, const FileID& id, double interval)
: Timer(t + interval, TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id) {} : Timer(t + interval, TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id) {}
~InfoTimer() {} ~InfoTimer() {}
@ -25,7 +26,7 @@ public:
protected: protected:
string file_id; FileID file_id;
}; };
} // namespace file_analysis } // namespace file_analysis

View file

@ -3,6 +3,7 @@
#include "Manager.h" #include "Manager.h"
#include "Info.h" #include "Info.h"
#include "Action.h"
using namespace file_analysis; using namespace file_analysis;
@ -17,8 +18,8 @@ Manager::~Manager()
void Manager::Terminate() void Manager::Terminate()
{ {
vector<string> keys; vector<FileID> keys;
for ( FileMap::iterator it = file_map.begin(); it != file_map.end(); ++it ) for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it )
keys.push_back(it->first); keys.push_back(it->first);
for ( size_t i = 0; i < keys.size(); ++i ) for ( size_t i = 0; i < keys.size(); ++i )
Timeout(keys[i], true); Timeout(keys[i], true);
@ -29,46 +30,46 @@ static void check_file_done(Info* info)
if ( info->IsComplete() ) if ( info->IsComplete() )
{ {
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_DONE, info); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_DONE, info);
file_mgr->RemoveFile(info->FileID()); file_mgr->RemoveFile(info->GetFileID());
} }
} }
void Manager::DataIn(const string& file_id, const u_char* data, uint64 len, void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
uint64 offset, Connection* conn, const string& protocol) uint64 offset, Connection* conn, const string& protocol)
{ {
Info* info = IDtoInfo(file_id, conn, protocol); Info* info = GetInfo(unique, conn, protocol);
info->DataIn(data, len, offset); info->DataIn(data, len, offset);
check_file_done(info); check_file_done(info);
} }
void Manager::DataIn(const string& file_id, const u_char* data, uint64 len, void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
Connection* conn, const string& protocol) Connection* conn, const string& protocol)
{ {
Info* info = IDtoInfo(file_id, conn, protocol); Info* info = GetInfo(unique, conn, protocol);
info->DataIn(data, len); info->DataIn(data, len);
check_file_done(info); check_file_done(info);
} }
void Manager::EndOfFile(const string& file_id, Connection* conn, void Manager::EndOfFile(const string& unique, Connection* conn,
const string& protocol) const string& protocol)
{ {
Info* info = IDtoInfo(file_id, conn, protocol); Info* info = GetInfo(unique, conn, protocol);
info->EndOfFile(); info->EndOfFile();
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_EOF, info); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_EOF, info);
} }
void Manager::Gap(const string& file_id, uint64 offset, uint64 len, void Manager::Gap(const string& unique, uint64 offset, uint64 len,
Connection* conn, const string& protocol) Connection* conn, const string& protocol)
{ {
Info* info = IDtoInfo(file_id, conn, protocol); Info* info = GetInfo(unique, conn, protocol);
info->Gap(offset, len); info->Gap(offset, len);
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_GAP, info); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_GAP, info);
} }
void Manager::SetSize(const string& file_id, uint64 size, void Manager::SetSize(const string& unique, uint64 size,
Connection* conn, const string& protocol) Connection* conn, const string& protocol)
{ {
Info* info = IDtoInfo(file_id, conn, protocol); Info* info = GetInfo(unique, conn, protocol);
info->SetTotalBytes(size); info->SetTotalBytes(size);
check_file_done(info); check_file_done(info);
} }
@ -89,7 +90,7 @@ void Manager::EvaluatePolicy(BifEnum::FileAnalysis::Trigger t, Info* info)
Unref(result); Unref(result);
} }
bool Manager::PostponeTimeout(const string& file_id) const bool Manager::PostponeTimeout(const FileID& file_id) const
{ {
Info* info = Lookup(file_id); Info* info = Lookup(file_id);
@ -99,33 +100,42 @@ bool Manager::PostponeTimeout(const string& file_id) const
return true; return true;
} }
bool Manager::AddAction(const string& file_id, EnumVal* act, bool Manager::AddAction(const FileID& file_id, EnumVal* act,
RecordVal* args) const RecordVal* args) const
{ {
Info* info = Lookup(file_id); Info* info = Lookup(file_id);
if ( ! info ) return false; if ( ! info ) return false;
return info->AddAction(act, args); return info->AddAction(static_cast<ActionTag>(act->AsEnum()), args);
} }
bool Manager::RemoveAction(const string& file_id, EnumVal* act) const bool Manager::RemoveAction(const FileID& file_id, EnumVal* act) const
{ {
Info* info = Lookup(file_id); Info* info = Lookup(file_id);
if ( ! info ) return false; if ( ! info ) return false;
return info->RemoveAction(act); return info->RemoveAction(static_cast<ActionTag>(act->AsEnum()));
} }
Info* Manager::IDtoInfo(const string& file_id, Connection* conn, Info* Manager::GetInfo(const string& unique, Connection* conn,
const string& protocol) const string& protocol)
{ {
Info* rval = file_map[file_id]; Info* rval = str_map[unique];
if ( ! rval ) if ( ! rval )
{ {
rval = file_map[file_id] = new Info(file_id, conn, protocol); rval = str_map[unique] = new Info(unique, conn, protocol);
FileID id = rval->GetFileID();
if ( id_map[id] )
{
reporter->Error("Evicted duplicate file ID: %s", id.c_str());
RemoveFile(id);
}
id_map[id] = rval;
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_NEW, rval); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_NEW, rval);
} }
else else
@ -137,16 +147,16 @@ Info* Manager::IDtoInfo(const string& file_id, Connection* conn,
return rval; return rval;
} }
Info* Manager::Lookup(const string& file_id) const Info* Manager::Lookup(const FileID& file_id) const
{ {
FileMap::const_iterator it = file_map.find(file_id); IDMap::const_iterator it = id_map.find(file_id);
if ( it == file_map.end() ) return 0; if ( it == id_map.end() ) return 0;
return it->second; return it->second;
} }
// Handle an analysis timeout for the file identified by file_id.
// Diff note: the lines between the Lookup() call and the postpone check are
// hidden behind the hunk marker below; only the visible part is described.
void Manager::Timeout(const string& file_id, bool is_terminating) void Manager::Timeout(const FileID& file_id, bool is_terminating)
{ {
Info* info = Lookup(file_id); Info* info = Lookup(file_id);
@ -157,25 +167,27 @@ void Manager::Timeout(const string& file_id, bool is_terminating)
// When postpone_timeout is set and is_terminating is false, the file is kept:
// its last-activity time is refreshed and the inactivity timer is re-armed.
if ( info->postpone_timeout && ! is_terminating ) if ( info->postpone_timeout && ! is_terminating )
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "Postpone file analysis timeout for %s", DBG_LOG(DBG_FILE_ANALYSIS, "Postpone file analysis timeout for %s",
info->FileID().c_str()); info->GetFileID().c_str());
info->UpdateLastActivityTime(); info->UpdateLastActivityTime();
info->ScheduleInactivityTimer(); info->ScheduleInactivityTimer();
return; return;
} }
// Otherwise the timeout is logged and the file is removed via RemoveFile().
DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s", DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s",
info->FileID().c_str()); info->GetFileID().c_str());
RemoveFile(file_id); RemoveFile(file_id);
} }
// Discard the Info mapped to file_id.
// Returns false when the map has no entry for file_id; otherwise deletes the
// Info object, erases its map entry and returns true.
// Diff note: the post-merge column additionally erases the Info from str_map
// using its Unique() key and reports an error when no such key was present.
bool Manager::RemoveFile(const string& file_id) bool Manager::RemoveFile(const FileID& file_id)
{ {
FileMap::iterator it = file_map.find(file_id); IDMap::iterator it = id_map.find(file_id);
if ( it == file_map.end() ) return false; if ( it == id_map.end() ) return false;
if ( ! str_map.erase(it->second->Unique()) )
reporter->Error("No string mapping for file ID %s", file_id.c_str());
// The map stores raw Info pointers, so the object is deleted explicitly
// before its entry is erased.
delete it->second; delete it->second;
file_map.erase(it); id_map.erase(it);
return true; return true;
} }

View file

@ -10,6 +10,7 @@
#include "Info.h" #include "Info.h"
#include "InfoTimer.h" #include "InfoTimer.h"
#include "FileID.h"
namespace file_analysis { namespace file_analysis {
@ -31,58 +32,58 @@ public:
/** /**
* Pass in non-sequential file data. * Pass in non-sequential file data.
*/ */
void DataIn(const string& file_id, const u_char* data, uint64 len, void DataIn(const string& unique, const u_char* data, uint64 len,
uint64 offset, Connection* conn = 0, uint64 offset, Connection* conn = 0,
const string& protocol = ""); const string& protocol = "");
/** /**
* Pass in sequential file data. * Pass in sequential file data.
*/ */
void DataIn(const string& file_id, const u_char* data, uint64 len, void DataIn(const string& unique, const u_char* data, uint64 len,
Connection* conn = 0, const string& protocol = ""); Connection* conn = 0, const string& protocol = "");
/** /**
* Signal the end of file data. * Signal the end of file data.
*/ */
void EndOfFile(const string& file_id, Connection* conn = 0, void EndOfFile(const string& unique, Connection* conn = 0,
const string& protocol = ""); const string& protocol = "");
/** /**
* Signal a gap in the file data stream. * Signal a gap in the file data stream.
*/ */
void Gap(const string& file_id, uint64 offset, uint64 len, void Gap(const string& unique, uint64 offset, uint64 len,
Connection* conn = 0, const string& protocol = ""); Connection* conn = 0, const string& protocol = "");
/** /**
* Provide the expected number of bytes that comprise a file. * Provide the expected number of bytes that comprise a file.
*/ */
void SetSize(const string& file_id, uint64 size, Connection* conn = 0, void SetSize(const string& unique, uint64 size, Connection* conn = 0,
const string& protocol = ""); const string& protocol = "");
/** /**
* Discard the file_analysis::Info object associated with \a file_id. * Discard the file_analysis::Info object associated with \a file_id.
* @return false if file identifier did not map to anything, else true. * @return false if file identifier did not map to anything, else true.
*/ */
// In the definition shown in this diff the Info object is deleted, and the
// post-merge version also erases its unique-string entry from str_map.
bool RemoveFile(const string& file_id); bool RemoveFile(const FileID& file_id);
/** /**
* If called during \c FileAnalysis::policy evaluation for a * If called during \c FileAnalysis::policy evaluation for a
* \c FileAnalysis::TRIGGER_TIMEOUT, requests deferral of analysis timeout. * \c FileAnalysis::TRIGGER_TIMEOUT, requests deferral of analysis timeout.
*/ */
bool PostponeTimeout(const string& file_id) const; bool PostponeTimeout(const FileID& file_id) const;
/** /**
* Attaches an action to the file identifier. Only one action of a given * Attaches an action to the file identifier. Only one action of a given
* type can be attached per file identifier at a time. * type can be attached per file identifier at a time.
* @return true if the action was attached, else false. * @return true if the action was attached, else false.
*/ */
// The definition shown in this diff also returns false when \a file_id is not
// mapped to any Info object.
bool AddAction(const string& file_id, EnumVal* act, RecordVal* args) const; bool AddAction(const FileID& file_id, EnumVal* act, RecordVal* args) const;
/** /**
* Removes an action for a given file identifier. * Removes an action for a given file identifier.
* @return true if the action was removed, else false. * @return true if the action was removed, else false.
*/ */
// The definition shown in this diff also returns false when \a file_id is not
// mapped to any Info object.
bool RemoveAction(const string& file_id, EnumVal* act) const; bool RemoveAction(const FileID& file_id, EnumVal* act) const;
/** /**
* Calls the \c FileAnalysis::policy hook. * Calls the \c FileAnalysis::policy hook.
@ -93,29 +94,31 @@ protected:
friend class InfoTimer; friend class InfoTimer;
typedef map<string, Info*> FileMap; typedef map<string, Info*> StrMap;
typedef map<FileID, Info*> IDMap;
/** /**
* @return the Info object mapped to \a file_id. One is created if mapping * @return the Info object mapped to \a unique. One is created if mapping
* doesn't exist. If it did exist, the activity time is refreshed * doesn't exist. If it did exist, the activity time is refreshed
* and connection-related fields of the record value may be updated. * and connection-related fields of the record value may be updated.
*/ */
Info* IDtoInfo(const string& file_id, Connection* conn = 0, Info* GetInfo(const string& unique, Connection* conn = 0,
const string& protocol = ""); const string& protocol = "");
/** /**
* @return the Info object mapped to \a file_id, or a null pointer if no * @return the Info object mapped to \a file_id, or a null pointer if no
* mapping exists. * mapping exists.
*/ */
Info* Lookup(const string& file_id) const; Info* Lookup(const FileID& file_id) const;
/** /**
* Evaluate timeout policy for a file and remove the Info object mapped to * Evaluate timeout policy for a file and remove the Info object mapped to
* \a file_id if needed. * \a file_id if needed.
*/ */
void Timeout(const string& file_id, bool is_terminating = ::terminating); void Timeout(const FileID& file_id, bool is_terminating = ::terminating);
FileMap file_map; /**< Map strings to \c FileAnalysis::Info records. */ StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */
IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. */
}; };
} // namespace file_analysis } // namespace file_analysis