Add MD5/SHA1/SHA256 file analysis hashing actions.

This commit is contained in:
Jon Siwek 2013-02-21 21:05:01 -06:00
parent ceb471fb36
commit 85410a7657
11 changed files with 271 additions and 42 deletions

View file

@ -37,6 +37,12 @@ export {
extract_filename: string &optional; extract_filename: string &optional;
}; };
type ActionResults: record {
md5: string &optional;
sha1: string &optional;
sha256: string &optional;
};
## Contains all metadata related to the analysis of a given file, some ## Contains all metadata related to the analysis of a given file, some
## of which is logged. ## of which is logged.
type Info: record { type Info: record {
@ -81,6 +87,9 @@ export {
## The corresponding arguments supplied to each element of *actions*. ## The corresponding arguments supplied to each element of *actions*.
action_args: vector of ActionArgs &default=vector(); action_args: vector of ActionArgs &default=vector();
## Some actions may directly yield results in this record.
action_results: ActionResults;
} &redef; } &redef;
## TODO: document ## TODO: document

View file

@ -454,6 +454,7 @@ set(bro_SRCS
file_analysis/FileID.h file_analysis/FileID.h
file_analysis/Action.h file_analysis/Action.h
file_analysis/Extract.cc file_analysis/Extract.cc
file_analysis/Hash.cc
nb_dns.c nb_dns.c
digest.h digest.h

View file

@ -8,6 +8,7 @@ module FileAnalysis;
type Info: record; type Info: record;
type ActionArgs: record; type ActionArgs: record;
type ActionResults: record;
## An enumeration of possibly-interesting "events" that can occur over ## An enumeration of possibly-interesting "events" that can occur over
## the course of analyzing files. The :bro:see:`FileAnalysis::policy` ## the course of analyzing files. The :bro:see:`FileAnalysis::policy`
@ -57,6 +58,9 @@ enum Trigger %{
enum Action %{ enum Action %{
ACTION_EXTRACT, ACTION_EXTRACT,
ACTION_MD5,
ACTION_SHA1,
ACTION_SHA256,
%} %}
function FileAnalysis::postpone_timeout%(file_id: string%): bool function FileAnalysis::postpone_timeout%(file_id: string%): bool

View file

@ -2,9 +2,12 @@
#define FILE_ANALYSIS_ACTION_H #define FILE_ANALYSIS_ACTION_H
#include "Val.h" #include "Val.h"
#include "NetVar.h"
namespace file_analysis { namespace file_analysis {
typedef BifEnum::FileAnalysis::Action ActionTag;
class Info; class Info;
/** /**
@ -17,29 +20,44 @@ public:
/** /**
* Subclasses may override this to receive file data non-sequentially. * Subclasses may override this to receive file data non-sequentially.
* @return true if the action is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/ */
virtual void DeliverChunk(const u_char* data, uint64 len, uint64 offset) {} virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset)
{ return true; }
/** /**
* Subclasses may override this to receive file sequentially. * Subclasses may override this to receive file sequentially.
* @return true if the action is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/ */
virtual void DeliverStream(const u_char* data, uint64 len) {} virtual bool DeliverStream(const u_char* data, uint64 len)
{ return true; }
/** /**
* Subclasses may override this to specifically handle the end of a file. * Subclasses may override this to specifically handle the end of a file.
* @return true if the action is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/ */
virtual void EndOfFile() {} virtual bool EndOfFile()
{ return true; }
/** /**
* Subclasses may override this to handle missing data in a file stream. * Subclasses may override this to handle missing data in a file stream.
* @return true if the action is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/ */
virtual void Undelivered(uint64 offset, uint64 len) {} virtual bool Undelivered(uint64 offset, uint64 len)
{ return true; }
ActionTag Tag() const { return tag; }
protected: protected:
Action(Info* arg_info) {} Action(Info* arg_info, ActionTag arg_tag) : info(arg_info), tag(arg_tag) {}
Info* info; Info* info;
ActionTag tag;
}; };
typedef Action* (*ActionInstantiator)(const RecordVal* args, Info* info); typedef Action* (*ActionInstantiator)(const RecordVal* args, Info* info);

View file

@ -6,7 +6,8 @@
using namespace file_analysis; using namespace file_analysis;
Extract::Extract(Info* arg_info, const string& arg_filename) Extract::Extract(Info* arg_info, const string& arg_filename)
: Action(arg_info), filename(arg_filename) : Action(arg_info, BifEnum::FileAnalysis::ACTION_EXTRACT),
filename(arg_filename)
{ {
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
@ -36,11 +37,12 @@ Action* Extract::Instantiate(const RecordVal* args, Info* info)
return new Extract(info, v->AsString()->CheckString()); return new Extract(info, v->AsString()->CheckString());
} }
void Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset) bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
{ {
Action::DeliverChunk(data, len, offset); Action::DeliverChunk(data, len, offset);
if ( ! fd ) return; if ( ! fd ) return false;
safe_pwrite(fd, data, len, offset); safe_pwrite(fd, data, len, offset);
return true;
} }

View file

@ -5,20 +5,21 @@
#include "Val.h" #include "Val.h"
#include "Info.h" #include "Info.h"
#include "Action.h"
namespace file_analysis { namespace file_analysis {
/** /**
* An action to simply extract files to disk. * An action to simply extract files to disk.
*/ */
class Extract : Action { class Extract : public Action {
public: public:
static Action* Instantiate(const RecordVal* args, Info* info); static Action* Instantiate(const RecordVal* args, Info* info);
~Extract(); virtual ~Extract();
virtual void DeliverChunk(const u_char* data, uint64 len, uint64 offset); virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
protected: protected:

54
src/file_analysis/Hash.cc Normal file
View file

@ -0,0 +1,54 @@
#include <string>
#include "Hash.h"
#include "util.h"
using namespace file_analysis;
/**
 * Constructs a hashing action.
 * @param arg_info the file the action is attached to; forwarded to Action.
 * @param tag the action tag identifying this kind of hash; forwarded to
 *        Action.
 * @param hv the hash state to use. It is initialized here and deleted by
 *        the destructor, so this object takes ownership of it.
 */
Hash::Hash(Info* arg_info, ActionTag tag, HashVal* hv)
: Action(arg_info, tag), hash(hv)
{
// Put the hash state into its initial state before any data is fed.
hash->Init();
}
/**
 * Makes a last attempt at recording the hash result and then releases the
 * hash state.
 *
 * NOTE(review): by the time this destructor body runs, the derived-class
 * part of the object has already been destroyed, so a virtual call reached
 * from Finalize() cannot dispatch to a subclass override. Verify that
 * Finalize() does not depend on one when called from here.
 */
Hash::~Hash()
{
// The action may be destroyed without EndOfFile() having been seen, so try
// to record a result anyway: maybe it's all there...
Finalize();
delete hash;
}
/**
 * Feeds the next sequential piece of file content into the hash.
 * @param data pointer to the new file content.
 * @param len number of bytes available at \a data.
 * @return true if the data was fed into the hash, or false if the hash is
 *         no longer valid and nothing was fed.
 */
bool Hash::DeliverStream(const u_char* data, uint64 len)
	{
	// Give the base class its chance to see the data first.
	Action::DeliverStream(data, len);

	const bool usable = hash->IsValid();

	if ( usable )
		hash->Feed(data, len);

	return usable;
	}
/**
 * Handles the end of the file by recording the hash result.
 * @return always false: once the end of the file is reached this action
 *         has nothing more to do.
 */
bool Hash::EndOfFile()
{
Action::EndOfFile();
// All data has been seen; store the result in the file's results record.
Finalize();
return false;
}
/**
 * Handles missing data in the file stream.
 * @param offset position of the missing data (unused).
 * @param len number of missing bytes (unused).
 * @return always false, i.e. the action reports itself as done as soon as
 *         any part of the stream is missing.
 */
bool Hash::Undelivered(uint64 offset, uint64 len)
{
return false;
}
void Hash::Finalize()
{
if ( ! hash->IsValid() ) return;
StringVal* sv = hash->Get();
int i = GetResultFieldOffset();
if ( i < 0 )
reporter->InternalError("Hash Action result field not found");
info->Results()->Assign(i, sv);
}

88
src/file_analysis/Hash.h Normal file
View file

@ -0,0 +1,88 @@
#ifndef FILE_ANALYSIS_HASH_H
#define FILE_ANALYSIS_HASH_H
#include <string>
#include "Val.h"
#include "OpaqueVal.h"
#include "Info.h"
#include "Action.h"
namespace file_analysis {
/**
 * An action to produce a hash of file contents.
 *
 * This is the common base for the concrete hash actions: it feeds file
 * content delivered as a stream into a HashVal and stores the result in
 * the file's "action_results" record. Subclasses pick the hash
 * implementation and name the result field.
 */
class Hash : public Action {
public:
/**
 * Attempts to record the hash result and deletes the hash state.
 */
virtual ~Hash();
/**
 * Feeds sequential file content into the hash.
 * @return false if the hash is no longer valid, else true.
 */
virtual bool DeliverStream(const u_char* data, uint64 len);
/**
 * Records the hash result at the end of the file.
 * @return always false.
 */
virtual bool EndOfFile();
/**
 * Handles missing data in the file stream.
 * @return always false.
 */
virtual bool Undelivered(uint64 offset, uint64 len);
protected:
/**
 * Constructor; only subclasses create instances.
 * @param arg_info the file the action is attached to.
 * @param arg_tag the tag of the concrete hash action.
 * @param hv the hash state to use; deleted by the destructor.
 */
Hash(Info* arg_info, ActionTag arg_tag, HashVal* hv);
/**
 * If the hash is still valid, stores its result in the file's
 * "action_results" record.
 */
void Finalize();
/**
 * @return the offset of this action's result field within the
 *         FileAnalysis::ActionResults record; a negative value is treated
 *         as "field not found" by Finalize().
 */
virtual int GetResultFieldOffset() const = 0;
HashVal* hash; /**< Hash state that file content is fed into. */
};
/**
 * An action to produce an MD5 hash of file contents. The result goes into
 * the "md5" field of the FileAnalysis::ActionResults record.
 */
class MD5 : public Hash {
public:
/**
 * Factory function for this action.
 * @param args action arguments from the script layer (unused).
 * @param info the file to attach the action to.
 * @return a newly allocated MD5 action.
 */
static Action* Instantiate(const RecordVal* args, Info* info)
{ return new MD5(info); }
protected:
/**
 * Constructor; creates the action with a fresh MD5Val as hash state.
 */
MD5(Info* arg_info)
: Hash(arg_info, BifEnum::FileAnalysis::ACTION_MD5, new MD5Val()) {}
/**
 * @return the offset of the "md5" field in FileAnalysis::ActionResults.
 */
virtual int GetResultFieldOffset() const
{ return BifType::Record::FileAnalysis::ActionResults->
FieldOffset("md5"); }
};
/**
 * An action to produce a SHA1 hash of file contents. The result goes into
 * the "sha1" field of the FileAnalysis::ActionResults record.
 */
class SHA1 : public Hash {
public:
/**
 * Factory function for this action.
 * @param args action arguments from the script layer (unused).
 * @param info the file to attach the action to.
 * @return a newly allocated SHA1 action.
 */
static Action* Instantiate(const RecordVal* args, Info* info)
{ return new SHA1(info); }
protected:
/**
 * Constructor; creates the action with a fresh SHA1Val as hash state.
 */
SHA1(Info* arg_info)
: Hash(arg_info, BifEnum::FileAnalysis::ACTION_SHA1, new SHA1Val()) {}
/**
 * @return the offset of the "sha1" field in FileAnalysis::ActionResults.
 */
virtual int GetResultFieldOffset() const
{ return BifType::Record::FileAnalysis::ActionResults->
FieldOffset("sha1"); }
};
/**
 * An action to produce a SHA256 hash of file contents. The result goes
 * into the "sha256" field of the FileAnalysis::ActionResults record.
 */
class SHA256 : public Hash {
public:
/**
 * Factory function for this action.
 * @param args action arguments from the script layer (unused).
 * @param info the file to attach the action to.
 * @return a newly allocated SHA256 action.
 */
static Action* Instantiate(const RecordVal* args, Info* info)
{ return new SHA256(info); }
protected:
/**
 * Constructor; creates the action with a fresh SHA256Val as hash state.
 */
SHA256(Info* arg_info)
: Hash(arg_info, BifEnum::FileAnalysis::ACTION_SHA256, new SHA256Val()) {}
/**
 * @return the offset of the "sha256" field in FileAnalysis::ActionResults.
 */
virtual int GetResultFieldOffset() const
{ return BifType::Record::FileAnalysis::ActionResults->
FieldOffset("sha256"); }
};
} // namespace file_analysis
#endif

View file

@ -8,12 +8,16 @@
#include "Action.h" #include "Action.h"
#include "Extract.h" #include "Extract.h"
#include "Hash.h"
using namespace file_analysis; using namespace file_analysis;
// keep in order w/ declared enum values in file_analysis.bif // keep in order w/ declared enum values in file_analysis.bif
static ActionInstantiator action_factory[] = { static ActionInstantiator action_factory[] = {
Extract::Instantiate, Extract::Instantiate,
MD5::Instantiate,
SHA1::Instantiate,
SHA256::Instantiate,
}; };
static TableVal* empty_conn_id_set() static TableVal* empty_conn_id_set()
@ -53,6 +57,7 @@ int Info::overflow_bytes_idx = -1;
int Info::timeout_interval_idx = -1; int Info::timeout_interval_idx = -1;
int Info::actions_idx = -1; int Info::actions_idx = -1;
int Info::action_args_idx = -1; int Info::action_args_idx = -1;
int Info::action_results_idx = -1;
void Info::InitFieldIndices() void Info::InitFieldIndices()
{ {
@ -69,6 +74,7 @@ void Info::InitFieldIndices()
timeout_interval_idx = Idx("timeout_interval"); timeout_interval_idx = Idx("timeout_interval");
actions_idx = Idx("actions"); actions_idx = Idx("actions");
action_args_idx = Idx("action_args"); action_args_idx = Idx("action_args");
action_results_idx = Idx("action_results");
} }
Info::Info(const string& unique, Connection* conn, const string& protocol) Info::Info(const string& unique, Connection* conn, const string& protocol)
@ -149,6 +155,11 @@ double Info::TimeoutInterval() const
return LookupFieldDefaultInterval(timeout_interval_idx); return LookupFieldDefaultInterval(timeout_interval_idx);
} }
RecordVal* Info::Results() const
{
return val->Lookup(action_results_idx)->AsRecordVal();
}
void Info::IncrementByteCount(uint64 size, int field_idx) void Info::IncrementByteCount(uint64 size, int field_idx)
{ {
uint64 old = LookupFieldDefaultCount(field_idx); uint64 old = LookupFieldDefaultCount(field_idx);
@ -174,22 +185,25 @@ void Info::ScheduleInactivityTimer() const
timer_mgr->Add(new InfoTimer(network_time, file_id, TimeoutInterval())); timer_mgr->Add(new InfoTimer(network_time, file_id, TimeoutInterval()));
} }
bool Info::AddAction(EnumVal* act, RecordVal* args) bool Info::AddAction(ActionTag act, RecordVal* args)
{ {
if ( actions.find(act->AsEnum()) != actions.end() ) return false; if ( actions.find(act) != actions.end() ) return false;
Action* a = action_factory[act->AsEnum()](args, this); ActionTag tag = static_cast<ActionTag>(act);
Action* a = action_factory[act](args, this);
if ( ! a ) return false; if ( ! a ) return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->AsEnum(), DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act,
file_id.c_str()); file_id.c_str());
actions[act->AsEnum()] = a; actions[act] = a;
VectorVal* av = val->LookupWithDefault(actions_idx)->AsVectorVal(); VectorVal* av = val->LookupWithDefault(actions_idx)->AsVectorVal();
VectorVal* aav = val->LookupWithDefault(action_args_idx)->AsVectorVal(); VectorVal* aav = val->LookupWithDefault(action_args_idx)->AsVectorVal();
av->Assign(av->Size(), act->Ref(), 0); EnumVal* ev = new EnumVal(act, BifType::Enum::FileAnalysis::Action);
av->Assign(av->Size(), ev, 0);
aav->Assign(aav->Size(), args->Ref(), 0); aav->Assign(aav->Size(), args->Ref(), 0);
Unref(av); Unref(av);
@ -198,13 +212,18 @@ bool Info::AddAction(EnumVal* act, RecordVal* args)
return true; return true;
} }
bool Info::RemoveAction(EnumVal* act) bool Info::RemoveAction(ActionTag act)
{ {
ActionMap::iterator it = actions.find(act->AsEnum()); ActionMap::iterator it = actions.find(act);
if ( it == actions.end() ) return false; if ( it == actions.end() ) return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", act->AsEnum(), return RemoveAction(it);
}
bool Info::RemoveAction(const ActionMap::iterator& it)
{
DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", it->first,
file_id.c_str()); file_id.c_str());
delete it->second; delete it->second;
actions.erase(it); actions.erase(it);
@ -213,12 +232,18 @@ bool Info::RemoveAction(EnumVal* act)
void Info::DataIn(const u_char* data, uint64 len, uint64 offset) void Info::DataIn(const u_char* data, uint64 len, uint64 offset)
{ {
ActionMap::const_iterator it; ActionMap::iterator it = actions.begin();
for ( it = actions.begin(); it != actions.end(); ++it ) while ( it != actions.end() )
it->second->DeliverChunk(data, len, offset); if ( ! it->second->DeliverChunk(data, len, offset) )
RemoveAction(it++);
else
++it;
// TODO: check reassembly requirement based on buffer size in record // TODO: check reassembly requirement based on buffer size in record
if ( ! need_reassembly ) return; if ( need_reassembly )
{
// TODO
}
// TODO: reassembly stuff, possibly having to deliver chunks if buffer full // TODO: reassembly stuff, possibly having to deliver chunks if buffer full
// and increment overflow bytes // and increment overflow bytes
@ -228,13 +253,22 @@ void Info::DataIn(const u_char* data, uint64 len, uint64 offset)
void Info::DataIn(const u_char* data, uint64 len) void Info::DataIn(const u_char* data, uint64 len)
{ {
ActionMap::const_iterator it; ActionMap::iterator it = actions.begin();
for ( it = actions.begin(); it != actions.end(); ++it ) while ( it != actions.end() )
{ {
it->second->DeliverStream(data, len); if ( ! it->second->DeliverStream(data, len) )
{
RemoveAction(it++);
continue;
}
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) + uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
LookupFieldDefaultCount(missing_bytes_idx); LookupFieldDefaultCount(missing_bytes_idx);
it->second->DeliverChunk(data, len, offset);
if ( ! it->second->DeliverChunk(data, len, offset) )
RemoveAction(it++);
else
++it;
} }
IncrementByteCount(len, seen_bytes_idx); IncrementByteCount(len, seen_bytes_idx);
@ -242,16 +276,22 @@ void Info::DataIn(const u_char* data, uint64 len)
void Info::EndOfFile() void Info::EndOfFile()
{ {
ActionMap::const_iterator it; ActionMap::iterator it = actions.begin();
for ( it = actions.begin(); it != actions.end(); ++it ) while ( it != actions.end() )
it->second->EndOfFile(); if ( ! it->second->EndOfFile() )
RemoveAction(it++);
else
++it;
} }
void Info::Gap(uint64 offset, uint64 len) void Info::Gap(uint64 offset, uint64 len)
{ {
ActionMap::const_iterator it; ActionMap::iterator it = actions.begin();
for ( it = actions.begin(); it != actions.end(); ++it ) while ( it != actions.end() )
it->second->Undelivered(offset, len); if ( ! it->second->Undelivered(offset, len) )
RemoveAction(it++);
else
++it;
IncrementByteCount(len, missing_bytes_idx); IncrementByteCount(len, missing_bytes_idx);
} }

View file

@ -29,6 +29,11 @@ public:
*/ */
FileID GetFileID() const { return file_id; } FileID GetFileID() const { return file_id; }
/**
* @return record val of the "action_results" field from #val record.
*/
RecordVal* Results() const;
/** /**
* @return the string which uniquely identifies the file. * @return the string which uniquely identifies the file.
*/ */
@ -67,13 +72,13 @@ public:
* Attaches an action. Only one action per type can be attached at a time. * Attaches an action. Only one action per type can be attached at a time.
* @return true if the action was attached, else false. * @return true if the action was attached, else false.
*/ */
bool AddAction(EnumVal* act, RecordVal* args); bool AddAction(ActionTag act, RecordVal* args);
/** /**
* Removes an action. * Removes an action.
* @return true if the action was removed, else false. * @return true if the action was removed, else false.
*/ */
bool RemoveAction(EnumVal* act); bool RemoveAction(ActionTag act);
/** /**
* Pass in non-sequential data and deliver to attached actions/analyzers. * Pass in non-sequential data and deliver to attached actions/analyzers.
@ -99,6 +104,8 @@ protected:
friend class Manager; friend class Manager;
typedef map<ActionTag, Action*> ActionMap;
/** /**
* Constructor; only file_analysis::Manager should be creating these. * Constructor; only file_analysis::Manager should be creating these.
*/ */
@ -128,16 +135,19 @@ protected:
*/ */
double LookupFieldDefaultInterval(int idx) const; double LookupFieldDefaultInterval(int idx) const;
/**
* Removes an action.
* @return true if the action was removed, else false.
*/
bool RemoveAction(const ActionMap::iterator& it);
FileID file_id; /**< A pretty hash that likely identifies file*/ FileID file_id; /**< A pretty hash that likely identifies file*/
string unique; /**< A string that uniquely identifies file */ string unique; /**< A string that uniquely identifies file */
RecordVal* val; /**< \c FileAnalysis::Info from script layer. */ RecordVal* val; /**< \c FileAnalysis::Info from script layer. */
double last_activity_time; /**< Time of last activity. */ double last_activity_time; /**< Time of last activity. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */ bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */ bool need_reassembly; /**< Whether file stream reassembly is needed. */
ActionMap actions; /**< Actions/analysis to perform on file. */
typedef map<int, Action*> ActionMap;
ActionMap actions;
/** /**
* @return the field offset in #val record corresponding to \a field_name. * @return the field offset in #val record corresponding to \a field_name.
@ -161,6 +171,7 @@ protected:
static int timeout_interval_idx; static int timeout_interval_idx;
static int actions_idx; static int actions_idx;
static int action_args_idx; static int action_args_idx;
static int action_results_idx;
}; };
} // namespace file_analysis } // namespace file_analysis

View file

@ -3,6 +3,7 @@
#include "Manager.h" #include "Manager.h"
#include "Info.h" #include "Info.h"
#include "Action.h"
using namespace file_analysis; using namespace file_analysis;
@ -106,7 +107,7 @@ bool Manager::AddAction(const FileID& file_id, EnumVal* act,
if ( ! info ) return false; if ( ! info ) return false;
return info->AddAction(act, args); return info->AddAction(static_cast<ActionTag>(act->AsEnum()), args);
} }
bool Manager::RemoveAction(const FileID& file_id, EnumVal* act) const bool Manager::RemoveAction(const FileID& file_id, EnumVal* act) const
@ -115,7 +116,7 @@ bool Manager::RemoveAction(const FileID& file_id, EnumVal* act) const
if ( ! info ) return false; if ( ! info ) return false;
return info->RemoveAction(act); return info->RemoveAction(static_cast<ActionTag>(act->AsEnum()));
} }
Info* Manager::GetInfo(const string& unique, Connection* conn, Info* Manager::GetInfo(const string& unique, Connection* conn,