Merge remote-tracking branch 'origin/topic/jsiwek/file-analysis' into topic/seth/file-analysis-exe-analyzer

Conflicts:
	src/file_analysis/ActionSet.cc
	src/types.bif
This commit is contained in:
Seth Hall 2013-04-24 13:01:39 -04:00
commit d72980828f
141 changed files with 3754 additions and 888 deletions

View file

@ -1,101 +0,0 @@
#ifndef FILE_ANALYSIS_ACTION_H
#define FILE_ANALYSIS_ACTION_H
#include "Val.h"
#include "NetVar.h"
namespace file_analysis {
typedef BifEnum::FileAnalysis::Action ActionTag;
class File;
/**
 * Common base for every action that can be attached to a
 * file_analysis::File.  Holds the action's tag, a reference to its
 * ActionArgs record and the file it is attached to.
 */
class Action {
public:
	/**
	 * Logs the destruction and drops the reference on the ActionArgs record
	 * that the constructor took.
	 */
	virtual ~Action()
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Destroy action %d", tag);
		Unref(args);
		}

	/**
	 * Hook for receiving file data non-sequentially; the default accepts
	 * and ignores the data.
	 * @param data pointer to the chunk's bytes.
	 * @param len number of bytes in the chunk.
	 * @param offset position of the chunk within the file.
	 * @return true if the action can keep receiving data/events, false if
	 *         it is essentially "done".
	 */
	virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset)
		{
		return true;
		}

	/**
	 * Hook for receiving file data sequentially; the default accepts and
	 * ignores the data.
	 * @param data pointer to the next bytes of the stream.
	 * @param len number of bytes available.
	 * @return true if the action can keep receiving data/events, false if
	 *         it is essentially "done".
	 */
	virtual bool DeliverStream(const u_char* data, uint64 len)
		{
		return true;
		}

	/**
	 * Hook for the EOF signal: no more data will arrive and the
	 * action/analyzer may be deleted/cleaned up soon.
	 * @return true if the action can keep receiving data/events, false if
	 *         it is essentially "done".
	 */
	virtual bool EndOfFile()
		{
		return true;
		}

	/**
	 * Hook for data missing from a file stream.
	 * @param offset where the missing range starts.
	 * @param len length of the missing range.
	 * @return true if the action can keep receiving data/events, false if
	 *         it is essentially "done".
	 */
	virtual bool Undelivered(uint64 offset, uint64 len)
		{
		return true;
		}

	/**
	 * @return the action type enum value.
	 */
	ActionTag Tag() const
		{
		return tag;
		}

	/**
	 * @return the ActionArgs record associated with the action.
	 */
	RecordVal* Args() const
		{
		return args;
		}

	/**
	 * @return the file_analysis::File object to which the action is attached.
	 */
	File* GetFile() const
		{
		return file;
		}

	/**
	 * Reads the 'act' field of an ActionArgs record.
	 * @param args the ActionArgs record value to inspect.
	 * @return the value of the 'act' field converted to an ActionTag.
	 */
	static ActionTag ArgsTag(const RecordVal* args)
		{
		const int act_offset =
			BifType::Record::FileAnalysis::ActionArgs->FieldOffset("act");

		return static_cast<ActionTag>(args->Lookup(act_offset)->AsEnum());
		}

protected:
	/**
	 * Only subclasses construct actions.  Takes a new reference on
	 * \a arg_args, released again in the destructor.
	 * @param arg_args the ActionArgs record describing the action.
	 * @param arg_file the file the action is attached to.
	 */
	Action(RecordVal* arg_args, File* arg_file)
		: tag(ArgsTag(arg_args)),
		  args(arg_args->Ref()->AsRecordVal()),
		  file(arg_file)
		{
		}

	ActionTag tag;	/**< Tag taken from the 'act' field of #args. */
	RecordVal* args;	/**< Referenced ActionArgs record. */
	File* file;	/**< File the action is attached to. */
};
typedef Action* (*ActionInstantiator)(RecordVal* args, File* file);
} // namespace file_analysis
#endif

View file

@ -1,189 +0,0 @@
#include "ActionSet.h"
#include "File.h"
#include "Action.h"
#include "Extract.h"
#include "DataEvent.h"
#include "Hash.h"
#include "analyzers/PE.h"
using namespace file_analysis;
// Table of factory functions used to create actions.  InstantiateAction()
// indexes it directly with the numeric value of the action tag, so the
// entries must stay in the same order as the enum values.
// keep in order w/ declared enum values in file_analysis.bif
static ActionInstantiator action_factory[] = {
file_analysis::Extract::Instantiate,
file_analysis::MD5::Instantiate,
file_analysis::SHA1::Instantiate,
file_analysis::SHA256::Instantiate,
file_analysis::DataEvent::Instantiate,
file_analysis::PE_Analyzer::Instantiate,
};
static void action_del_func(void* v)
{
delete (Action*) v;
}
// Builds an empty action set for arg_file: registers the delete callback on
// the action map and creates the hasher used to turn ActionArgs records
// into map keys.
ActionSet::ActionSet(File* arg_file) : file(arg_file)
	{
	action_map.SetDeleteFunc(action_del_func);

	// The hasher's key type is a one-element type list holding ActionArgs.
	TypeList* key_types = new TypeList();
	key_types->Append(BifType::Record::FileAnalysis::ActionArgs->Ref());

	action_hash = new CompositeHash(key_types);

	// Release the local reference on the type list.
	Unref(key_types);
	}
// Discards every modification that was queued but never performed (Abort()
// frees what the modification still owns), then frees the hasher.
ActionSet::~ActionSet()
	{
	for ( ; ! mod_queue.empty(); mod_queue.pop() )
		{
		Modification* pending = mod_queue.front();
		pending->Abort();
		delete pending;
		}

	delete action_hash;
	}
// Attaches an action described by args right away.
// Returns true if the action is now attached (or an equivalent one already
// was), false if it could not be instantiated.
bool ActionSet::AddAction(RecordVal* args)
	{
	HashKey* key = GetKey(args);

	if ( ! action_map.Lookup(key) )
		{
		Action* act = InstantiateAction(args);

		if ( act )
			{
			// InsertAction() takes care of freeing the key.
			InsertAction(act, key);
			return true;
			}

		delete key;
		return false;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d skipped for file id"
	        " %s: already exists", Action::ArgsTag(args),
	        file->GetID().c_str());

	delete key;
	return true;
	}
// Instantiates the action now but defers attaching it until the queue is
// drained.  Returns false if the action could not be instantiated.
bool ActionSet::QueueAddAction(RecordVal* args)
	{
	HashKey* key = GetKey(args);
	Action* act = InstantiateAction(args);

	if ( act )
		{
		// The queued modification now owns both the action and the key.
		mod_queue.push(new Add(act, key));
		return true;
		}

	delete key;
	return false;
	}
// Carries out a queued addition.  If an action with the same key is
// already present, the queued action and key are discarded instead.
// Always returns true.
bool ActionSet::Add::Perform(ActionSet* set)
	{
	if ( ! set->action_map.Lookup(key) )
		{
		set->InsertAction(act, key);
		return true;
		}

	// Log before Abort(), which deletes the action being reported on.
	DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d skipped for file id"
	        " %s: already exists", act->Tag(),
	        act->GetFile()->GetID().c_str());

	Abort();
	return true;
	}
bool ActionSet::RemoveAction(const RecordVal* args)
{
return RemoveAction(Action::ArgsTag(args), GetKey(args));
}
// Removes and destroys the action stored under key; key is always freed.
// tag is only used for the log message when nothing was stored under key.
// Returns true if an action was removed, else false.
bool ActionSet::RemoveAction(ActionTag tag, HashKey* key)
	{
	Action* removed = (Action*) action_map.Remove(key);
	delete key;

	if ( removed )
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s",
		        removed->Tag(), file->GetID().c_str());
		delete removed;
		return true;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove action %d for file id %s",
	        tag, file->GetID().c_str());
	return false;
	}
// Queues removal of the action matching args; the removal itself happens
// when the queue is drained.
// Returns true if such an action is attached at the time of the call.
bool ActionSet::QueueRemoveAction(const RecordVal* args)
	{
	HashKey* key = GetKey(args);
	ActionTag tag = Action::ArgsTag(args);

	// The queued modification owns the key from here on; it is still alive
	// for the lookup below because nothing has been performed yet.
	mod_queue.push(new Remove(tag, key));

	return action_map.Lookup(key) != 0;
	}
// Carries out a queued removal; returns what ActionSet::RemoveAction()
// reports, i.e. whether an action was actually removed.
bool ActionSet::Remove::Perform(ActionSet* set)
	{
	const bool removed = set->RemoveAction(tag, key);
	return removed;
	}
// Hashes an ActionArgs record into a newly computed key for the action map.
// A failed hash is reported as an internal error.
HashKey* ActionSet::GetKey(const RecordVal* args) const
	{
	HashKey* key = action_hash->ComputeHash(args, 1);

	if ( key )
		return key;

	reporter->InternalError("ActionArgs type mismatch");
	return 0;
	}
// Creates the action described by args via the factory table.
// @param args the ActionArgs record; its tag selects the factory.
// @return the new action, or 0 if the tag has no factory entry or the
//         factory itself failed.
Action* ActionSet::InstantiateAction(RecordVal* args) const
	{
	const ActionTag tag = Action::ArgsTag(args);

	// Guard the table lookup: a tag outside the table (e.g. an enum value
	// added without a matching factory entry) must not index past the end.
	// Casting to unsigned also rejects negative values.
	if ( static_cast<unsigned long>(tag) >=
	     sizeof(action_factory) / sizeof(action_factory[0]) )
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d failed for file id"
		        " %s: no factory for tag", tag, file->GetID().c_str());
		return 0;
		}

	Action* act = action_factory[tag](args, file);

	if ( ! act )
		{
		// The two literals are concatenated into a single format string;
		// a comma between them would turn " %s" into an argument.
		DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d failed for file id"
		        " %s", tag, file->GetID().c_str());
		return 0;
		}

	return act;
	}
// Stores act in the action map under key and then frees key.
void ActionSet::InsertAction(Action* act, HashKey* key)
	{
	DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s",
	        act->Tag(),
	        file->GetID().c_str());

	action_map.Insert(key, act);

	// The key is not needed after insertion.
	delete key;
	}
void ActionSet::DrainModifications()
{
if ( mod_queue.empty() ) return;
DBG_LOG(DBG_FILE_ANALYSIS, "Start flushing action mod queue of file id %s",
file->GetID().c_str());
do
{
Modification* mod = mod_queue.front();
mod->Perform(this);
delete mod;
mod_queue.pop();
} while ( ! mod_queue.empty() );
DBG_LOG(DBG_FILE_ANALYSIS, "End flushing action mod queue of file id %s",
file->GetID().c_str());
}

View file

@ -1,109 +0,0 @@
#ifndef FILE_ANALYSIS_ACTIONSET_H
#define FILE_ANALYSIS_ACTIONSET_H
#include <queue>
#include "Action.h"
#include "Dict.h"
#include "CompHash.h"
#include "Val.h"
namespace file_analysis {
class File;
declare(PDict,Action);
/**
 * A set of file analysis actions indexed by ActionArgs.  Allows queueing
 * of additions/removals so that those modifications can happen at
 * well-defined times (e.g. to make sure a loop iterator isn't invalidated).
 */
class ActionSet {
public:
	/**
	 * @param arg_file the file whose actions this set holds.
	 */
	ActionSet(File* arg_file);

	/**
	 * Aborts any modifications still queued and frees the key hasher.
	 */
	~ActionSet();

	/**
	 * Attach an action immediately.
	 * @param args an ActionArgs record describing the action.
	 * @return true if action was instantiated/attached, else false.
	 */
	bool AddAction(RecordVal* args);

	/**
	 * Instantiate an action now, but attach it only when the queue is drained.
	 * @param args an ActionArgs record describing the action.
	 * @return true if action was able to be instantiated, else false.
	 */
	bool QueueAddAction(RecordVal* args);

	/**
	 * Remove an action immediately.
	 * @param args an ActionArgs record identifying the action.
	 * @return false if action didn't exist and so wasn't removed, else true.
	 */
	bool RemoveAction(const RecordVal* args);

	/**
	 * Queue removal of an action until the queue is drained.
	 * @param args an ActionArgs record identifying the action.
	 * @return true if action exists at time of call, else false.
	 */
	bool QueueRemoveAction(const RecordVal* args);

	/**
	 * Perform all queued modifications to the currently active actions.
	 */
	void DrainModifications();

	/**
	 * @return a cookie for iterating over the attached actions with
	 *         NextEntry().
	 */
	IterCookie* InitForIteration() const
		{ return action_map.InitForIteration(); }

	/**
	 * @param c a cookie obtained from InitForIteration().
	 * @return the next entry of the underlying action map.
	 */
	Action* NextEntry(IterCookie* c)
		{ return action_map.NextEntry(c); }

protected:
	/** Hashes \a args into a key for #action_map. */
	HashKey* GetKey(const RecordVal* args) const;

	/** Creates the action described by \a args, or returns 0 on failure. */
	Action* InstantiateAction(RecordVal* args) const;

	/** Stores \a act under \a key and frees \a key. */
	void InsertAction(Action* act, HashKey* key);

	/** Removes/destroys the action under \a key and frees \a key. */
	bool RemoveAction(ActionTag tag, HashKey* key);

	File* file;
	CompositeHash* action_hash;	/**< ActionArgs hashes Action map lookup. */
	PDict(Action) action_map;	/**< Actions indexed by ActionArgs. */

	/**
	 * A deferred change to the set.  Perform() applies it; Abort() frees
	 * what it still owns when it is discarded without being applied.
	 */
	class Modification {
	public:
		virtual ~Modification() {}
		virtual bool Perform(ActionSet* set) = 0;
		virtual void Abort() = 0;
	};

	/** Deferred attachment of an already instantiated action. */
	class Add : public Modification {
	public:
		Add(Action* arg_act, HashKey* arg_key)
			: Modification(), act(arg_act), key(arg_key) {}
		virtual ~Add() {}
		virtual bool Perform(ActionSet* set);
		virtual void Abort() { delete act; delete key; }

	protected:
		Action* act;
		HashKey* key;
	};

	/** Deferred removal of the action stored under a key. */
	class Remove : public Modification {
	public:
		Remove(ActionTag arg_tag, HashKey* arg_key)
			: Modification(), tag(arg_tag), key(arg_key) {}
		virtual ~Remove() {}
		virtual bool Perform(ActionSet* set);
		virtual void Abort() { delete key; }

	protected:
		ActionTag tag;
		HashKey* key;
	};

	// Qualified with std:: so this header does not depend on a
	// using-directive leaked by some earlier include.
	typedef std::queue<Modification*> ModQueue;
	ModQueue mod_queue;	/**< Pending modifications, oldest first. */
};
} // namespace file_analysis
#endif

View file

@ -0,0 +1,103 @@
#ifndef FILE_ANALYSIS_ANALYZER_H
#define FILE_ANALYSIS_ANALYZER_H
#include "Val.h"
#include "NetVar.h"
namespace file_analysis {
typedef BifEnum::FileAnalysis::Analyzer FA_Tag;
class File;
/**
 * Common base for every analyzer that can be attached to a
 * file_analysis::File.  Holds the analyzer's tag, a reference to its
 * AnalyzerArgs record and the file it is attached to.
 */
class Analyzer {
public:
	/**
	 * Logs the destruction and drops the reference on the AnalyzerArgs
	 * record that the constructor took.
	 */
	virtual ~Analyzer()
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %d", tag);
		Unref(args);
		}

	/**
	 * Hook for receiving file data non-sequentially; the default accepts
	 * and ignores the data.
	 * @param data pointer to the chunk's bytes.
	 * @param len number of bytes in the chunk.
	 * @param offset position of the chunk within the file.
	 * @return true if the analyzer can keep receiving data/events, false
	 *         if it is essentially "done".
	 */
	virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset)
		{
		return true;
		}

	/**
	 * Hook for receiving file data sequentially; the default accepts and
	 * ignores the data.
	 * @param data pointer to the next bytes of the stream.
	 * @param len number of bytes available.
	 * @return true if the analyzer can keep receiving data/events, false
	 *         if it is essentially "done".
	 */
	virtual bool DeliverStream(const u_char* data, uint64 len)
		{
		return true;
		}

	/**
	 * Hook for the EOF signal: no more data will arrive and the analyzer
	 * may be deleted/cleaned up soon.
	 * @return true if the analyzer can keep receiving data/events, false
	 *         if it is essentially "done".
	 */
	virtual bool EndOfFile()
		{
		return true;
		}

	/**
	 * Hook for data missing from a file stream.
	 * @param offset where the missing range starts.
	 * @param len length of the missing range.
	 * @return true if the analyzer can keep receiving data/events, false
	 *         if it is essentially "done".
	 */
	virtual bool Undelivered(uint64 offset, uint64 len)
		{
		return true;
		}

	/**
	 * @return the analyzer type enum value.
	 */
	FA_Tag Tag() const
		{
		return tag;
		}

	/**
	 * @return the AnalyzerArgs record associated with the analyzer.
	 */
	RecordVal* Args() const
		{
		return args;
		}

	/**
	 * @return the file_analysis::File object to which the analyzer is
	 *         attached.
	 */
	File* GetFile() const
		{
		return file;
		}

	/**
	 * Reads the 'tag' field of an AnalyzerArgs record.
	 * @param args the AnalyzerArgs record value to inspect.
	 * @return the value of the 'tag' field converted to an FA_Tag.
	 */
	static FA_Tag ArgsTag(const RecordVal* args)
		{
		const int tag_offset =
			BifType::Record::FileAnalysis::AnalyzerArgs->FieldOffset("tag");

		return static_cast<FA_Tag>(args->Lookup(tag_offset)->AsEnum());
		}

protected:
	/**
	 * Only subclasses construct analyzers.  Takes a new reference on
	 * \a arg_args, released again in the destructor.
	 * @param arg_args the AnalyzerArgs record describing the analyzer.
	 * @param arg_file the file the analyzer is attached to.
	 */
	Analyzer(RecordVal* arg_args, File* arg_file)
		: tag(ArgsTag(arg_args)),
		  args(arg_args->Ref()->AsRecordVal()),
		  file(arg_file)
		{
		}

	FA_Tag tag;	/**< Tag taken from the 'tag' field of #args. */
	RecordVal* args;	/**< Referenced AnalyzerArgs record. */
	File* file;	/**< File the analyzer is attached to. */
};
typedef file_analysis::Analyzer* (*AnalyzerInstantiator)(RecordVal* args,
File* file);
} // namespace file_analysis
#endif

View file

@ -0,0 +1,188 @@
#include "AnalyzerSet.h"
#include "File.h"
#include "Analyzer.h"
#include "Extract.h"
#include "DataEvent.h"
#include "Hash.h"
using namespace file_analysis;
// Table of factory functions used to create file analyzers.
// InstantiateAnalyzer() indexes it directly with the numeric value of the
// analyzer tag, so the entries must stay in the same order as the enum
// values.
// keep in order w/ declared enum values in file_analysis.bif
static AnalyzerInstantiator analyzer_factory[] = {
file_analysis::Extract::Instantiate,
file_analysis::MD5::Instantiate,
file_analysis::SHA1::Instantiate,
file_analysis::SHA256::Instantiate,
file_analysis::DataEvent::Instantiate,
};
static void analyzer_del_func(void* v)
{
delete (file_analysis::Analyzer*) v;
}
// Builds an empty analyzer set for arg_file: registers the delete callback
// on the analyzer map and creates the hasher used to turn AnalyzerArgs
// records into map keys.
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file)
	{
	analyzer_map.SetDeleteFunc(analyzer_del_func);

	// The hasher's key type is a one-element type list holding AnalyzerArgs.
	TypeList* key_types = new TypeList();
	key_types->Append(BifType::Record::FileAnalysis::AnalyzerArgs->Ref());

	analyzer_hash = new CompositeHash(key_types);

	// Release the local reference on the type list.
	Unref(key_types);
	}
// Discards every modification that was queued but never performed (Abort()
// frees what the modification still owns), then frees the hasher.
AnalyzerSet::~AnalyzerSet()
	{
	for ( ; ! mod_queue.empty(); mod_queue.pop() )
		{
		Modification* pending = mod_queue.front();
		pending->Abort();
		delete pending;
		}

	delete analyzer_hash;
	}
// Attaches an analyzer described by args right away.
// Returns true if the analyzer is now attached (or an equivalent one
// already was), false if it could not be instantiated.
bool AnalyzerSet::Add(RecordVal* args)
	{
	HashKey* key = GetKey(args);

	if ( ! analyzer_map.Lookup(key) )
		{
		file_analysis::Analyzer* created = InstantiateAnalyzer(args);

		if ( created )
			{
			// Insert() takes care of freeing the key.
			Insert(created, key);
			return true;
			}

		delete key;
		return false;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d skipped for file id"
	        " %s: already exists", file_analysis::Analyzer::ArgsTag(args),
	        file->GetID().c_str());

	delete key;
	return true;
	}
// Instantiates the analyzer now but defers attaching it until the queue is
// drained.  Returns false if the analyzer could not be instantiated.
bool AnalyzerSet::QueueAdd(RecordVal* args)
	{
	HashKey* key = GetKey(args);
	file_analysis::Analyzer* created = InstantiateAnalyzer(args);

	if ( created )
		{
		// The queued modification now owns both the analyzer and the key.
		mod_queue.push(new AddMod(created, key));
		return true;
		}

	delete key;
	return false;
	}
// Carries out a queued addition.  If an analyzer with the same key is
// already present, the queued analyzer and key are discarded instead.
// Always returns true.
bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
	{
	if ( ! set->analyzer_map.Lookup(key) )
		{
		set->Insert(a, key);
		return true;
		}

	// Log before Abort(), which deletes the analyzer being reported on.
	DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d skipped for file id"
	        " %s: already exists", a->Tag(), a->GetFile()->GetID().c_str());

	Abort();
	return true;
	}
bool AnalyzerSet::Remove(const RecordVal* args)
{
return Remove(file_analysis::Analyzer::ArgsTag(args), GetKey(args));
}
// Removes and destroys the analyzer stored under key; key is always freed.
// tag is only used for the log message when nothing was stored under key.
// Returns true if an analyzer was removed, else false.
bool AnalyzerSet::Remove(FA_Tag tag, HashKey* key)
	{
	file_analysis::Analyzer* removed =
		(file_analysis::Analyzer*) analyzer_map.Remove(key);
	delete key;

	if ( removed )
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %d for file id %s",
		        removed->Tag(), file->GetID().c_str());
		delete removed;
		return true;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %d for file id %s",
	        tag, file->GetID().c_str());
	return false;
	}
// Queues removal of the analyzer matching args; the removal itself happens
// when the queue is drained.
// Returns true if such an analyzer is attached at the time of the call.
bool AnalyzerSet::QueueRemove(const RecordVal* args)
	{
	HashKey* key = GetKey(args);
	FA_Tag tag = file_analysis::Analyzer::ArgsTag(args);

	// The queued modification owns the key from here on; it is still alive
	// for the lookup below because nothing has been performed yet.
	mod_queue.push(new RemoveMod(tag, key));

	return analyzer_map.Lookup(key) != 0;
	}
// Carries out a queued removal; returns what AnalyzerSet::Remove()
// reports, i.e. whether an analyzer was actually removed.
bool AnalyzerSet::RemoveMod::Perform(AnalyzerSet* set)
	{
	const bool removed = set->Remove(tag, key);
	return removed;
	}
// Hashes an AnalyzerArgs record into a newly computed key for the analyzer
// map.  A failed hash is reported as an internal error.
HashKey* AnalyzerSet::GetKey(const RecordVal* args) const
	{
	HashKey* key = analyzer_hash->ComputeHash(args, 1);

	if ( key )
		return key;

	reporter->InternalError("AnalyzerArgs type mismatch");
	return 0;
	}
// Creates the analyzer described by args via the factory table.
// @param args the AnalyzerArgs record; its tag selects the factory.
// @return the new analyzer, or 0 if the tag has no factory entry or the
//         factory itself failed.
file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const
	{
	const FA_Tag tag = file_analysis::Analyzer::ArgsTag(args);

	// Guard the table lookup: a tag outside the table (e.g. an enum value
	// added without a matching factory entry) must not index past the end.
	// Casting to unsigned also rejects negative values.
	if ( static_cast<unsigned long>(tag) >=
	     sizeof(analyzer_factory) / sizeof(analyzer_factory[0]) )
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d failed for file id"
		        " %s: no factory for tag", tag, file->GetID().c_str());
		return 0;
		}

	file_analysis::Analyzer* a = analyzer_factory[tag](args, file);

	if ( ! a )
		{
		// The two literals are concatenated into a single format string;
		// a comma between them would turn " %s" into an argument.
		DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d failed for file id"
		        " %s", tag, file->GetID().c_str());
		return 0;
		}

	return a;
	}
// Stores a in the analyzer map under key and then frees key.
void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key)
	{
	DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d for file id %s",
	        a->Tag(),
	        file->GetID().c_str());

	analyzer_map.Insert(key, a);

	// The key is not needed after insertion.
	delete key;
	}
void AnalyzerSet::DrainModifications()
{
if ( mod_queue.empty() ) return;
DBG_LOG(DBG_FILE_ANALYSIS, "Start analyzer mod queue flush of file id %s",
file->GetID().c_str());
do
{
Modification* mod = mod_queue.front();
mod->Perform(this);
delete mod;
mod_queue.pop();
} while ( ! mod_queue.empty() );
DBG_LOG(DBG_FILE_ANALYSIS, "End flushing analyzer mod queue of file id %s",
file->GetID().c_str());
}

View file

@ -0,0 +1,109 @@
#ifndef FILE_ANALYSIS_ANALYZERSET_H
#define FILE_ANALYSIS_ANALYZERSET_H
#include <queue>
#include "Analyzer.h"
#include "Dict.h"
#include "CompHash.h"
#include "Val.h"
namespace file_analysis {
class File;
declare(PDict,Analyzer);
/**
 * A set of file analysis analyzers indexed by AnalyzerArgs.  Allows queueing
 * of additions/removals so that those modifications can happen at
 * well-defined times (e.g. to make sure a loop iterator isn't invalidated).
 */
class AnalyzerSet {
public:
	/**
	 * @param arg_file the file whose analyzers this set holds.
	 */
	AnalyzerSet(File* arg_file);

	/**
	 * Aborts any modifications still queued and frees the key hasher.
	 */
	~AnalyzerSet();

	/**
	 * Attach an analyzer immediately.
	 * @param args an AnalyzerArgs record describing the analyzer.
	 * @return true if analyzer was instantiated/attached, else false.
	 */
	bool Add(RecordVal* args);

	/**
	 * Instantiate an analyzer now, but attach it only when the queue is
	 * drained.
	 * @param args an AnalyzerArgs record describing the analyzer.
	 * @return true if analyzer was able to be instantiated, else false.
	 */
	bool QueueAdd(RecordVal* args);

	/**
	 * Remove an analyzer immediately.
	 * @param args an AnalyzerArgs record identifying the analyzer.
	 * @return false if analyzer didn't exist and so wasn't removed, else true.
	 */
	bool Remove(const RecordVal* args);

	/**
	 * Queue removal of an analyzer until the queue is drained.
	 * @param args an AnalyzerArgs record identifying the analyzer.
	 * @return true if analyzer exists at time of call, else false.
	 */
	bool QueueRemove(const RecordVal* args);

	/**
	 * Perform all queued modifications to the currently active analyzers.
	 */
	void DrainModifications();

	/**
	 * @return a cookie for iterating over the attached analyzers with
	 *         NextEntry().
	 */
	IterCookie* InitForIteration() const
		{ return analyzer_map.InitForIteration(); }

	/**
	 * @param c a cookie obtained from InitForIteration().
	 * @return the next entry of the underlying analyzer map.
	 */
	file_analysis::Analyzer* NextEntry(IterCookie* c)
		{ return analyzer_map.NextEntry(c); }

protected:
	/** Hashes \a args into a key for #analyzer_map. */
	HashKey* GetKey(const RecordVal* args) const;

	/** Creates the analyzer described by \a args, or returns 0 on failure. */
	file_analysis::Analyzer* InstantiateAnalyzer(RecordVal* args) const;

	/** Stores \a a under \a key and frees \a key. */
	void Insert(file_analysis::Analyzer* a, HashKey* key);

	/** Removes/destroys the analyzer under \a key and frees \a key. */
	bool Remove(FA_Tag tag, HashKey* key);

	File* file;
	CompositeHash* analyzer_hash;	/**< AnalyzerArgs hashes. */
	PDict(file_analysis::Analyzer) analyzer_map; /**< Indexed by AnalyzerArgs. */

	/**
	 * A deferred change to the set.  Perform() applies it; Abort() frees
	 * what it still owns when it is discarded without being applied.
	 */
	class Modification {
	public:
		virtual ~Modification() {}
		virtual bool Perform(AnalyzerSet* set) = 0;
		virtual void Abort() = 0;
	};

	/** Deferred attachment of an already instantiated analyzer. */
	class AddMod : public Modification {
	public:
		AddMod(file_analysis::Analyzer* arg_a, HashKey* arg_key)
			: Modification(), a(arg_a), key(arg_key) {}
		virtual ~AddMod() {}
		virtual bool Perform(AnalyzerSet* set);
		virtual void Abort() { delete a; delete key; }

	protected:
		file_analysis::Analyzer* a;
		HashKey* key;
	};

	/** Deferred removal of the analyzer stored under a key. */
	class RemoveMod : public Modification {
	public:
		RemoveMod(FA_Tag arg_tag, HashKey* arg_key)
			: Modification(), tag(arg_tag), key(arg_key) {}
		virtual ~RemoveMod() {}
		virtual bool Perform(AnalyzerSet* set);
		virtual void Abort() { delete key; }

	protected:
		FA_Tag tag;
		HashKey* key;
	};

	// Qualified with std:: so this header does not depend on a
	// using-directive leaked by some earlier include.
	typedef std::queue<Modification*> ModQueue;
	ModQueue mod_queue;	/**< Pending modifications, oldest first. */
};
} // namespace file_analysis
#endif

View file

@ -9,18 +9,18 @@ using namespace file_analysis;
DataEvent::DataEvent(RecordVal* args, File* file,
EventHandlerPtr ce, EventHandlerPtr se)
: Action(args, file), chunk_event(ce), stream_event(se)
: file_analysis::Analyzer(args, file), chunk_event(ce), stream_event(se)
{
}
Action* DataEvent::Instantiate(RecordVal* args, File* file)
file_analysis::Analyzer* DataEvent::Instantiate(RecordVal* args, File* file)
{
using BifType::Record::FileAnalysis::ActionArgs;
using BifType::Record::FileAnalysis::AnalyzerArgs;
const char* chunk_field = "chunk_event";
const char* stream_field = "stream_event";
int chunk_off = ActionArgs->FieldOffset(chunk_field);
int stream_off = ActionArgs->FieldOffset(stream_field);
int chunk_off = AnalyzerArgs->FieldOffset(chunk_field);
int stream_off = AnalyzerArgs->FieldOffset(stream_field);
Val* chunk_val = args->Lookup(chunk_off);
Val* stream_val = args->Lookup(stream_off);

View file

@ -5,17 +5,17 @@
#include "Val.h"
#include "File.h"
#include "Action.h"
#include "Analyzer.h"
namespace file_analysis {
/**
* An action to send file data to script-layer events.
* An analyzer to send file data to script-layer events.
*/
class DataEvent : public Action {
class DataEvent : public file_analysis::Analyzer {
public:
static Action* Instantiate(RecordVal* args, File* file);
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);

View file

@ -6,7 +6,7 @@
using namespace file_analysis;
Extract::Extract(RecordVal* args, File* file, const string& arg_filename)
: Action(args, file), filename(arg_filename)
: file_analysis::Analyzer(args, file), filename(arg_filename)
{
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
@ -25,11 +25,11 @@ Extract::~Extract()
safe_close(fd);
}
Action* Extract::Instantiate(RecordVal* args, File* file)
file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file)
{
using BifType::Record::FileAnalysis::ActionArgs;
using BifType::Record::FileAnalysis::AnalyzerArgs;
const char* field = "extract_filename";
Val* v = args->Lookup(ActionArgs->FieldOffset(field));
Val* v = args->Lookup(AnalyzerArgs->FieldOffset(field));
if ( ! v ) return 0;

View file

@ -5,17 +5,17 @@
#include "Val.h"
#include "File.h"
#include "Action.h"
#include "Analyzer.h"
namespace file_analysis {
/**
* An action to simply extract files to disk.
* An analyzer to extract files to disk.
*/
class Extract : public Action {
class Extract : public file_analysis::Analyzer {
public:
static Action* Instantiate(RecordVal* args, File* file);
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
virtual ~Extract();

View file

@ -4,11 +4,12 @@
#include "File.h"
#include "FileTimer.h"
#include "FileID.h"
#include "Analyzer.h"
#include "Manager.h"
#include "Reporter.h"
#include "Val.h"
#include "Type.h"
#include "Analyzer.h"
#include "../Analyzer.h"
#include "Event.h"
using namespace file_analysis;
@ -36,6 +37,7 @@ static RecordVal* get_conn_id_val(const Connection* conn)
int File::id_idx = -1;
int File::parent_id_idx = -1;
int File::source_idx = -1;
int File::is_orig_idx = -1;
int File::conns_idx = -1;
int File::last_active_idx = -1;
int File::seen_bytes_idx = -1;
@ -45,10 +47,8 @@ int File::overflow_bytes_idx = -1;
int File::timeout_interval_idx = -1;
int File::bof_buffer_size_idx = -1;
int File::bof_buffer_idx = -1;
int File::file_type_idx = -1;
int File::mime_type_idx = -1;
magic_t File::magic = 0;
magic_t File::magic_mime = 0;
string File::salt;
@ -60,6 +60,7 @@ void File::StaticInit()
id_idx = Idx("id");
parent_id_idx = Idx("parent_id");
source_idx = Idx("source");
is_orig_idx = Idx("is_orig");
conns_idx = Idx("conns");
last_active_idx = Idx("last_active");
seen_bytes_idx = Idx("seen_bytes");
@ -69,19 +70,18 @@ void File::StaticInit()
timeout_interval_idx = Idx("timeout_interval");
bof_buffer_size_idx = Idx("bof_buffer_size");
bof_buffer_idx = Idx("bof_buffer");
file_type_idx = Idx("file_type");
mime_type_idx = Idx("mime_type");
bro_init_magic(&magic, MAGIC_NONE);
bro_init_magic(&magic_mime, MAGIC_MIME);
salt = BifConst::FileAnalysis::salt->CheckString();
}
File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag)
File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag,
bool is_orig)
: id(""), unique(unique), val(0), postpone_timeout(false),
first_chunk(true), missed_bof(false), need_reassembly(false), done(false),
actions(this)
analyzers(this)
{
StaticInit();
@ -101,8 +101,9 @@ File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag)
if ( conn )
{
// add source and connection fields
val->Assign(source_idx, new StringVal(Analyzer::GetTagName(tag)));
// add source, connection, is_orig fields
val->Assign(source_idx, new StringVal(::Analyzer::GetTagName(tag)));
val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL));
UpdateConnectionFields(conn);
}
else
@ -145,9 +146,16 @@ void File::UpdateConnectionFields(Connection* conn)
Val* idx = get_conn_id_val(conn);
if ( ! conns->AsTableVal()->Lookup(idx) )
{
conns->AsTableVal()->Assign(idx, conn->BuildConnVal());
if ( ! is_first )
file_mgr->FileEvent(file_over_new_connection, this);
Val* conn_val = conn->BuildConnVal();
conns->AsTableVal()->Assign(idx, conn_val);
if ( ! is_first && FileEventAvailable(file_over_new_connection) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(conn_val->Ref());
FileEvent(file_over_new_connection, vl);
}
}
Unref(idx);
@ -182,6 +190,11 @@ double File::GetTimeoutInterval() const
return LookupFieldDefaultInterval(timeout_interval_idx);
}
void File::SetTimeoutInterval(double interval)
{
val->Assign(timeout_interval_idx, new Val(interval, TYPE_INTERVAL));
}
void File::IncrementByteCount(uint64 size, int field_idx)
{
uint64 old = LookupFieldDefaultCount(field_idx);
@ -207,14 +220,14 @@ void File::ScheduleInactivityTimer() const
timer_mgr->Add(new FileTimer(network_time, id, GetTimeoutInterval()));
}
bool File::AddAction(RecordVal* args)
bool File::AddAnalyzer(RecordVal* args)
{
return done ? false : actions.QueueAddAction(args);
return done ? false : analyzers.QueueAdd(args);
}
bool File::RemoveAction(const RecordVal* args)
bool File::RemoveAnalyzer(const RecordVal* args)
{
return done ? false : actions.QueueRemoveAction(args);
return done ? false : analyzers.QueueRemove(args);
}
bool File::BufferBOF(const u_char* data, uint64 len)
@ -235,18 +248,22 @@ bool File::BufferBOF(const u_char* data, uint64 len)
return true;
}
bool File::DetectTypes(const u_char* data, uint64 len)
bool File::DetectMIME(const u_char* data, uint64 len)
{
const char* desc = bro_magic_buffer(magic, data, len);
const char* mime = bro_magic_buffer(magic_mime, data, len);
if ( desc )
val->Assign(file_type_idx, new StringVal(desc));
if ( mime )
val->Assign(mime_type_idx, new StringVal(mime));
{
const char* mime_end = strchr(mime, ';');
return desc || mime;
if ( mime_end )
// strip off charset
val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime));
else
val->Assign(mime_type_idx, new StringVal(mime));
}
return mime;
}
void File::ReplayBOF()
@ -264,10 +281,9 @@ void File::ReplayBOF()
BroString* bs = concatenate(bof_buffer.chunks);
val->Assign(bof_buffer_idx, new StringVal(bs));
DetectTypes(bs->Bytes(), bs->Len());
DetectMIME(bs->Bytes(), bs->Len());
file_mgr->FileEvent(file_new, this);
mgr.Drain(); // need immediate feedback about actions to add
FileEvent(file_new);
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len());
@ -275,28 +291,26 @@ void File::ReplayBOF()
void File::DataIn(const u_char* data, uint64 len, uint64 offset)
{
actions.DrainModifications();
analyzers.DrainModifications();
if ( first_chunk )
{
// TODO: this should all really be delayed until we attempt reassembly
DetectTypes(data, len);
file_mgr->FileEvent(file_new, this);
mgr.Drain(); // need immediate feedback about actions to add
actions.DrainModifications();
DetectMIME(data, len);
FileEvent(file_new);
first_chunk = false;
}
Action* act = 0;
IterCookie* c = actions.InitForIteration();
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (act = actions.NextEntry(c)) )
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! act->DeliverChunk(data, len, offset) )
actions.QueueRemoveAction(act->Args());
if ( ! a->DeliverChunk(data, len, offset) )
analyzers.QueueRemove(a->Args());
}
actions.DrainModifications();
analyzers.DrainModifications();
// TODO: check reassembly requirement based on buffer size in record
if ( need_reassembly )
@ -311,38 +325,36 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
void File::DataIn(const u_char* data, uint64 len)
{
actions.DrainModifications();
analyzers.DrainModifications();
if ( BufferBOF(data, len) ) return;
if ( missed_bof )
{
DetectTypes(data, len);
file_mgr->FileEvent(file_new, this);
mgr.Drain(); // need immediate feedback about actions to add
actions.DrainModifications();
DetectMIME(data, len);
FileEvent(file_new);
missed_bof = false;
}
Action* act = 0;
IterCookie* c = actions.InitForIteration();
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (act = actions.NextEntry(c)) )
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! act->DeliverStream(data, len) )
if ( ! a->DeliverStream(data, len) )
{
actions.QueueRemoveAction(act->Args());
analyzers.QueueRemove(a->Args());
continue;
}
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
LookupFieldDefaultCount(missing_bytes_idx);
if ( ! act->DeliverChunk(data, len, offset) )
actions.QueueRemoveAction(act->Args());
if ( ! a->DeliverChunk(data, len, offset) )
analyzers.QueueRemove(a->Args());
}
actions.DrainModifications();
analyzers.DrainModifications();
IncrementByteCount(len, seen_bytes_idx);
}
@ -350,46 +362,79 @@ void File::EndOfFile()
{
if ( done ) return;
actions.DrainModifications();
analyzers.DrainModifications();
// Send along anything that's been buffered, but never flushed.
ReplayBOF();
done = true;
Action* act = 0;
IterCookie* c = actions.InitForIteration();
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (act = actions.NextEntry(c)) )
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! act->EndOfFile() )
actions.QueueRemoveAction(act->Args());
if ( ! a->EndOfFile() )
analyzers.QueueRemove(a->Args());
}
file_mgr->FileEvent(file_state_remove, this);
FileEvent(file_state_remove);
actions.DrainModifications();
analyzers.DrainModifications();
}
void File::Gap(uint64 offset, uint64 len)
{
actions.DrainModifications();
analyzers.DrainModifications();
// If we were buffering the beginning of the file, a gap means we've got
// as much contiguous stuff at the beginning as possible, so work with that.
ReplayBOF();
Action* act = 0;
IterCookie* c = actions.InitForIteration();
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (act = actions.NextEntry(c)) )
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! act->Undelivered(offset, len) )
actions.QueueRemoveAction(act->Args());
if ( ! a->Undelivered(offset, len) )
analyzers.QueueRemove(a->Args());
}
file_mgr->FileEvent(file_gap, this);
if ( FileEventAvailable(file_gap) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(new Val(offset, TYPE_COUNT));
vl->append(new Val(len, TYPE_COUNT));
FileEvent(file_gap, vl);
}
actions.DrainModifications();
analyzers.DrainModifications();
IncrementByteCount(len, missing_bytes_idx);
}
bool File::FileEventAvailable(EventHandlerPtr h)
{
return h && ! file_mgr->IsIgnored(unique);
}
void File::FileEvent(EventHandlerPtr h)
{
if ( ! FileEventAvailable(h) ) return;
val_list* vl = new val_list();
vl->append(val->Ref());
FileEvent(h, vl);
}
void File::FileEvent(EventHandlerPtr h, val_list* vl)
{
mgr.QueueEvent(h, vl);
if ( h == file_new || h == file_timeout )
{
// immediate feedback is required for these events.
mgr.Drain();
analyzers.DrainModifications();
}
}

View file

@ -8,7 +8,7 @@
#include "AnalyzerTags.h"
#include "Conn.h"
#include "Val.h"
#include "ActionSet.h"
#include "AnalyzerSet.h"
#include "FileID.h"
#include "BroString.h"
@ -34,6 +34,11 @@ public:
*/
double GetTimeoutInterval() const;
/**
* Set the "timeout_interval" field from #val record to \a interval seconds.
*/
void SetTimeoutInterval(double interval);
/**
* @return value of the "id" field from #val record.
*/
@ -74,45 +79,61 @@ public:
void ScheduleInactivityTimer() const;
/**
* Queues attaching an action. Only one action per type can be attached at
* a time unless the arguments differ.
* @return false if action can't be instantiated, else true.
* Queues attaching an analyzer. Only one analyzer per type can be attached
* at a time unless the arguments differ.
* @return false if analyzer can't be instantiated, else true.
*/
bool AddAction(RecordVal* args);
bool AddAnalyzer(RecordVal* args);
/**
* Queues removal of an action.
* @return true if action was active at time of call, else false.
* Queues removal of an analyzer.
* @return true if analyzer was active at time of call, else false.
*/
bool RemoveAction(const RecordVal* args);
bool RemoveAnalyzer(const RecordVal* args);
/**
* Pass in non-sequential data and deliver to attached actions/analyzers.
* Pass in non-sequential data and deliver to attached analyzers.
*/
void DataIn(const u_char* data, uint64 len, uint64 offset);
/**
* Pass in sequential data and deliver to attached actions/analyzers.
* Pass in sequential data and deliver to attached analyzers.
*/
void DataIn(const u_char* data, uint64 len);
/**
* Inform attached actions/analyzers about end of file being seen.
* Inform attached analyzers about end of file being seen.
*/
void EndOfFile();
/**
* Inform attached actions/analyzers about a gap in file stream.
* Inform attached analyzers about a gap in file stream.
*/
void Gap(uint64 offset, uint64 len);
/**
* @return true if event has a handler and the file isn't ignored.
*/
bool FileEventAvailable(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle; the only parameter
* to that event is the \c fa_file record.
*/
void FileEvent(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle.
*/
void FileEvent(EventHandlerPtr h, val_list* vl);
protected:
/**
* Constructor; only file_analysis::Manager should be creating these.
*/
File(const string& unique, Connection* conn = 0,
AnalyzerTag::Tag tag = AnalyzerTag::Error);
AnalyzerTag::Tag tag = AnalyzerTag::Error, bool is_orig = false);
/**
* Updates the "conn_ids" and "conn_uids" fields in #val record with the
@ -149,11 +170,11 @@ protected:
void ReplayBOF();
/**
* Does file/mime type detection and assigns types (if available) to
* corresponding fields in #val.
* @return whether a file or mime type was available.
* Does mime type detection and assigns type (if available) to \c mime_type
* field in #val.
* @return whether mime type was available.
*/
bool DetectTypes(const u_char* data, uint64 len);
bool DetectMIME(const u_char* data, uint64 len);
FileID id; /**< A pretty hash that likely identifies file */
string unique; /**< A string that uniquely identifies file */
@ -163,7 +184,7 @@ protected:
bool missed_bof; /**< Flags that we missed start of file. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */
bool done; /**< If this object is about to be deleted. */
ActionSet actions;
AnalyzerSet analyzers;
struct BOF_Buffer {
BOF_Buffer() : full(false), replayed(false), size(0) {}
@ -186,7 +207,6 @@ protected:
*/
static void StaticInit();
static magic_t magic;
static magic_t magic_mime;
static string salt;
@ -194,6 +214,7 @@ protected:
static int id_idx;
static int parent_id_idx;
static int source_idx;
static int is_orig_idx;
static int conns_idx;
static int last_active_idx;
static int seen_bytes_idx;
@ -203,7 +224,6 @@ protected:
static int timeout_interval_idx;
static int bof_buffer_size_idx;
static int bof_buffer_idx;
static int file_type_idx;
static int mime_type_idx;
};

View file

@ -7,7 +7,7 @@
using namespace file_analysis;
Hash::Hash(RecordVal* args, File* file, HashVal* hv, const char* arg_kind)
: Action(args, file), hash(hv), fed(false), kind(arg_kind)
: file_analysis::Analyzer(args, file), hash(hv), fed(false), kind(arg_kind)
{
hash->Init();
}

View file

@ -6,14 +6,14 @@
#include "Val.h"
#include "OpaqueVal.h"
#include "File.h"
#include "Action.h"
#include "Analyzer.h"
namespace file_analysis {
/**
* An action to produce a hash of file contents.
* An analyzer to produce a hash of file contents.
*/
class Hash : public Action {
class Hash : public file_analysis::Analyzer {
public:
virtual ~Hash();
@ -38,7 +38,7 @@ protected:
class MD5 : public Hash {
public:
static Action* Instantiate(RecordVal* args, File* file)
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new MD5(args, file) : 0; }
protected:
@ -51,7 +51,7 @@ protected:
class SHA1 : public Hash {
public:
static Action* Instantiate(RecordVal* args, File* file)
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA1(args, file) : 0; }
protected:
@ -64,7 +64,7 @@ protected:
class SHA256 : public Hash {
public:
static Action* Instantiate(RecordVal* args, File* file)
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA256(args, file) : 0; }
protected:

View file

@ -3,7 +3,7 @@
#include "Manager.h"
#include "File.h"
#include "Action.h"
#include "Analyzer.h"
#include "Var.h"
#include "Event.h"
@ -40,7 +40,7 @@ void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
if ( IsDisabled(tag) ) return;
GetFileHandle(tag, conn, is_orig);
DataIn(data, len, offset, GetFile(current_handle, conn, tag));
DataIn(data, len, offset, GetFile(current_handle, conn, tag, is_orig));
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
@ -65,7 +65,9 @@ void Manager::DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag,
{
if ( IsDisabled(tag) ) return;
GetFileHandle(tag, conn, is_orig);
DataIn(data, len, GetFile(current_handle, conn, tag));
// Sequential data input shouldn't be going over multiple conns, so don't
// do the check to update connection set.
DataIn(data, len, GetFile(current_handle, conn, tag, is_orig, false));
}
void Manager::DataIn(const u_char* data, uint64 len, const string& unique)
@ -108,7 +110,7 @@ void Manager::Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag,
if ( IsDisabled(tag) ) return;
GetFileHandle(tag, conn, is_orig);
Gap(offset, len, GetFile(current_handle, conn, tag));
Gap(offset, len, GetFile(current_handle, conn, tag, is_orig));
}
void Manager::Gap(uint64 offset, uint64 len, const string& unique)
@ -129,7 +131,7 @@ void Manager::SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn,
if ( IsDisabled(tag) ) return;
GetFileHandle(tag, conn, is_orig);
SetSize(size, GetFile(current_handle, conn, tag));
SetSize(size, GetFile(current_handle, conn, tag, is_orig));
}
void Manager::SetSize(uint64 size, const string& unique)
@ -147,17 +149,6 @@ void Manager::SetSize(uint64 size, File* file)
RemoveFile(file->GetUnique());
}
/**
 * Queues event handler \a h with a single argument: the value returned by
 * file->GetVal(). Nothing is queued when \a h tests false or when
 * IsIgnored() reports the file's unique string as ignored.
 */
void Manager::FileEvent(EventHandlerPtr h, File* file)
{
// No handler available: nothing to queue.
if ( ! h ) return;
// Ignored files do not generate events.
if ( IsIgnored(file->GetUnique()) ) return;
val_list * vl = new val_list();
// Ref() is called on the file's value before it is appended to the list.
vl->append(file->GetVal()->Ref());
// NOTE(review): presumably QueueEvent takes over the val_list -- confirm.
mgr.QueueEvent(h, vl);
}
bool Manager::PostponeTimeout(const FileID& file_id) const
{
File* file = Lookup(file_id);
@ -168,26 +159,36 @@ bool Manager::PostponeTimeout(const FileID& file_id) const
return true;
}
bool Manager::AddAction(const FileID& file_id, RecordVal* args) const
bool Manager::SetTimeoutInterval(const FileID& file_id, double interval) const
{
File* file = Lookup(file_id);
if ( ! file ) return false;
return file->AddAction(args);
file->SetTimeoutInterval(interval);
return true;
}
bool Manager::RemoveAction(const FileID& file_id, const RecordVal* args) const
bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const
{
File* file = Lookup(file_id);
if ( ! file ) return false;
return file->RemoveAction(args);
return file->AddAnalyzer(args);
}
bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const
{
File* file = Lookup(file_id);
if ( ! file ) return false;
return file->RemoveAnalyzer(args);
}
File* Manager::GetFile(const string& unique, Connection* conn,
AnalyzerTag::Tag tag)
AnalyzerTag::Tag tag, bool is_orig, bool update_conn)
{
if ( unique.empty() ) return 0;
if ( IsIgnored(unique) ) return 0;
@ -196,7 +197,7 @@ File* Manager::GetFile(const string& unique, Connection* conn,
if ( ! rval )
{
rval = str_map[unique] = new File(unique, conn, tag);
rval = str_map[unique] = new File(unique, conn, tag, is_orig);
FileID id = rval->GetID();
if ( id_map[id] )
@ -212,7 +213,8 @@ File* Manager::GetFile(const string& unique, Connection* conn,
else
{
rval->UpdateLastActivityTime();
rval->UpdateConnectionFields(conn);
if ( update_conn )
rval->UpdateConnectionFields(conn);
}
return rval;
@ -235,8 +237,7 @@ void Manager::Timeout(const FileID& file_id, bool is_terminating)
file->postpone_timeout = false;
FileEvent(file_timeout, file);
mgr.Drain(); // need immediate feedback about whether to postpone
file->FileEvent(file_timeout);
if ( file->postpone_timeout && ! is_terminating )
{

View file

@ -97,23 +97,28 @@ public:
bool PostponeTimeout(const FileID& file_id) const;
/**
* Queue attachment of an action to the file identifier. Multiple actions
* of a given type can be attached per file identifier at a time as long as
* the arguments differ.
* @return false if the action failed to be instantiated, else true.
* Sets an inactivity threshold for the file.
*/
bool AddAction(const FileID& file_id, RecordVal* args) const;
bool SetTimeoutInterval(const FileID& file_id, double interval) const;
/**
* Queue removal of an action for a given file identifier.
* @return true if the action is active at the time of call, else false.
* Queue attachment of an analyzer to the file identifier. Multiple
* analyzers of a given type can be attached per file identifier at a time
* as long as the arguments differ.
* @return false if the analyzer failed to be instantiated, else true.
*/
bool RemoveAction(const FileID& file_id, const RecordVal* args) const;
bool AddAnalyzer(const FileID& file_id, RecordVal* args) const;
/**
* Queues an event related to the file's life-cycle.
* Queue removal of an analyzer for a given file identifier.
* @return true if the analyzer is active at the time of call, else false.
*/
void FileEvent(EventHandlerPtr h, File* file);
bool RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const;
/**
* @return whether the file mapped to \a unique is being ignored.
*/
bool IsIgnored(const string& unique);
protected:
@ -129,7 +134,8 @@ protected:
* fields.
*/
File* GetFile(const string& unique, Connection* conn = 0,
AnalyzerTag::Tag tag = AnalyzerTag::Error);
AnalyzerTag::Tag tag = AnalyzerTag::Error,
bool is_orig = false, bool update_conn = true);
/**
* @return the File object mapped to \a file_id, or a null pointer if no
@ -149,11 +155,6 @@ protected:
*/
bool RemoveFile(const string& unique);
/**
* @return whether the file mapped to \a unique is being ignored.
*/
bool IsIgnored(const string& unique);
/**
* Sets #current_handle to a unique file handle string based on what the
* \c get_file_handle event derives from the connection params. The