Move file analyzers to new plugin infrastructure.

This commit is contained in:
Jon Siwek 2013-06-10 15:50:18 -05:00
parent f2574636b6
commit 7c7b6214a6
41 changed files with 559 additions and 172 deletions

View file

@ -5,10 +5,13 @@
#include "Val.h"
#include "NetVar.h"
#include "analyzer/Tag.h"
#include "file_analysis/file_analysis.bif.h"
namespace file_analysis {
typedef BifEnum::FileAnalysis::Analyzer FA_Tag;
typedef int FA_Tag;
class File;
@ -94,8 +97,7 @@ public:
static FA_Tag ArgsTag(const RecordVal* args)
{
using BifType::Record::FileAnalysis::AnalyzerArgs;
return static_cast<FA_Tag>(
args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum());
return args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum();
}
protected:
@ -119,9 +121,6 @@ private:
File* file; /**< The file to which the analyzer is attached. */
};
typedef file_analysis::Analyzer* (*AnalyzerInstantiator)(RecordVal* args,
File* file);
} // namespace file_analysis
#endif

View file

@ -3,21 +3,10 @@
#include "AnalyzerSet.h"
#include "File.h"
#include "Analyzer.h"
#include "Extract.h"
#include "DataEvent.h"
#include "Hash.h"
#include "Manager.h"
using namespace file_analysis;
// keep in order w/ declared enum values in file_analysis.bif
static AnalyzerInstantiator analyzer_factory[] = {
file_analysis::Extract::Instantiate,
file_analysis::MD5::Instantiate,
file_analysis::SHA1::Instantiate,
file_analysis::SHA256::Instantiate,
file_analysis::DataEvent::Instantiate,
};
static void analyzer_del_func(void* v)
{
delete (file_analysis::Analyzer*) v;
@ -154,14 +143,13 @@ HashKey* AnalyzerSet::GetKey(const RecordVal* args) const
file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const
{
file_analysis::Analyzer* a =
analyzer_factory[file_analysis::Analyzer::ArgsTag(args)](args, file);
FA_Tag tag = file_analysis::Analyzer::ArgsTag(args);
file_analysis::Analyzer* a = file_mgr->InstantiateAnalyzer(tag, args, file);
if ( ! a )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d failed for file id",
" %s", file_analysis::Analyzer::ArgsTag(args),
file->GetID().c_str());
reporter->Error("Failed file analyzer %s instantiation for file id %s",
file_mgr->GetAnalyzerName(tag), file->GetID().c_str());
return 0;
}

View file

@ -0,0 +1,22 @@
include(BroSubdir)
include_directories(BEFORE
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
)
add_subdirectory(analyzer)
set(file_analysis_SRCS
Manager.cc
File.cc
FileTimer.cc
Analyzer.h
AnalyzerSet.cc
Component.cc
)
bif_target(file_analysis.bif)
bro_add_subdir_library(file_analysis ${file_analysis_SRCS} ${BIF_OUTPUT_CC})
add_dependencies(bro_file_analysis generate_outputs)

View file

@ -0,0 +1,70 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "Component.h"
#include "Manager.h"
#include "../Desc.h"
#include "../util.h"
using namespace file_analysis;
analyzer::Tag::type_t Component::type_counter = 0;
Component::Component(const char* arg_name, factory_callback arg_factory,
analyzer::Tag::subtype_t arg_subtype)
: plugin::Component(plugin::component::FILE_ANALYZER)
{
name = copy_string(arg_name);
canon_name = canonify_name(arg_name);
factory = arg_factory;
tag = analyzer::Tag(++type_counter, arg_subtype);
}
Component::Component(const Component& other)
: plugin::Component(Type())
{
name = copy_string(other.name);
canon_name = copy_string(other.canon_name);
factory = other.factory;
tag = other.tag;
}
Component::~Component()
{
delete [] name;
delete [] canon_name;
}
analyzer::Tag Component::Tag() const
{
return tag;
}
void Component::Describe(ODesc* d)
{
plugin::Component::Describe(d);
d->Add(name);
d->Add(" (");
if ( factory )
{
d->Add("ANALYZER_");
d->Add(canon_name);
d->Add(", ");
}
d->Add(")");
}
Component& Component::operator=(const Component& other)
{
if ( &other != this )
{
name = copy_string(other.name);
factory = other.factory;
tag = other.tag;
}
return *this;
}

View file

@ -0,0 +1,109 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYZER_PLUGIN_COMPONENT_H
#define FILE_ANALYZER_PLUGIN_COMPONENT_H
#include "analyzer/Tag.h"
#include "plugin/Component.h"
#include "Val.h"
#include "../config.h"
#include "../util.h"
namespace file_analysis {
class File;
class Analyzer;
/**
* Component description for plugins providing file analyzers.
*
* A plugin can provide a specific protocol analyzer by registering this
* analyzer component, describing the analyzer.
*/
class Component : public plugin::Component {
public:
typedef Analyzer* (*factory_callback)(RecordVal* args, File* file);
/**
* Constructor.
*
* @param name The name of the provided analyzer. This name is used
* across the system to identify the analyzer, e.g., when calling
* file_analysis::Manager::InstantiateAnalyzer with a name.
*
* @param factory A factory function to instantiate instances of the
* analyzer's class, which must be derived directly or indirectly
* from file_analysis::Analyzer. This is typically a static \c
* Instatiate() method inside the class that just allocates and
* returns a new instance.
*
* @param subtype A subtype associated with this component that
* further distinguishes it. The subtype will be integrated into
* the analyzer::Tag that the manager associates with this analyzer,
* and analyzer instances can accordingly access it via analyzer::Tag().
* If not used, leave at zero.
*/
Component(const char* name, factory_callback factory,
analyzer::Tag::subtype_t subtype = 0);
/**
* Copy constructor.
*/
Component(const Component& other);
/**
* Destructor.
*/
~Component();
/**
* Returns the name of the analyzer. This name is unique across all
* analyzers and used to identify it. The returned name is derived
* from what's passed to the constructor but upper-cased and
* canonified to allow being part of a script-level ID.
*/
const char* Name() const { return name; }
/**
* Returns a canonocalized version of the analyzer's name. The
* returned name is derived from what's passed to the constructor but
* upper-cased and transformed to allow being part of a script-level
* ID.
*/
const char* CanonicalName() const { return canon_name; }
/**
* Returns the analyzer's factory function.
*/
factory_callback Factory() const { return factory; }
/**
* Returns the analyzer's tag. Note that this is automatically
* generated for each new Components, and hence unique across all of
* them.
*/
analyzer::Tag Tag() const;
/**
* Generates a human-readable description of the component's main
* parameters. This goes into the output of \c "bro -NN".
*/
virtual void Describe(ODesc* d);
Component& operator=(const Component& other);
private:
const char* name; // The analyzer's name.
const char* canon_name; // The analyzer's canonical name.
factory_callback factory; // The analyzer's factory callback.
analyzer::Tag tag; // The automatically assigned analyzer tag.
// Global counter used to generate unique tags.
static analyzer::Tag::type_t type_counter;
};
}
#endif

View file

@ -10,12 +10,18 @@
#include "Var.h"
#include "Event.h"
#include "plugin/Manager.h"
using namespace file_analysis;
TableVal* Manager::disabled = 0;
string Manager::salt;
Manager::Manager()
{
tag_enum_type = new EnumType("FileAnalysis::Tag");
::ID* id = install_ID("Tag", "FileAnalysis", true, true);
add_type(id, tag_enum_type, 0, 0);
}
Manager::~Manager()
@ -23,6 +29,40 @@ Manager::~Manager()
Terminate();
}
void Manager::InitPreScript()
{
std::list<Component*> analyzers = plugin_mgr->Components<Component>();
for ( std::list<Component*>::const_iterator i = analyzers.begin();
i != analyzers.end(); ++i )
RegisterAnalyzerComponent(*i);
}
void Manager::RegisterAnalyzerComponent(Component* component)
{
const char* cname = component->CanonicalName();
if ( tag_enum_type->Lookup("FileAnalysis", cname) != -1 )
reporter->FatalError("File Analyzer %s defined more than once", cname);
DBG_LOG(DBG_FILE_ANALYSIS, "Registering analyzer %s (tag %s)",
component->Name(), component->Tag().AsString().c_str());
analyzers_by_name.insert(std::make_pair(cname, component));
analyzers_by_tag.insert(std::make_pair(component->Tag(), component));
analyzers_by_val.insert(std::make_pair(
component->Tag().AsEnumVal()->InternalInt(), component));
string id = fmt("ANALYZER_%s", cname);
tag_enum_type->AddName("FileAnalysis", id.c_str(),
component->Tag().AsEnumVal()->InternalInt(), true);
}
void Manager::InitPostScript()
{
#include "file_analysis.bif.init.cc"
}
void Manager::Terminate()
{
vector<string> keys;
@ -35,8 +75,6 @@ void Manager::Terminate()
string Manager::HashHandle(const string& handle) const
{
static string salt;
if ( salt.empty() )
salt = BifConst::FileAnalysis::salt->CheckString();
@ -327,3 +365,31 @@ bool Manager::IsDisabled(analyzer::Tag tag)
return rval;
}
Analyzer* Manager::InstantiateAnalyzer(int tag, RecordVal* args, File* f) const
{
analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag);
if ( it == analyzers_by_val.end() )
reporter->InternalError("cannot instantiate unknown file analyzer: %d",
tag);
Component* c = it->second;
if ( ! c->Factory() )
reporter->InternalError("file analyzer %s cannot be instantiated "
"dynamically", c->CanonicalName());
return c->Factory()(args, f);
}
const char* Manager::GetAnalyzerName(int tag) const
{
analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag);
if ( it == analyzers_by_val.end() )
reporter->InternalError("cannot get name of unknown file analyzer: %d",
tag);
return it->second->CanonicalName();
}

View file

@ -17,9 +17,12 @@
#include "File.h"
#include "FileTimer.h"
#include "Component.h"
#include "analyzer/Tag.h"
#include "file_analysis/file_analysis.bif.h"
namespace file_analysis {
/**
@ -38,6 +41,18 @@ public:
*/
~Manager();
/**
* First-stage initializion of the manager. This is called early on
* during Bro's initialization, before any scripts are processed.
*/
void InitPreScript();
/**
* Second-stage initialization of the manager. This is called late
* during Bro's initialization after any scripts are processed.
*/
void InitPostScript();
/**
* Times out any active file analysis to prepare for shutdown.
*/
@ -182,6 +197,23 @@ public:
*/
bool IsIgnored(const string& file_id);
/**
* Instantiates a new file analyzer instance for the file.
* @param tag The file analyzer's tag.
* @param args The file analzer argument/option values.
* @param f The file analzer is to be associated with.
* @return The new analyzer instance or null if tag is invalid.
*/
Analyzer* InstantiateAnalyzer(int tag, RecordVal* args, File* f) const;
/**
* Translates a script-level file analyzer tag in to corresponding file
* analyzer name.
* @param tag The enum val of a file analyzer.
* @return The human-readable name of the file analyzer.
*/
const char* GetAnalyzerName(int tag) const;
protected:
friend class FileTimer;
@ -255,11 +287,23 @@ protected:
static bool IsDisabled(analyzer::Tag tag);
private:
typedef map<string, Component*> analyzer_map_by_name;
typedef map<analyzer::Tag, Component*> analyzer_map_by_tag;
typedef map<int, Component*> analyzer_map_by_val;
void RegisterAnalyzerComponent(Component* component);
IDMap id_map; /**< Map file ID to file_analysis::File records. */
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_file_id; /**< Hash of what get_file_handle event sets.*/
string current_file_id; /**< Hash of what get_file_handle event sets. */
EnumType* tag_enum_type; /**< File analyzer tag type. */
analyzer_map_by_name analyzers_by_name;
analyzer_map_by_tag analyzers_by_tag;
analyzer_map_by_val analyzers_by_val;
static TableVal* disabled; /**< Table of disabled analyzers. */
static string salt; /**< A salt added to file handles before hashing. */
};
} // namespace file_analysis

View file

@ -0,0 +1,3 @@
add_subdirectory(data_event)
add_subdirectory(extract)
add_subdirectory(hash)

View file

@ -0,0 +1,8 @@
include(BroPlugin)
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR})
bro_plugin_begin(Bro FileDataEvent)
bro_plugin_cc(DataEvent.cc Plugin.cc)
bro_plugin_end()

View file

@ -0,0 +1,26 @@
#include "plugin/Plugin.h"
#include "file_analysis/Component.h"
#include "DataEvent.h"
namespace plugin { namespace Bro_FileDataEvent {
class Plugin : public plugin::Plugin {
protected:
void InitPreScript()
{
SetName("Bro::FileDataEvent");
SetVersion(-1);
SetAPIVersion(BRO_PLUGIN_API_VERSION);
SetDynamicPlugin(false);
SetDescription("Delivers file content via events");
AddComponent(new ::file_analysis::Component("DATA_EVENT",
::file_analysis::DataEvent::Instantiate));
}
};
Plugin __plugin;
} }

View file

@ -0,0 +1,8 @@
include(BroPlugin)
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR})
bro_plugin_begin(Bro FileExtract)
bro_plugin_cc(Extract.cc Plugin.cc)
bro_plugin_end()

View file

@ -0,0 +1,26 @@
#include "plugin/Plugin.h"
#include "file_analysis/Component.h"
#include "Extract.h"
namespace plugin { namespace Bro_FileExtract {
class Plugin : public plugin::Plugin {
protected:
void InitPreScript()
{
SetName("Bro::FileExtract");
SetVersion(-1);
SetAPIVersion(BRO_PLUGIN_API_VERSION);
SetDynamicPlugin(false);
SetDescription("Extract file content to local file system");
AddComponent(new ::file_analysis::Component("EXTRACT",
::file_analysis::Extract::Instantiate));
}
};
Plugin __plugin;
} }

View file

@ -0,0 +1,9 @@
include(BroPlugin)
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR})
bro_plugin_begin(Bro FileHash)
bro_plugin_cc(Hash.cc Plugin.cc)
bro_plugin_bif(events.bif)
bro_plugin_end()

View file

@ -10,6 +10,8 @@
#include "File.h"
#include "Analyzer.h"
#include "events.bif.h"
namespace file_analysis {
/**

View file

@ -0,0 +1,33 @@
#include "plugin/Plugin.h"
#include "file_analysis/Component.h"
#include "Hash.h"
namespace plugin { namespace Bro_FileHash {
class Plugin : public plugin::Plugin {
protected:
void InitPreScript()
{
SetName("Bro::FileHash");
SetVersion(-1);
SetAPIVersion(BRO_PLUGIN_API_VERSION);
SetDynamicPlugin(false);
SetDescription("Hash file content");
AddComponent(new ::file_analysis::Component("MD5",
::file_analysis::MD5::Instantiate));
AddComponent(new ::file_analysis::Component("SHA1",
::file_analysis::SHA1::Instantiate));
AddComponent(new ::file_analysis::Component("SHA256",
::file_analysis::SHA256::Instantiate));
extern std::list<std::pair<const char*, int> > __bif_events_init();
AddBifInitFunction(&__bif_events_init);
}
};
Plugin __plugin;
} }

View file

@ -0,0 +1,12 @@
## This event is generated each time file analysis generates a digest of the
## file contents.
##
## f: The file.
##
## kind: The type of digest algorithm.
##
## hash: The result of the hashing.
##
## .. bro:see:: FileAnalysis::add_analyzer FileAnalysis::ANALYZER_MD5
## FileAnalysis::ANALYZER_SHA1 FileAnalysis::ANALYZER_SHA256
event file_hash%(f: fa_file, kind: string, hash: string%);

View file

@ -0,0 +1,61 @@
##! Internal functions and types used by the file analysis framework.
module FileAnalysis;
%%{
#include "file_analysis/Manager.h"
%%}
type AnalyzerArgs: record;
## :bro:see:`FileAnalysis::set_timeout_interval`.
function FileAnalysis::__set_timeout_interval%(file_id: string, t: interval%): bool
%{
bool result = file_mgr->SetTimeoutInterval(file_id->CheckString(), t);
return new Val(result, TYPE_BOOL);
%}
## :bro:see:`FileAnalysis::add_analyzer`.
function FileAnalysis::__add_analyzer%(file_id: string, args: any%): bool
%{
using BifType::Record::FileAnalysis::AnalyzerArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
bool result = file_mgr->AddAnalyzer(file_id->CheckString(), rv);
Unref(rv);
return new Val(result, TYPE_BOOL);
%}
## :bro:see:`FileAnalysis::remove_analyzer`.
function FileAnalysis::__remove_analyzer%(file_id: string, args: any%): bool
%{
using BifType::Record::FileAnalysis::AnalyzerArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(), rv);
Unref(rv);
return new Val(result, TYPE_BOOL);
%}
## :bro:see:`FileAnalysis::stop`.
function FileAnalysis::__stop%(file_id: string%): bool
%{
bool result = file_mgr->IgnoreFile(file_id->CheckString());
return new Val(result, TYPE_BOOL);
%}
module GLOBAL;
## For use within a :bro:see:`get_file_handle` handler to set a unique
## identifier to associate with the current input to the file analysis
## framework. Using an empty string for the handle signifies that the
## input will be ignored/discarded.
##
## handle: A string that uniquely identifies a file.
##
## .. bro:see:: get_file_handle
function set_file_handle%(handle: string%): any
%{
file_mgr->SetHandle(handle->CheckString());
return 0;
%}
const FileAnalysis::salt: string;