diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index 0b077c2c50..fdd919f86b 100644 --- a/doc/scripts/DocSourcesList.cmake +++ b/doc/scripts/DocSourcesList.cmake @@ -34,6 +34,7 @@ rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_DNS.events.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_FTP.events.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_FTP.functions.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_File.events.bif.bro) +rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_FileHash.events.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_Finger.events.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_GTPv1.events.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_Gnutella.events.bif.bro) diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index 15a9d01b0a..3352787cba 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -15,7 +15,7 @@ export { ## A structure which represents a desired type of file analysis. type AnalyzerArgs: record { ## The type of analysis. - tag: Analyzer; + tag: FileAnalysis::Tag; ## The local filename to which to write an extracted file. Must be ## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`. @@ -89,7 +89,7 @@ export { conn_uids: set[string] &log; ## A set of analysis types done during the file analysis. - analyzers: set[Analyzer]; + analyzers: set[FileAnalysis::Tag]; ## Local filenames of extracted files. extracted_files: set[string] &log; diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index 2110110a40..b7cafa70c7 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -222,17 +222,6 @@ type endpoint_stats: record { endian_type: count; }; -## A unique analyzer instance ID. 
Each time instantiates a protocol analyzers -## for a connection, it assigns it a unique ID that can be used to reference -## that instance. -## -## .. bro:see:: Analyzer::name Analyzer::disable_analyzer protocol_confirmation -## protocol_violation -## -## .. todo::While we declare an alias for the type here, the events/functions still -## use ``count``. That should be changed. -type AnalyzerID: count; - module Tunnel; export { ## Records the identity of an encapsulating parent of a tunneled connection. @@ -3065,12 +3054,12 @@ module GLOBAL; ## Number of bytes per packet to capture from live interfaces. const snaplen = 8192 &redef; +# Load BiFs defined by plugins. +@load base/bif/plugins + # Load these frameworks here because they use fairly deep integration with # BiFs and script-land defined types. @load base/frameworks/logging @load base/frameworks/input @load base/frameworks/analyzer @load base/frameworks/file-analysis - -# Load BiFs defined by plugins. -@load base/bif/plugins diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2bde8d65a5..a8ec20293b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -114,7 +114,6 @@ set(BIF_SRCS logging.bif input.bif event.bif - file_analysis.bif const.bif types.bif strings.bif @@ -150,6 +149,7 @@ set(bro_SUBDIR_LIBS CACHE INTERNAL "subdir libraries" FORCE) set(bro_PLUGIN_LIBS CACHE INTERNAL "plugin libraries" FORCE) add_subdirectory(analyzer) +add_subdirectory(file_analysis) set(bro_SUBDIRS ${bro_SUBDIR_LIBS} @@ -355,20 +355,12 @@ set(bro_SRCS input/readers/Binary.cc input/readers/SQLite.cc - file_analysis/Manager.cc - file_analysis/File.cc - file_analysis/FileTimer.cc - file_analysis/Analyzer.h - file_analysis/AnalyzerSet.cc - file_analysis/Extract.cc - file_analysis/Hash.cc - file_analysis/DataEvent.cc - 3rdparty/sqlite3.c plugin/Component.cc plugin/Manager.cc plugin/Plugin.cc + plugin/Macros.h nb_dns.c digest.h diff --git a/src/Func.cc b/src/Func.cc index 97d84013e6..f3718fe231 100644 --- a/src/Func.cc +++ 
b/src/Func.cc @@ -553,14 +553,12 @@ void builtin_error(const char* msg, BroObj* arg) #include "input.bif.func_h" #include "reporter.bif.func_h" #include "strings.bif.func_h" -#include "file_analysis.bif.func_h" #include "bro.bif.func_def" #include "logging.bif.func_def" #include "input.bif.func_def" #include "reporter.bif.func_def" #include "strings.bif.func_def" -#include "file_analysis.bif.func_def" void init_builtin_funcs() { @@ -575,7 +573,6 @@ void init_builtin_funcs() #include "input.bif.func_init" #include "reporter.bif.func_init" #include "strings.bif.func_init" -#include "file_analysis.bif.func_init" did_builtin_init = true; } diff --git a/src/NetVar.cc b/src/NetVar.cc index 74cd6d08d0..2f50ce528b 100644 --- a/src/NetVar.cc +++ b/src/NetVar.cc @@ -249,7 +249,6 @@ OpaqueType* entropy_type; #include "logging.bif.netvar_def" #include "input.bif.netvar_def" #include "reporter.bif.netvar_def" -#include "file_analysis.bif.netvar_def" void init_event_handlers() { @@ -317,7 +316,6 @@ void init_net_var() #include "logging.bif.netvar_init" #include "input.bif.netvar_init" #include "reporter.bif.netvar_init" -#include "file_analysis.bif.netvar_init" conn_id = internal_type("conn_id")->AsRecordType(); endpoint = internal_type("endpoint")->AsRecordType(); diff --git a/src/NetVar.h b/src/NetVar.h index 0ecceb9f92..ac825e7845 100644 --- a/src/NetVar.h +++ b/src/NetVar.h @@ -260,6 +260,5 @@ extern void init_net_var(); #include "logging.bif.netvar_h" #include "input.bif.netvar_h" #include "reporter.bif.netvar_h" -#include "file_analysis.bif.netvar_h" #endif diff --git a/src/analyzer/Component.cc b/src/analyzer/Component.cc index 5844da848f..2a48d5f160 100644 --- a/src/analyzer/Component.cc +++ b/src/analyzer/Component.cc @@ -4,26 +4,12 @@ #include "Manager.h" #include "../Desc.h" +#include "../util.h" using namespace analyzer; Tag::type_t Component::type_counter = 0; -static const char* canonify_name(const char* name) - { - unsigned int len = strlen(name); - char* nname = 
new char[len + 1]; - - for ( unsigned int i = 0; i < len; i++ ) - { - char c = isalnum(name[i]) ? name[i] : '_'; - nname[i] = toupper(c); - } - - nname[len] = '\0'; - return nname; - } - Component::Component(const char* arg_name, factory_callback arg_factory, Tag::subtype_t arg_subtype, bool arg_enabled, bool arg_partial) : plugin::Component(plugin::component::ANALYZER) { diff --git a/src/analyzer/Component.h b/src/analyzer/Component.h index b766c2fe82..a520047fdb 100644 --- a/src/analyzer/Component.h +++ b/src/analyzer/Component.h @@ -23,7 +23,6 @@ class Analyzer; */ class Component : public plugin::Component { public: - typedef bool (*available_callback)(); typedef Analyzer* (*factory_callback)(Connection* conn); /** diff --git a/src/analyzer/Tag.h b/src/analyzer/Tag.h index cf33dca41c..edb0ade8a7 100644 --- a/src/analyzer/Tag.h +++ b/src/analyzer/Tag.h @@ -8,6 +8,11 @@ class EnumVal; +namespace file_analysis { +class Manager; +class Component; +} + namespace analyzer { class Manager; @@ -24,7 +29,7 @@ class Component; * subtype form an analyzer "tag". Each unique tag corresponds to a single * "analyzer" from the user's perspective. At the script layer, these tags * are mapped into enums of type \c Analyzer::Tag. Internally, the - * analyzer::Mangager maintains the mapping of tag to analyzer (and it also + * analyzer::Manager maintains the mapping of tag to analyzer (and it also * assigns them their main types), and analyzer::Component creates new * tags. * @@ -121,9 +126,11 @@ public: protected: friend class analyzer::Manager; friend class analyzer::Component; + friend class file_analysis::Manager; + friend class file_analysis::Component; /** - * Constructor. Note + * Constructor. * * @param type The main type. 
Note that the \a analyzer::Manager * manages the value space internally, so noone else should assign diff --git a/src/const.bif b/src/const.bif index 31e6ccee1a..ea84b3363d 100644 --- a/src/const.bif +++ b/src/const.bif @@ -23,5 +23,3 @@ const Tunnel::delay_gtp_confirmation: bool; const Tunnel::ip_tunnel_timeout: interval; const Threading::heartbeat_interval: interval; - -const FileAnalysis::salt: string; diff --git a/src/event.bif b/src/event.bif index 9d831cf141..6f363cb961 100644 --- a/src/event.bif +++ b/src/event.bif @@ -942,19 +942,6 @@ event file_gap%(f: fa_file, offset: count, len: count%); ## .. bro:see:: file_new file_over_new_connection file_timeout file_gap event file_state_remove%(f: fa_file%); -## This event is generated each time file analysis generates a digest of the -## file contents. -## -## f: The file. -## -## kind: The type of digest algorithm. -## -## hash: The result of the hashing. -## -## .. bro:see:: FileAnalysis::add_analyzer FileAnalysis::ANALYZER_MD5 -## FileAnalysis::ANALYZER_SHA1 FileAnalysis::ANALYZER_SHA256 -event file_hash%(f: fa_file, kind: string, hash: string%); - ## Generated when an internal DNS lookup produces the same result as last time. ## Bro keeps an internal DNS cache for host names and IP addresses it has ## already resolved. 
This event is generated when a subsequent lookup returns diff --git a/src/file_analysis/Analyzer.h b/src/file_analysis/Analyzer.h index d32532b264..dba022efca 100644 --- a/src/file_analysis/Analyzer.h +++ b/src/file_analysis/Analyzer.h @@ -5,10 +5,13 @@ #include "Val.h" #include "NetVar.h" +#include "analyzer/Tag.h" + +#include "file_analysis/file_analysis.bif.h" namespace file_analysis { -typedef BifEnum::FileAnalysis::Analyzer FA_Tag; +typedef int FA_Tag; class File; @@ -94,8 +97,7 @@ public: static FA_Tag ArgsTag(const RecordVal* args) { using BifType::Record::FileAnalysis::AnalyzerArgs; - return static_cast( - args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum()); + return args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum(); } protected: @@ -119,9 +121,6 @@ private: File* file; /**< The file to which the analyzer is attached. */ }; -typedef file_analysis::Analyzer* (*AnalyzerInstantiator)(RecordVal* args, - File* file); - } // namespace file_analysis #endif diff --git a/src/file_analysis/AnalyzerSet.cc b/src/file_analysis/AnalyzerSet.cc index 83c60d9abe..e350e8b0d8 100644 --- a/src/file_analysis/AnalyzerSet.cc +++ b/src/file_analysis/AnalyzerSet.cc @@ -3,21 +3,10 @@ #include "AnalyzerSet.h" #include "File.h" #include "Analyzer.h" -#include "Extract.h" -#include "DataEvent.h" -#include "Hash.h" +#include "Manager.h" using namespace file_analysis; -// keep in order w/ declared enum values in file_analysis.bif -static AnalyzerInstantiator analyzer_factory[] = { - file_analysis::Extract::Instantiate, - file_analysis::MD5::Instantiate, - file_analysis::SHA1::Instantiate, - file_analysis::SHA256::Instantiate, - file_analysis::DataEvent::Instantiate, -}; - static void analyzer_del_func(void* v) { delete (file_analysis::Analyzer*) v; @@ -154,14 +143,13 @@ HashKey* AnalyzerSet::GetKey(const RecordVal* args) const file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const { - file_analysis::Analyzer* a = - 
analyzer_factory[file_analysis::Analyzer::ArgsTag(args)](args, file); + FA_Tag tag = file_analysis::Analyzer::ArgsTag(args); + file_analysis::Analyzer* a = file_mgr->InstantiateAnalyzer(tag, args, file); if ( ! a ) { - DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d failed for file id", - " %s", file_analysis::Analyzer::ArgsTag(args), - file->GetID().c_str()); + reporter->Error("Failed file analyzer %s instantiation for file id %s", + file_mgr->GetAnalyzerName(tag), file->GetID().c_str()); return 0; } diff --git a/src/file_analysis/CMakeLists.txt b/src/file_analysis/CMakeLists.txt new file mode 100644 index 0000000000..f22c293cc4 --- /dev/null +++ b/src/file_analysis/CMakeLists.txt @@ -0,0 +1,22 @@ +include(BroSubdir) + +include_directories(BEFORE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} +) + +add_subdirectory(analyzer) + +set(file_analysis_SRCS + Manager.cc + File.cc + FileTimer.cc + Analyzer.h + AnalyzerSet.cc + Component.cc +) + +bif_target(file_analysis.bif) + +bro_add_subdir_library(file_analysis ${file_analysis_SRCS} ${BIF_OUTPUT_CC}) +add_dependencies(bro_file_analysis generate_outputs) diff --git a/src/file_analysis/Component.cc b/src/file_analysis/Component.cc new file mode 100644 index 0000000000..5b6018c106 --- /dev/null +++ b/src/file_analysis/Component.cc @@ -0,0 +1,74 @@ +// See the file "COPYING" in the main distribution directory for copyright.
+ +#include "Component.h" +#include "Manager.h" + +#include "../Desc.h" +#include "../util.h" + +using namespace file_analysis; + +analyzer::Tag::type_t Component::type_counter = 0; + +Component::Component(const char* arg_name, factory_callback arg_factory, + analyzer::Tag::subtype_t arg_subtype) + : plugin::Component(plugin::component::FILE_ANALYZER) + { + name = copy_string(arg_name); + canon_name = canonify_name(arg_name); + factory = arg_factory; + + tag = analyzer::Tag(++type_counter, arg_subtype); + } + +Component::Component(const Component& other) + : plugin::Component(other.Type()) + { + name = copy_string(other.name); + canon_name = copy_string(other.canon_name); + factory = other.factory; + tag = other.tag; + } + +Component::~Component() + { + delete [] name; + delete [] canon_name; + } + +analyzer::Tag Component::Tag() const + { + return tag; + } + +void Component::Describe(ODesc* d) + { + plugin::Component::Describe(d); + d->Add(name); + d->Add(" ("); + + if ( factory ) + { + d->Add("ANALYZER_"); + d->Add(canon_name); + d->Add(", "); + } + + d->Add(")"); + } + +Component& Component::operator=(const Component& other) + { + if ( &other != this ) + { + // Free the strings this component owns before replacing them. + delete [] name; + delete [] canon_name; + name = copy_string(other.name); + canon_name = copy_string(other.canon_name); + factory = other.factory; + tag = other.tag; + } + + return *this; + } diff --git a/src/file_analysis/Component.h b/src/file_analysis/Component.h new file mode 100644 index 0000000000..8584b5eb09 --- /dev/null +++ b/src/file_analysis/Component.h @@ -0,0 +1,109 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#ifndef FILE_ANALYZER_PLUGIN_COMPONENT_H +#define FILE_ANALYZER_PLUGIN_COMPONENT_H + +#include "analyzer/Tag.h" +#include "plugin/Component.h" + +#include "Val.h" + +#include "../config.h" +#include "../util.h" + +namespace file_analysis { + +class File; +class Analyzer; + +/** + * Component description for plugins providing file analyzers.
+ * + * A plugin can provide a specific file analyzer by registering this + * analyzer component, describing the analyzer. + */ +class Component : public plugin::Component { +public: + typedef Analyzer* (*factory_callback)(RecordVal* args, File* file); + + /** + * Constructor. + * + * @param name The name of the provided analyzer. This name is used + * across the system to identify the analyzer, e.g., when calling + * file_analysis::Manager::InstantiateAnalyzer with a name. + * + * @param factory A factory function to instantiate instances of the + * analyzer's class, which must be derived directly or indirectly + * from file_analysis::Analyzer. This is typically a static \c + * Instantiate() method inside the class that just allocates and + * returns a new instance. + * + * @param subtype A subtype associated with this component that + * further distinguishes it. The subtype will be integrated into + * the analyzer::Tag that the manager associates with this analyzer, + * and analyzer instances can accordingly access it via analyzer::Tag(). + * If not used, leave at zero. + */ + Component(const char* name, factory_callback factory, + analyzer::Tag::subtype_t subtype = 0); + + /** + * Copy constructor. + */ + Component(const Component& other); + + /** + * Destructor. + */ + ~Component(); + + /** + * Returns the name of the analyzer. This name is unique across all + * analyzers and used to identify it. The returned name is a copy of + * what's passed to the constructor; see CanonicalName() for the + * upper-cased form usable as part of a script-level ID. + */ + const char* Name() const { return name; } + + /** + * Returns a canonicalized version of the analyzer's name. The + * returned name is derived from what's passed to the constructor but + * upper-cased and transformed to allow being part of a script-level + * ID. + */ + const char* CanonicalName() const { return canon_name; } + + /** + * Returns the analyzer's factory function.
+ */ + factory_callback Factory() const { return factory; } + + /** + * Returns the analyzer's tag. Note that this is automatically + * generated for each new Component, and hence unique across all of + * them. + */ + analyzer::Tag Tag() const; + + /** + * Generates a human-readable description of the component's main + * parameters. This goes into the output of \c "bro -NN". + */ + virtual void Describe(ODesc* d); + + Component& operator=(const Component& other); + +private: + const char* name; // The analyzer's name. + const char* canon_name; // The analyzer's canonical name. + factory_callback factory; // The analyzer's factory callback. + analyzer::Tag tag; // The automatically assigned analyzer tag. + + // Global counter used to generate unique tags. + static analyzer::Tag::type_t type_counter; +}; + +} + +#endif diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index a458134732..b0ba55d965 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -10,12 +10,18 @@ #include "Var.h" #include "Event.h" +#include "plugin/Manager.h" + using namespace file_analysis; TableVal* Manager::disabled = 0; +string Manager::salt; Manager::Manager() { + tag_enum_type = new EnumType("FileAnalysis::Tag"); + ::ID* id = install_ID("Tag", "FileAnalysis", true, true); + add_type(id, tag_enum_type, 0, 0); } Manager::~Manager() @@ -23,6 +29,40 @@ Manager::~Manager() Terminate(); } +void Manager::InitPreScript() + { + std::list<Component*> analyzers = plugin_mgr->Components<Component>(); + + for ( std::list<Component*>::const_iterator i = analyzers.begin(); + i != analyzers.end(); ++i ) + RegisterAnalyzerComponent(*i); + } + +void Manager::RegisterAnalyzerComponent(Component* component) + { + const char* cname = component->CanonicalName(); + + if ( tag_enum_type->Lookup("FileAnalysis", fmt("ANALYZER_%s", cname)) != -1 ) + reporter->FatalError("File Analyzer %s defined more than once", cname); + + DBG_LOG(DBG_FILE_ANALYSIS, "Registering analyzer %s (tag %s)", + component->Name(),
component->Tag().AsString().c_str()); + + analyzers_by_name.insert(std::make_pair(cname, component)); + analyzers_by_tag.insert(std::make_pair(component->Tag(), component)); + analyzers_by_val.insert(std::make_pair( + component->Tag().AsEnumVal()->InternalInt(), component)); + + string id = fmt("ANALYZER_%s", cname); + tag_enum_type->AddName("FileAnalysis", id.c_str(), + component->Tag().AsEnumVal()->InternalInt(), true); + } + +void Manager::InitPostScript() + { + #include "file_analysis.bif.init.cc" + } + void Manager::Terminate() { vector keys; @@ -35,8 +75,6 @@ void Manager::Terminate() string Manager::HashHandle(const string& handle) const { - static string salt; - if ( salt.empty() ) salt = BifConst::FileAnalysis::salt->CheckString(); @@ -327,3 +365,31 @@ bool Manager::IsDisabled(analyzer::Tag tag) return rval; } + +Analyzer* Manager::InstantiateAnalyzer(int tag, RecordVal* args, File* f) const + { + analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag); + + if ( it == analyzers_by_val.end() ) + reporter->InternalError("cannot instantiate unknown file analyzer: %d", + tag); + + Component* c = it->second; + + if ( ! 
c->Factory() ) + reporter->InternalError("file analyzer %s cannot be instantiated " + "dynamically", c->CanonicalName()); + + return c->Factory()(args, f); + } + +const char* Manager::GetAnalyzerName(int tag) const + { + analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag); + + if ( it == analyzers_by_val.end() ) + reporter->InternalError("cannot get name of unknown file analyzer: %d", + tag); + + return it->second->CanonicalName(); + } diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index a96324871f..e56d9e7476 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -17,9 +17,12 @@ #include "File.h" #include "FileTimer.h" +#include "Component.h" #include "analyzer/Tag.h" +#include "file_analysis/file_analysis.bif.h" + namespace file_analysis { /** @@ -38,6 +41,18 @@ public: */ ~Manager(); + /** + * First-stage initialization of the manager. This is called early on + * during Bro's initialization, before any scripts are processed. + */ + void InitPreScript(); + + /** + * Second-stage initialization of the manager. This is called late + * during Bro's initialization after any scripts are processed. + */ + void InitPostScript(); + /** * Times out any active file analysis to prepare for shutdown. */ @@ -182,6 +197,23 @@ public: */ bool IsIgnored(const string& file_id); + /** + * Instantiates a new file analyzer instance for the file. + * @param tag The file analyzer's tag. + * @param args The file analyzer argument/option values. + * @param f The file the analyzer is to be associated with. + * @return The new analyzer instance (unknown tags raise an internal error). + */ + Analyzer* InstantiateAnalyzer(int tag, RecordVal* args, File* f) const; + + /** + * Translates a script-level file analyzer tag into the corresponding file + * analyzer name. + * @param tag The enum val of a file analyzer. + * @return The human-readable name of the file analyzer.
+ */ + const char* GetAnalyzerName(int tag) const; + protected: friend class FileTimer; @@ -255,11 +287,23 @@ protected: static bool IsDisabled(analyzer::Tag tag); private: + typedef map<string, Component*> analyzer_map_by_name; + typedef map<analyzer::Tag, Component*> analyzer_map_by_tag; + typedef map<int, Component*> analyzer_map_by_val; + + void RegisterAnalyzerComponent(Component* component); + IDMap id_map; /**< Map file ID to file_analysis::File records. */ IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */ - string current_file_id; /**< Hash of what get_file_handle event sets.*/ + string current_file_id; /**< Hash of what get_file_handle event sets. */ + EnumType* tag_enum_type; /**< File analyzer tag type. */ + + analyzer_map_by_name analyzers_by_name; + analyzer_map_by_tag analyzers_by_tag; + analyzer_map_by_val analyzers_by_val; static TableVal* disabled; /**< Table of disabled analyzers. */ + static string salt; /**< A salt added to file handles before hashing. */ }; } // namespace file_analysis diff --git a/src/file_analysis/analyzer/CMakeLists.txt b/src/file_analysis/analyzer/CMakeLists.txt new file mode 100644 index 0000000000..bfafcd2894 --- /dev/null +++ b/src/file_analysis/analyzer/CMakeLists.txt @@ -0,0 +1,3 @@ +add_subdirectory(data_event) +add_subdirectory(extract) +add_subdirectory(hash) diff --git a/src/file_analysis/analyzer/data_event/CMakeLists.txt b/src/file_analysis/analyzer/data_event/CMakeLists.txt new file mode 100644 index 0000000000..81551feda2 --- /dev/null +++ b/src/file_analysis/analyzer/data_event/CMakeLists.txt @@ -0,0 +1,8 @@ +include(BroPlugin) + +include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR}) + +bro_plugin_begin(Bro FileDataEvent) +bro_plugin_cc(DataEvent.cc Plugin.cc) +bro_plugin_end() diff --git a/src/file_analysis/DataEvent.cc b/src/file_analysis/analyzer/data_event/DataEvent.cc similarity index 100% rename from src/file_analysis/DataEvent.cc rename to src/file_analysis/analyzer/data_event/DataEvent.cc diff --git
a/src/file_analysis/DataEvent.h b/src/file_analysis/analyzer/data_event/DataEvent.h similarity index 100% rename from src/file_analysis/DataEvent.h rename to src/file_analysis/analyzer/data_event/DataEvent.h diff --git a/src/file_analysis/analyzer/data_event/Plugin.cc b/src/file_analysis/analyzer/data_event/Plugin.cc new file mode 100644 index 0000000000..7eb637f3a5 --- /dev/null +++ b/src/file_analysis/analyzer/data_event/Plugin.cc @@ -0,0 +1,26 @@ +#include "plugin/Plugin.h" +#include "file_analysis/Component.h" + +#include "DataEvent.h" + +namespace plugin { namespace Bro_FileDataEvent { + +class Plugin : public plugin::Plugin { +protected: + void InitPreScript() + { + SetName("Bro::FileDataEvent"); + SetVersion(-1); + SetAPIVersion(BRO_PLUGIN_API_VERSION); + SetDynamicPlugin(false); + + SetDescription("Delivers file content via events"); + + AddComponent(new ::file_analysis::Component("DATA_EVENT", + ::file_analysis::DataEvent::Instantiate)); + } +}; + +Plugin __plugin; + +} } diff --git a/src/file_analysis/analyzer/extract/CMakeLists.txt b/src/file_analysis/analyzer/extract/CMakeLists.txt new file mode 100644 index 0000000000..df3fa2646d --- /dev/null +++ b/src/file_analysis/analyzer/extract/CMakeLists.txt @@ -0,0 +1,8 @@ +include(BroPlugin) + +include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR}) + +bro_plugin_begin(Bro FileExtract) +bro_plugin_cc(Extract.cc Plugin.cc) +bro_plugin_end() diff --git a/src/file_analysis/Extract.cc b/src/file_analysis/analyzer/extract/Extract.cc similarity index 100% rename from src/file_analysis/Extract.cc rename to src/file_analysis/analyzer/extract/Extract.cc diff --git a/src/file_analysis/Extract.h b/src/file_analysis/analyzer/extract/Extract.h similarity index 100% rename from src/file_analysis/Extract.h rename to src/file_analysis/analyzer/extract/Extract.h diff --git a/src/file_analysis/analyzer/extract/Plugin.cc b/src/file_analysis/analyzer/extract/Plugin.cc new file mode 100644 index 
0000000000..f6cde57f03 --- /dev/null +++ b/src/file_analysis/analyzer/extract/Plugin.cc @@ -0,0 +1,26 @@ +#include "plugin/Plugin.h" +#include "file_analysis/Component.h" + +#include "Extract.h" + +namespace plugin { namespace Bro_FileExtract { + +class Plugin : public plugin::Plugin { +protected: + void InitPreScript() + { + SetName("Bro::FileExtract"); + SetVersion(-1); + SetAPIVersion(BRO_PLUGIN_API_VERSION); + SetDynamicPlugin(false); + + SetDescription("Extract file content to local file system"); + + AddComponent(new ::file_analysis::Component("EXTRACT", + ::file_analysis::Extract::Instantiate)); + } +}; + +Plugin __plugin; + +} } diff --git a/src/file_analysis/analyzer/hash/CMakeLists.txt b/src/file_analysis/analyzer/hash/CMakeLists.txt new file mode 100644 index 0000000000..5734740198 --- /dev/null +++ b/src/file_analysis/analyzer/hash/CMakeLists.txt @@ -0,0 +1,9 @@ +include(BroPlugin) + +include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR}) + +bro_plugin_begin(Bro FileHash) +bro_plugin_cc(Hash.cc Plugin.cc) +bro_plugin_bif(events.bif) +bro_plugin_end() diff --git a/src/file_analysis/Hash.cc b/src/file_analysis/analyzer/hash/Hash.cc similarity index 100% rename from src/file_analysis/Hash.cc rename to src/file_analysis/analyzer/hash/Hash.cc diff --git a/src/file_analysis/Hash.h b/src/file_analysis/analyzer/hash/Hash.h similarity index 99% rename from src/file_analysis/Hash.h rename to src/file_analysis/analyzer/hash/Hash.h index e44af337aa..13303e21fc 100644 --- a/src/file_analysis/Hash.h +++ b/src/file_analysis/analyzer/hash/Hash.h @@ -10,6 +10,8 @@ #include "File.h" #include "Analyzer.h" +#include "events.bif.h" + namespace file_analysis { /** diff --git a/src/file_analysis/analyzer/hash/Plugin.cc b/src/file_analysis/analyzer/hash/Plugin.cc new file mode 100644 index 0000000000..1a7254105e --- /dev/null +++ b/src/file_analysis/analyzer/hash/Plugin.cc @@ -0,0 +1,33 @@ +#include "plugin/Plugin.h" +#include 
"file_analysis/Component.h" + +#include "Hash.h" + +namespace plugin { namespace Bro_FileHash { + +class Plugin : public plugin::Plugin { +protected: + void InitPreScript() + { + SetName("Bro::FileHash"); + SetVersion(-1); + SetAPIVersion(BRO_PLUGIN_API_VERSION); + SetDynamicPlugin(false); + + SetDescription("Hash file content"); + + AddComponent(new ::file_analysis::Component("MD5", + ::file_analysis::MD5::Instantiate)); + AddComponent(new ::file_analysis::Component("SHA1", + ::file_analysis::SHA1::Instantiate)); + AddComponent(new ::file_analysis::Component("SHA256", + ::file_analysis::SHA256::Instantiate)); + + extern std::list > __bif_events_init(); + AddBifInitFunction(&__bif_events_init); + } +}; + +Plugin __plugin; + +} } diff --git a/src/file_analysis/analyzer/hash/events.bif b/src/file_analysis/analyzer/hash/events.bif new file mode 100644 index 0000000000..b4a8de1c74 --- /dev/null +++ b/src/file_analysis/analyzer/hash/events.bif @@ -0,0 +1,12 @@ +## This event is generated each time file analysis generates a digest of the +## file contents. +## +## f: The file. +## +## kind: The type of digest algorithm. +## +## hash: The result of the hashing. +## +## .. bro:see:: FileAnalysis::add_analyzer FileAnalysis::ANALYZER_MD5 +## FileAnalysis::ANALYZER_SHA1 FileAnalysis::ANALYZER_SHA256 +event file_hash%(f: fa_file, kind: string, hash: string%); diff --git a/src/file_analysis.bif b/src/file_analysis/file_analysis.bif similarity index 77% rename from src/file_analysis.bif rename to src/file_analysis/file_analysis.bif index ef46ccf9c1..06ae9450dd 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis/file_analysis.bif @@ -1,4 +1,4 @@ -##! Internal functions and types used by the logging framework. +##! Internal functions and types used by the file analysis framework. module FileAnalysis; @@ -8,25 +8,6 @@ module FileAnalysis; type AnalyzerArgs: record; -## An enumeration of various file analysis actions that can be taken. 
-enum Analyzer %{ - - ## Extract a file to local filesystem - ANALYZER_EXTRACT, - - ## Calculate an MD5 digest of the file's contents. - ANALYZER_MD5, - - ## Calculate an SHA1 digest of the file's contents. - ANALYZER_SHA1, - - ## Calculate an SHA256 digest of the file's contents. - ANALYZER_SHA256, - - ## Deliver the file contents to the script-layer in an event. - ANALYZER_DATA_EVENT, -%} - ## :bro:see:`FileAnalysis::set_timeout_interval`. function FileAnalysis::__set_timeout_interval%(file_id: string, t: interval%): bool %{ @@ -76,3 +57,5 @@ function set_file_handle%(handle: string%): any file_mgr->SetHandle(handle->CheckString()); return 0; %} + +const FileAnalysis::salt: string; diff --git a/src/main.cc b/src/main.cc index 491f8a732d..9947d51709 100644 --- a/src/main.cc +++ b/src/main.cc @@ -834,6 +834,7 @@ int main(int argc, char** argv) plugin_mgr->InitPreScript(); analyzer_mgr->InitPreScript(); + file_mgr->InitPreScript(); if ( events_file ) event_player = new EventPlayer(events_file); @@ -855,6 +856,7 @@ int main(int argc, char** argv) plugin_mgr->InitPostScript(); analyzer_mgr->InitPostScript(); + file_mgr->InitPostScript(); if ( print_plugins ) { diff --git a/src/plugin/Component.cc b/src/plugin/Component.cc index 7d2e69eb86..48b19f8f07 100644 --- a/src/plugin/Component.cc +++ b/src/plugin/Component.cc @@ -39,6 +39,10 @@ void Component::Describe(ODesc* d) d->Add("Analyzer"); break; + case component::FILE_ANALYZER: + d->Add("File Analyzer"); + break; + default: reporter->InternalError("unknown component type in plugin::Component::Describe"); } diff --git a/src/plugin/Component.h b/src/plugin/Component.h index fbeb70ebed..1a4b41b43f 100644 --- a/src/plugin/Component.h +++ b/src/plugin/Component.h @@ -15,16 +15,11 @@ namespace component { enum Type { READER, /// An input reader (not currently used). WRITER, /// An logging writer (not currenly used). - ANALYZER /// A protocol analyzer. + ANALYZER, /// A protocol analyzer. + FILE_ANALYZER /// A file analyzer. 
}; } -#if 0 -namespace input { class PluginComponent; } -namespace logging { class PluginComponent; } -namespace analyzer { class PluginComponent; } -#endif - /** * Base class for plugin components. A component is a specific piece of * functionality that a plugin provides, such as a protocol analyzer or a log diff --git a/src/util.cc b/src/util.cc index de9bd5b679..cff36f0f23 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1617,3 +1617,19 @@ const char* bro_magic_buffer(magic_t cookie, const void* buffer, size_t length) return rval; } + +const char* canonify_name(const char* name) + { + unsigned int len = strlen(name); + char* nname = new char[len + 1]; + + for ( unsigned int i = 0; i < len; i++ ) + { + // Cast to unsigned char: a negative char passed to isalnum()/toupper() is undefined. + char c = isalnum((unsigned char) name[i]) ? name[i] : '_'; + nname[i] = toupper((unsigned char) c); + } + + nname[len] = '\0'; + return nname; + } diff --git a/src/util.h b/src/util.h index 49bcbf318b..cafa63b7e8 100644 --- a/src/util.h +++ b/src/util.h @@ -383,4 +383,12 @@ extern magic_t magic_mime_cookie; void bro_init_magic(magic_t* cookie_ptr, int flags); const char* bro_magic_buffer(magic_t cookie, const void* buffer, size_t length); +/** + * Canonicalizes a name by converting it to uppercase letters and replacing + * all non-alphanumeric characters with an underscore. + * @param name The string to canonicalize. + * @return The canonicalized version of \a name which caller may later delete[].
+ */ +const char* canonify_name(const char* name); + #endif diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index 06652e37e7..9d3fb87861 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-05-17-03-57-47 +#open 2013-06-10-19-50-56 #fields name #types string scripts/base/init-bare.bro @@ -13,31 +13,6 @@ scripts/base/init-bare.bro build/scripts/base/bif/bro.bif.bro build/scripts/base/bif/reporter.bif.bro build/scripts/base/bif/event.bif.bro - scripts/base/frameworks/logging/__load__.bro - scripts/base/frameworks/logging/main.bro - build/scripts/base/bif/logging.bif.bro - scripts/base/frameworks/logging/postprocessors/__load__.bro - scripts/base/frameworks/logging/postprocessors/scp.bro - scripts/base/frameworks/logging/postprocessors/sftp.bro - scripts/base/frameworks/logging/writers/ascii.bro - scripts/base/frameworks/logging/writers/dataseries.bro - scripts/base/frameworks/logging/writers/sqlite.bro - scripts/base/frameworks/logging/writers/elasticsearch.bro - scripts/base/frameworks/logging/writers/none.bro - scripts/base/frameworks/input/__load__.bro - scripts/base/frameworks/input/main.bro - build/scripts/base/bif/input.bif.bro - scripts/base/frameworks/input/readers/ascii.bro - scripts/base/frameworks/input/readers/raw.bro - scripts/base/frameworks/input/readers/benchmark.bro - scripts/base/frameworks/input/readers/binary.bro - scripts/base/frameworks/input/readers/sqlite.bro - scripts/base/frameworks/analyzer/__load__.bro - scripts/base/frameworks/analyzer/main.bro - build/scripts/base/bif/analyzer.bif.bro - scripts/base/frameworks/file-analysis/__load__.bro - scripts/base/frameworks/file-analysis/main.bro - 
build/scripts/base/bif/file_analysis.bif.bro build/scripts/base/bif/plugins/__load__.bro build/scripts/base/bif/plugins/Bro_ARP.events.bif.bro build/scripts/base/bif/plugins/Bro_AYIYA.events.bif.bro @@ -50,6 +25,7 @@ scripts/base/init-bare.bro build/scripts/base/bif/plugins/Bro_FTP.events.bif.bro build/scripts/base/bif/plugins/Bro_FTP.functions.bif.bro build/scripts/base/bif/plugins/Bro_File.events.bif.bro + build/scripts/base/bif/plugins/Bro_FileHash.events.bif.bro build/scripts/base/bif/plugins/Bro_Finger.events.bif.bro build/scripts/base/bif/plugins/Bro_GTPv1.events.bif.bro build/scripts/base/bif/plugins/Bro_Gnutella.events.bif.bro @@ -85,6 +61,31 @@ scripts/base/init-bare.bro build/scripts/base/bif/plugins/Bro_Teredo.events.bif.bro build/scripts/base/bif/plugins/Bro_UDP.events.bif.bro build/scripts/base/bif/plugins/Bro_ZIP.events.bif.bro + scripts/base/frameworks/logging/__load__.bro + scripts/base/frameworks/logging/main.bro + build/scripts/base/bif/logging.bif.bro + scripts/base/frameworks/logging/postprocessors/__load__.bro + scripts/base/frameworks/logging/postprocessors/scp.bro + scripts/base/frameworks/logging/postprocessors/sftp.bro + scripts/base/frameworks/logging/writers/ascii.bro + scripts/base/frameworks/logging/writers/dataseries.bro + scripts/base/frameworks/logging/writers/sqlite.bro + scripts/base/frameworks/logging/writers/elasticsearch.bro + scripts/base/frameworks/logging/writers/none.bro + scripts/base/frameworks/input/__load__.bro + scripts/base/frameworks/input/main.bro + build/scripts/base/bif/input.bif.bro + scripts/base/frameworks/input/readers/ascii.bro + scripts/base/frameworks/input/readers/raw.bro + scripts/base/frameworks/input/readers/benchmark.bro + scripts/base/frameworks/input/readers/binary.bro + scripts/base/frameworks/input/readers/sqlite.bro + scripts/base/frameworks/analyzer/__load__.bro + scripts/base/frameworks/analyzer/main.bro + build/scripts/base/bif/analyzer.bif.bro + 
scripts/base/frameworks/file-analysis/__load__.bro + scripts/base/frameworks/file-analysis/main.bro + build/scripts/base/bif/file_analysis.bif.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-05-17-03-57-47 +#close 2013-06-10-19-50-56 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index cb92b663f0..b861f44266 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-05-17-03-58-48 +#open 2013-06-10-19-50-57 #fields name #types string scripts/base/init-bare.bro @@ -13,31 +13,6 @@ scripts/base/init-bare.bro build/scripts/base/bif/bro.bif.bro build/scripts/base/bif/reporter.bif.bro build/scripts/base/bif/event.bif.bro - scripts/base/frameworks/logging/__load__.bro - scripts/base/frameworks/logging/main.bro - build/scripts/base/bif/logging.bif.bro - scripts/base/frameworks/logging/postprocessors/__load__.bro - scripts/base/frameworks/logging/postprocessors/scp.bro - scripts/base/frameworks/logging/postprocessors/sftp.bro - scripts/base/frameworks/logging/writers/ascii.bro - scripts/base/frameworks/logging/writers/dataseries.bro - scripts/base/frameworks/logging/writers/sqlite.bro - scripts/base/frameworks/logging/writers/elasticsearch.bro - scripts/base/frameworks/logging/writers/none.bro - scripts/base/frameworks/input/__load__.bro - scripts/base/frameworks/input/main.bro - build/scripts/base/bif/input.bif.bro - scripts/base/frameworks/input/readers/ascii.bro - scripts/base/frameworks/input/readers/raw.bro - scripts/base/frameworks/input/readers/benchmark.bro - scripts/base/frameworks/input/readers/binary.bro - scripts/base/frameworks/input/readers/sqlite.bro - 
scripts/base/frameworks/analyzer/__load__.bro - scripts/base/frameworks/analyzer/main.bro - build/scripts/base/bif/analyzer.bif.bro - scripts/base/frameworks/file-analysis/__load__.bro - scripts/base/frameworks/file-analysis/main.bro - build/scripts/base/bif/file_analysis.bif.bro build/scripts/base/bif/plugins/__load__.bro build/scripts/base/bif/plugins/Bro_ARP.events.bif.bro build/scripts/base/bif/plugins/Bro_AYIYA.events.bif.bro @@ -50,6 +25,7 @@ scripts/base/init-bare.bro build/scripts/base/bif/plugins/Bro_FTP.events.bif.bro build/scripts/base/bif/plugins/Bro_FTP.functions.bif.bro build/scripts/base/bif/plugins/Bro_File.events.bif.bro + build/scripts/base/bif/plugins/Bro_FileHash.events.bif.bro build/scripts/base/bif/plugins/Bro_Finger.events.bif.bro build/scripts/base/bif/plugins/Bro_GTPv1.events.bif.bro build/scripts/base/bif/plugins/Bro_Gnutella.events.bif.bro @@ -85,6 +61,31 @@ scripts/base/init-bare.bro build/scripts/base/bif/plugins/Bro_Teredo.events.bif.bro build/scripts/base/bif/plugins/Bro_UDP.events.bif.bro build/scripts/base/bif/plugins/Bro_ZIP.events.bif.bro + scripts/base/frameworks/logging/__load__.bro + scripts/base/frameworks/logging/main.bro + build/scripts/base/bif/logging.bif.bro + scripts/base/frameworks/logging/postprocessors/__load__.bro + scripts/base/frameworks/logging/postprocessors/scp.bro + scripts/base/frameworks/logging/postprocessors/sftp.bro + scripts/base/frameworks/logging/writers/ascii.bro + scripts/base/frameworks/logging/writers/dataseries.bro + scripts/base/frameworks/logging/writers/sqlite.bro + scripts/base/frameworks/logging/writers/elasticsearch.bro + scripts/base/frameworks/logging/writers/none.bro + scripts/base/frameworks/input/__load__.bro + scripts/base/frameworks/input/main.bro + build/scripts/base/bif/input.bif.bro + scripts/base/frameworks/input/readers/ascii.bro + scripts/base/frameworks/input/readers/raw.bro + scripts/base/frameworks/input/readers/benchmark.bro + scripts/base/frameworks/input/readers/binary.bro 
+ scripts/base/frameworks/input/readers/sqlite.bro + scripts/base/frameworks/analyzer/__load__.bro + scripts/base/frameworks/analyzer/main.bro + build/scripts/base/bif/analyzer.bif.bro + scripts/base/frameworks/file-analysis/__load__.bro + scripts/base/frameworks/file-analysis/main.bro + build/scripts/base/bif/file_analysis.bif.bro scripts/base/init-default.bro scripts/base/utils/site.bro scripts/base/utils/patterns.bro @@ -191,4 +192,4 @@ scripts/base/init-default.bro scripts/base/protocols/syslog/main.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-05-17-03-58-48 +#close 2013-06-10-19-50-57