From b7877792c98c22f072f26ed5ef4870e598fcf672 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 5 Aug 2013 00:02:48 -0400 Subject: [PATCH] First commit of file entropy analyzer. - Code comments need cleaned up still. --- scripts/policy/frameworks/files/entropy.bro | 19 +++++ src/file_analysis/analyzer/CMakeLists.txt | 1 + .../analyzer/entropy/CMakeLists.txt | 9 ++ src/file_analysis/analyzer/entropy/Entropy.cc | 71 ++++++++++++++++ src/file_analysis/analyzer/entropy/Entropy.h | 84 +++++++++++++++++++ src/file_analysis/analyzer/entropy/Plugin.cc | 29 +++++++ src/file_analysis/analyzer/entropy/events.bif | 8 ++ 7 files changed, 221 insertions(+) create mode 100644 scripts/policy/frameworks/files/entropy.bro create mode 100644 src/file_analysis/analyzer/entropy/CMakeLists.txt create mode 100644 src/file_analysis/analyzer/entropy/Entropy.cc create mode 100644 src/file_analysis/analyzer/entropy/Entropy.h create mode 100644 src/file_analysis/analyzer/entropy/Plugin.cc create mode 100644 src/file_analysis/analyzer/entropy/events.bif diff --git a/scripts/policy/frameworks/files/entropy.bro b/scripts/policy/frameworks/files/entropy.bro new file mode 100644 index 0000000000..89dcead7d6 --- /dev/null +++ b/scripts/policy/frameworks/files/entropy.bro @@ -0,0 +1,19 @@ + +module Files; + +export { + redef record Files::Info += { + ## The information density of the contents of the file, expressed as a number of bits per character. + entropy: double &log &optional; + }; +} + +event file_new(f: fa_file) + { + Files::add_analyzer(f, Files::ANALYZER_ENTROPY); + } + +event file_entropy(f: fa_file, ent: entropy_test_result) + { + f$info$entropy = ent$entropy; + } \ No newline at end of file diff --git a/src/file_analysis/analyzer/CMakeLists.txt b/src/file_analysis/analyzer/CMakeLists.txt index bfafcd2894..ca93c4512c 100644 --- a/src/file_analysis/analyzer/CMakeLists.txt +++ b/src/file_analysis/analyzer/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(data_event) +add_subdirectory(entropy) add_subdirectory(extract) add_subdirectory(hash) diff --git a/src/file_analysis/analyzer/entropy/CMakeLists.txt b/src/file_analysis/analyzer/entropy/CMakeLists.txt new file mode 100644 index 0000000000..38db5e726a --- /dev/null +++ b/src/file_analysis/analyzer/entropy/CMakeLists.txt @@ -0,0 +1,9 @@ +include(BroPlugin) + +include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR}) + +bro_plugin_begin(Bro FileEntropy) +bro_plugin_cc(Entropy.cc Plugin.cc ../../Analyzer.cc) +bro_plugin_bif(events.bif) +bro_plugin_end() diff --git a/src/file_analysis/analyzer/entropy/Entropy.cc b/src/file_analysis/analyzer/entropy/Entropy.cc new file mode 100644 index 0000000000..2a1bc72723 --- /dev/null +++ b/src/file_analysis/analyzer/entropy/Entropy.cc @@ -0,0 +1,71 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include + +#include "Entropy.h" +#include "util.h" +#include "Event.h" +#include "file_analysis/Manager.h" + +using namespace file_analysis; + +Entropy::Entropy(RecordVal* args, File* file) + : file_analysis::Analyzer(file_mgr->GetComponentTag("ENTROPY"), args, file) + { + //entropy->Init(); + entropy = new EntropyVal; + } + +Entropy::~Entropy() + { + Unref(entropy); + } + +file_analysis::Analyzer* Entropy::Instantiate(RecordVal* args, File* file) + { + return new Entropy(args, file); + } + +bool Entropy::DeliverStream(const u_char* data, uint64 len) + { + if ( ! fed ) + fed = len > 0; + + entropy->Feed(data, len); + return true; + } + +bool Entropy::EndOfFile() + { + Finalize(); + return false; + } + +bool Entropy::Undelivered(uint64 offset, uint64 len) + { + return false; + } + +void Entropy::Finalize() + { + //if ( ! entropy->IsValid() || ! fed ) + if ( ! fed ) + return; + + val_list* vl = new val_list(); + vl->append(GetFile()->GetVal()->Ref()); + + double montepi, scc, ent, mean, chisq; + montepi = scc = ent = mean = chisq = 0.0; + entropy->Get(&ent, &chisq, &mean, &montepi, &scc); + + RecordVal* ent_result = new RecordVal(entropy_test_result); + ent_result->Assign(0, new Val(ent, TYPE_DOUBLE)); + ent_result->Assign(1, new Val(chisq, TYPE_DOUBLE)); + ent_result->Assign(2, new Val(mean, TYPE_DOUBLE)); + ent_result->Assign(3, new Val(montepi, TYPE_DOUBLE)); + ent_result->Assign(4, new Val(scc, TYPE_DOUBLE)); + + vl->append(ent_result); + mgr.QueueEvent(file_entropy, vl); + } diff --git a/src/file_analysis/analyzer/entropy/Entropy.h b/src/file_analysis/analyzer/entropy/Entropy.h new file mode 100644 index 0000000000..6a5075263c --- /dev/null +++ b/src/file_analysis/analyzer/entropy/Entropy.h @@ -0,0 +1,84 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#ifndef FILE_ANALYSIS_ENTROPY_H +#define FILE_ANALYSIS_ENTROPY_H + +#include + +#include "Val.h" +#include "OpaqueVal.h" +#include "File.h" +#include "Analyzer.h" + +#include "events.bif.h" + +namespace file_analysis { + +/** + * An analyzer to produce a hash of file contents. + */ +class Entropy : public file_analysis::Analyzer { +public: + + /** + * Destructor. + */ + virtual ~Entropy(); + + /** + * Create a new instance of an Extract analyzer. + * @param args the \c AnalyzerArgs value which represents the analyzer. + * @param file the file to which the analyzer will be attached. + * @return the new Extract analyzer instance or a null pointer if the + * the "extraction_file" field of \a args wasn't set. + */ + static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file); + + /** + * Incrementally hash next chunk of file contents. + * @param data pointer to start of a chunk of a file data. + * @param len number of bytes in the data chunk. + * @return false if the digest is in an invalid state, else true. + */ + virtual bool DeliverStream(const u_char* data, uint64 len); + + /** + * Finalizes the hash and raises a "file_entropy_test" event. + * @return always false so analyze will be deteched from file. + */ + virtual bool EndOfFile(); + + /** + * Missing data can't be handled, so just indicate the this analyzer should + * be removed from receiving further data. The hash will not be finalized. + * @param offset byte offset in file at which missing chunk starts. + * @param len number of missing bytes. + * @return always false so analyzer will detach from file. + */ + virtual bool Undelivered(uint64 offset, uint64 len); + +protected: + + /** + * Constructor. + * @param args the \c AnalyzerArgs value which represents the analyzer. + * @param file the file to which the analyzer will be attached. + * @param hv specific hash calculator object. + * @param kind human readable name of the hash algorithm to use. + */ + Entropy(RecordVal* args, File* file); + + /** + * If some file contents have been seen, finalizes the hash of them and + * raises the "file_hash" event with the results. + */ + void Finalize(); + +private: + EntropyVal* entropy; + bool fed; +}; + +} // namespace file_analysis + +#endif diff --git a/src/file_analysis/analyzer/entropy/Plugin.cc b/src/file_analysis/analyzer/entropy/Plugin.cc new file mode 100644 index 0000000000..3eeae62480 --- /dev/null +++ b/src/file_analysis/analyzer/entropy/Plugin.cc @@ -0,0 +1,29 @@ +#include "plugin/Plugin.h" +#include "file_analysis/Component.h" + +#include "Entropy.h" + +namespace plugin { namespace Bro_FileEntropy { + +class Plugin : public plugin::Plugin { +protected: + void InitPreScript() + { + SetName("Bro::FileEntropy"); + SetVersion(-1); + SetAPIVersion(BRO_PLUGIN_API_VERSION); + SetDynamicPlugin(false); + + SetDescription("Entropy test file content"); + + AddComponent(new ::file_analysis::Component("ENTROPY", + ::file_analysis::Entropy::Instantiate)); + + extern std::list > __bif_events_init(); + AddBifInitFunction(&__bif_events_init); + } +}; + +Plugin __plugin; + +} } diff --git a/src/file_analysis/analyzer/entropy/events.bif b/src/file_analysis/analyzer/entropy/events.bif new file mode 100644 index 0000000000..a51bb3d39b --- /dev/null +++ b/src/file_analysis/analyzer/entropy/events.bif @@ -0,0 +1,8 @@ +## This event is generated each time file analysis performs +## entropy testing on a file. +## +## f: The file. +## +## ent: The results of the entropy testing. +## +event file_entropy%(f: fa_file, ent: entropy_test_result%); \ No newline at end of file