Merge remote-tracking branch 'origin/master' into topic/seth/file-analysis-exe-analyzer

Conflicts:
	src/CMakeLists.txt
	src/binpac_bro.h
	src/event.bif
	src/file_analysis.bif
	src/file_analysis/AnalyzerSet.cc
This commit is contained in:
Seth Hall 2013-07-27 00:04:40 -04:00
commit 998cedb3b8
670 changed files with 35868 additions and 15013 deletions

View file

@ -5,10 +5,13 @@
#include "Val.h"
#include "NetVar.h"
#include "analyzer/Tag.h"
#include "file_analysis/file_analysis.bif.h"
namespace file_analysis {
typedef BifEnum::FileAnalysis::Analyzer FA_Tag;
typedef int FA_Tag;
class File;
@ -17,6 +20,11 @@ class File;
*/
class Analyzer {
public:
/**
* Destructor. Nothing special about it. Virtual since we definitely expect
* to delete instances of derived classes via pointers to this class.
*/
virtual ~Analyzer()
{
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %d", tag);
@ -24,7 +32,10 @@ public:
}
/**
* Subclasses may override this to receive file data non-sequentially.
* Subclasses may override this metod to receive file data non-sequentially.
* @param data points to start of a chunk of file data.
* @param len length in bytes of the chunk of data pointed to by \a data.
* @param offset the byte offset within full file that data chunk starts.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
@ -32,7 +43,9 @@ public:
{ return true; }
/**
* Subclasses may override this to receive file sequentially.
* Subclasses may override this method to receive file sequentially.
* @param data points to start of the next chunk of file data.
* @param len length in bytes of the chunk of data pointed to by \a data.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
@ -40,7 +53,7 @@ public:
{ return true; }
/**
* Subclasses may override this to specifically handle an EOF signal,
* Subclasses may override this method to specifically handle an EOF signal,
* which means no more data is going to be incoming and the analyzer
* may be deleted/cleaned up soon.
* @return true if the analyzer is still in a valid state to continue
@ -50,7 +63,10 @@ public:
{ return true; }
/**
* Subclasses may override this to handle missing data in a file stream.
* Subclasses may override this method to handle missing data in a file.
* @param offset the byte offset within full file at which the missing
* data chunk occurs.
* @param len the number of missing bytes.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
@ -73,17 +89,25 @@ public:
File* GetFile() const { return file; }
/**
* Retrieves an analyzer tag field from full analyzer argument record.
* @param args an \c AnalyzerArgs (script-layer type) value.
* @return the analyzer tag equivalent of the 'tag' field from the
* AnalyzerArgs value \a args.
* \c AnalyzerArgs value \a args.
*/
static FA_Tag ArgsTag(const RecordVal* args)
{
using BifType::Record::FileAnalysis::AnalyzerArgs;
return static_cast<FA_Tag>(
args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum());
return args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum();
}
protected:
/**
* Constructor. Only derived classes are meant to be instantiated.
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifiying
* tunable options, if any, related to a particular analyzer type.
* @param arg_file the file to which the the analyzer is being attached.
*/
Analyzer(RecordVal* arg_args, File* arg_file)
: tag(file_analysis::Analyzer::ArgsTag(arg_args)),
args(arg_args->Ref()->AsRecordVal()),
@ -91,13 +115,11 @@ protected:
{}
private:
FA_Tag tag;
RecordVal* args;
File* file;
};
typedef file_analysis::Analyzer* (*AnalyzerInstantiator)(RecordVal* args,
File* file);
FA_Tag tag; /**< The particular analyzer type of the analyzer instance. */
RecordVal* args; /**< \c AnalyzerArgs val gives tunable analyzer params. */
File* file; /**< The file to which the analyzer is attached. */
};
} // namespace file_analysis

View file

@ -3,23 +3,10 @@
#include "AnalyzerSet.h"
#include "File.h"
#include "Analyzer.h"
#include "Extract.h"
#include "DataEvent.h"
#include "Hash.h"
#include "analyzers/PE.h"
#include "Manager.h"
using namespace file_analysis;
// keep in order w/ declared enum values in file_analysis.bif
static AnalyzerInstantiator analyzer_factory[] = {
file_analysis::Extract::Instantiate,
file_analysis::MD5::Instantiate,
file_analysis::SHA1::Instantiate,
file_analysis::SHA256::Instantiate,
file_analysis::DataEvent::Instantiate,
file_analysis::PE::Instantiate,
};
static void analyzer_del_func(void* v)
{
delete (file_analysis::Analyzer*) v;
@ -156,14 +143,13 @@ HashKey* AnalyzerSet::GetKey(const RecordVal* args) const
file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const
{
file_analysis::Analyzer* a =
analyzer_factory[file_analysis::Analyzer::ArgsTag(args)](args, file);
FA_Tag tag = file_analysis::Analyzer::ArgsTag(args);
file_analysis::Analyzer* a = file_mgr->InstantiateAnalyzer(tag, args, file);
if ( ! a )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d failed for file id",
" %s", file_analysis::Analyzer::ArgsTag(args),
file->GetID().c_str());
reporter->Error("Failed file analyzer %s instantiation for file id %s",
file_mgr->GetAnalyzerName(tag), file->GetID().c_str());
return 0;
}

View file

@ -16,67 +16,144 @@ class File;
declare(PDict,Analyzer);
/**
* A set of file analysis analyzers indexed by AnalyzerArgs. Allows queueing
* of addition/removals so that those modifications can happen at well-defined
* times (e.g. to make sure a loop iterator isn't invalidated).
* A set of file analysis analyzers indexed by an \c AnalyzerArgs (script-layer
* type) value. Allows queueing of addition/removals so that those
* modifications can happen at well-defined times (e.g. to make sure a loop
* iterator isn't invalidated).
*/
class AnalyzerSet {
public:
/**
* Constructor. Nothing special.
* @param arg_file the file to which all analyzers in the set are attached.
*/
AnalyzerSet(File* arg_file);
/**
* Destructor. Any queued analyzer additions/removals are aborted and
* will not occur.
*/
~AnalyzerSet();
/**
* Attach an analyzer to #file immediately.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer was instantiated/attached, else false.
*/
bool Add(RecordVal* args);
/**
* Queue the attachment of an analyzer to #file.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer was able to be instantiated, else false.
*/
bool QueueAdd(RecordVal* args);
/**
* Remove an analyzer from #file immediately.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return false if analyzer didn't exist and so wasn't removed, else true.
*/
bool Remove(const RecordVal* args);
/**
* Queue the removal of an analyzer from #file.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer exists at time of call, else false;
*/
bool QueueRemove(const RecordVal* args);
/**
* Perform all queued modifications to the currently active analyzers.
* Perform all queued modifications to the current analyzer set.
*/
void DrainModifications();
/**
* Prepare the analyzer set to be iterated over.
* @see Dictionary#InitForIteration
* @return an iterator that may be used to loop over analyzers in the set.
*/
IterCookie* InitForIteration() const
{ return analyzer_map.InitForIteration(); }
/**
* Get next entry in the analyzer set.
* @see Dictionary#NextEntry
* @param c a set iterator.
* @return the next analyzer in the set or a null pointer if there is no
* more left (in that case the cookie is also deleted).
*/
file_analysis::Analyzer* NextEntry(IterCookie* c)
{ return analyzer_map.NextEntry(c); }
protected:
/**
* Get a hash key which represents an analyzer instance.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return the hash key calculated from \a args
*/
HashKey* GetKey(const RecordVal* args) const;
/**
* Create an instance of a file analyzer.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return a new file analyzer instance.
*/
file_analysis::Analyzer* InstantiateAnalyzer(RecordVal* args) const;
/**
* Insert an analyzer instance in to the set.
* @param a an analyzer instance.
* @param key the hash key which represents the analyzer's \c AnalyzerArgs.
*/
void Insert(file_analysis::Analyzer* a, HashKey* key);
/**
* Remove an analyzer instance from the set.
* @param tag enumarator which specifies type of the analyzer to remove,
* just used for debugging messages.
* @param key the hash key which represents the analyzer's \c AnalyzerArgs.
*/
bool Remove(FA_Tag tag, HashKey* key);
private:
File* file;
File* file; /**< File which owns the set */
CompositeHash* analyzer_hash; /**< AnalyzerArgs hashes. */
PDict(file_analysis::Analyzer) analyzer_map; /**< Indexed by AnalyzerArgs. */
/**
* Abstract base class for analyzer set modifications.
*/
class Modification {
public:
virtual ~Modification() {}
/**
* Perform the modification on an analyzer set.
* @param set the analyzer set on which the modification will happen.
* @return true if the modification altered \a set.
*/
virtual bool Perform(AnalyzerSet* set) = 0;
/**
* Don't perform the modification on the analyzer set and clean up.
*/
virtual void Abort() = 0;
};
/**
* Represents a request to add an analyzer to an analyzer set.
*/
class AddMod : public Modification {
public:
/**
* Construct request which can add an analyzer to an analyzer set.
* @param arg_a an analyzer instance to add to an analyzer set.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
*/
AddMod(file_analysis::Analyzer* arg_a, HashKey* arg_key)
: Modification(), a(arg_a), key(arg_key) {}
virtual ~AddMod() {}
@ -88,8 +165,16 @@ private:
HashKey* key;
};
/**
* Represents a request to remove an analyzer from an analyzer set.
*/
class RemoveMod : public Modification {
public:
/**
* Construct request which can remove an analyzer from an analyzer set.
* @param arg_a an analyzer instance to add to an analyzer set.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
*/
RemoveMod(FA_Tag arg_tag, HashKey* arg_key)
: Modification(), tag(arg_tag), key(arg_key) {}
virtual ~RemoveMod() {}
@ -102,7 +187,7 @@ private:
};
typedef queue<Modification*> ModQueue;
ModQueue mod_queue;
ModQueue mod_queue; /**< A queue of analyzer additions/removals requests. */
};
} // namespace file_analysiss

View file

@ -0,0 +1,22 @@
include(BroSubdir)
include_directories(BEFORE
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
)
add_subdirectory(analyzer)
set(file_analysis_SRCS
Manager.cc
File.cc
FileTimer.cc
Analyzer.h
AnalyzerSet.cc
Component.cc
)
bif_target(file_analysis.bif)
bro_add_subdir_library(file_analysis ${file_analysis_SRCS} ${BIF_OUTPUT_CC})
add_dependencies(bro_file_analysis generate_outputs)

View file

@ -0,0 +1,69 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "Component.h"
#include "Manager.h"
#include "../Desc.h"
#include "../util.h"
using namespace file_analysis;
analyzer::Tag::type_t Component::type_counter = 0;
Component::Component(const char* arg_name, factory_callback arg_factory,
analyzer::Tag::subtype_t arg_subtype)
: plugin::Component(plugin::component::FILE_ANALYZER)
{
name = copy_string(arg_name);
canon_name = canonify_name(arg_name);
factory = arg_factory;
tag = analyzer::Tag(++type_counter, arg_subtype);
}
Component::Component(const Component& other)
: plugin::Component(Type())
{
name = copy_string(other.name);
canon_name = copy_string(other.canon_name);
factory = other.factory;
tag = other.tag;
}
Component::~Component()
{
delete [] name;
delete [] canon_name;
}
analyzer::Tag Component::Tag() const
{
return tag;
}
void Component::Describe(ODesc* d) const
{
plugin::Component::Describe(d);
d->Add(name);
d->Add(" (");
if ( factory )
{
d->Add("ANALYZER_");
d->Add(canon_name);
}
d->Add(")");
}
Component& Component::operator=(const Component& other)
{
if ( &other != this )
{
name = copy_string(other.name);
factory = other.factory;
tag = other.tag;
}
return *this;
}

View file

@ -0,0 +1,109 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYZER_PLUGIN_COMPONENT_H
#define FILE_ANALYZER_PLUGIN_COMPONENT_H
#include "analyzer/Tag.h"
#include "plugin/Component.h"
#include "Val.h"
#include "../config.h"
#include "../util.h"
namespace file_analysis {
class File;
class Analyzer;
/**
* Component description for plugins providing file analyzers.
*
* A plugin can provide a specific file analyzer by registering this
* analyzer component, describing the analyzer.
*/
class Component : public plugin::Component {
public:
typedef Analyzer* (*factory_callback)(RecordVal* args, File* file);
/**
* Constructor.
*
* @param name The name of the provided analyzer. This name is used
* across the system to identify the analyzer, e.g., when calling
* file_analysis::Manager::InstantiateAnalyzer with a name.
*
* @param factory A factory function to instantiate instances of the
* analyzer's class, which must be derived directly or indirectly
* from file_analysis::Analyzer. This is typically a static \c
* Instatiate() method inside the class that just allocates and
* returns a new instance.
*
* @param subtype A subtype associated with this component that
* further distinguishes it. The subtype will be integrated into
* the analyzer::Tag that the manager associates with this analyzer,
* and analyzer instances can accordingly access it via analyzer::Tag().
* If not used, leave at zero.
*/
Component(const char* name, factory_callback factory,
analyzer::Tag::subtype_t subtype = 0);
/**
* Copy constructor.
*/
Component(const Component& other);
/**
* Destructor.
*/
~Component();
/**
* Returns the name of the analyzer. This name is unique across all
* analyzers and used to identify it. The returned name is derived
* from what's passed to the constructor but upper-cased and
* canonified to allow being part of a script-level ID.
*/
virtual const char* Name() const { return name; }
/**
* Returns a canonocalized version of the analyzer's name. The
* returned name is derived from what's passed to the constructor but
* upper-cased and transformed to allow being part of a script-level
* ID.
*/
const char* CanonicalName() const { return canon_name; }
/**
* Returns the analyzer's factory function.
*/
factory_callback Factory() const { return factory; }
/**
* Returns the analyzer's tag. Note that this is automatically
* generated for each new Components, and hence unique across all of
* them.
*/
analyzer::Tag Tag() const;
/**
* Generates a human-readable description of the component's main
* parameters. This goes into the output of \c "bro -NN".
*/
virtual void Describe(ODesc* d) const;
Component& operator=(const Component& other);
private:
const char* name; // The analyzer's name.
const char* canon_name; // The analyzer's canonical name.
factory_callback factory; // The analyzer's factory callback.
analyzer::Tag tag; // The automatically assigned analyzer tag.
// Global counter used to generate unique tags.
static analyzer::Tag::type_t type_counter;
};
}
#endif

View file

@ -1,36 +0,0 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_DATAEVENT_H
#define FILE_ANALYSIS_DATAEVENT_H
#include <string>
#include "Val.h"
#include "File.h"
#include "Analyzer.h"
namespace file_analysis {
/**
* An analyzer to send file data to script-layer events.
*/
class DataEvent : public file_analysis::Analyzer {
public:
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
virtual bool DeliverStream(const u_char* data, uint64 len);
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
protected:
DataEvent(RecordVal* args, File* file,
EventHandlerPtr ce, EventHandlerPtr se);
private:
EventHandlerPtr chunk_event;
EventHandlerPtr stream_event;
};
} // namespace file_analysis
#endif

View file

@ -1,35 +0,0 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_EXTRACT_H
#define FILE_ANALYSIS_EXTRACT_H
#include <string>
#include "Val.h"
#include "File.h"
#include "Analyzer.h"
namespace file_analysis {
/**
* An analyzer to extract files to disk.
*/
class Extract : public file_analysis::Analyzer {
public:
virtual ~Extract();
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
protected:
Extract(RecordVal* args, File* file, const string& arg_filename);
private:
string filename;
int fd;
};
} // namespace file_analysis
#endif

View file

@ -1,19 +1,19 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <string>
#include <openssl/md5.h>
#include "File.h"
#include "FileTimer.h"
#include "FileID.h"
#include "Analyzer.h"
#include "Manager.h"
#include "Reporter.h"
#include "Val.h"
#include "Type.h"
#include "../Analyzer.h"
#include "Event.h"
#include "analyzer/Analyzer.h"
#include "analyzer/Manager.h"
using namespace file_analysis;
static Val* empty_connection_table()
@ -51,8 +51,6 @@ int File::bof_buffer_size_idx = -1;
int File::bof_buffer_idx = -1;
int File::mime_type_idx = -1;
string File::salt;
void File::StaticInit()
{
if ( id_idx != -1 )
@ -72,42 +70,27 @@ void File::StaticInit()
bof_buffer_size_idx = Idx("bof_buffer_size");
bof_buffer_idx = Idx("bof_buffer");
mime_type_idx = Idx("mime_type");
salt = BifConst::FileAnalysis::salt->CheckString();
}
File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag,
File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
bool is_orig)
: id(""), unique(unique), val(0), postpone_timeout(false),
first_chunk(true), missed_bof(false), need_reassembly(false), done(false),
analyzers(this)
: id(file_id), val(0), postpone_timeout(false), first_chunk(true),
missed_bof(false), need_reassembly(false), done(false), analyzers(this)
{
StaticInit();
char tmp[20];
uint64 hash[2];
string msg(unique + salt);
MD5(reinterpret_cast<const u_char*>(msg.data()), msg.size(),
reinterpret_cast<u_char*>(hash));
uitoa_n(hash[0], tmp, sizeof(tmp), 62);
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s (%s)", tmp,
unique.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s", file_id.c_str());
val = new RecordVal(fa_file_type);
val->Assign(id_idx, new StringVal(tmp));
id = FileID(tmp);
val->Assign(id_idx, new StringVal(file_id.c_str()));
if ( conn )
{
// add source, connection, is_orig fields
val->Assign(source_idx, new StringVal(::Analyzer::GetTagName(tag)));
SetSource(analyzer_mgr->GetAnalyzerName(tag));
val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL));
UpdateConnectionFields(conn);
}
else
// use the unique file handle as source
val->Assign(source_idx, new StringVal(unique.c_str()));
UpdateLastActivityTime();
}
@ -187,6 +170,18 @@ int File::Idx(const string& field)
return rval;
}
string File::GetSource() const
{
Val* v = val->Lookup(source_idx);
return v ? v->AsString()->CheckString() : string();
}
void File::SetSource(const string& source)
{
val->Assign(source_idx, new StringVal(source.c_str()));
}
double File::GetTimeoutInterval() const
{
return LookupFieldDefaultInterval(timeout_interval_idx);
@ -423,7 +418,7 @@ void File::Gap(uint64 offset, uint64 len)
bool File::FileEventAvailable(EventHandlerPtr h)
{
return h && ! file_mgr->IsIgnored(unique);
return h && ! file_mgr->IsIgnored(id);
}
void File::FileEvent(EventHandlerPtr h)

View file

@ -6,11 +6,9 @@
#include <string>
#include <vector>
#include "AnalyzerTags.h"
#include "Conn.h"
#include "Val.h"
#include "AnalyzerSet.h"
#include "FileID.h"
#include "BroString.h"
namespace file_analysis {
@ -20,13 +18,30 @@ namespace file_analysis {
*/
class File {
public:
/**
* Destructor. Nothing fancy, releases a reference to the wrapped
* \c fa_file value.
*/
~File();
/**
* @return the #val record.
* @return the wrapped \c fa_file record value, #val.
*/
RecordVal* GetVal() const { return val; }
/**
* @return the value of the "source" field from #val record or an empty
* string if it's not initialized.
*/
string GetSource() const;
/**
* Set the "source" field from #val record to \a source.
* @param source the new value of the "source" field.
*/
void SetSource(const string& source);
/**
* @return value (seconds) of the "timeout_interval" field from #val record.
*/
@ -34,18 +49,14 @@ public:
/**
* Set the "timeout_interval" field from #val record to \a interval seconds.
* @param interval the new value of the "timeout_interval" field.
*/
void SetTimeoutInterval(double interval);
/**
* @return value of the "id" field from #val record.
*/
FileID GetID() const { return id; }
/**
* @return the string which uniquely identifies the file.
*/
string GetUnique() const { return unique; }
string GetID() const { return id; }
/**
* @return value of "last_active" field in #val record;
@ -59,13 +70,15 @@ public:
/**
* Set "total_bytes" field of #val record to \a size.
* @param size the new value of the "total_bytes" field.
*/
void SetTotalBytes(uint64 size);
/**
* Compares "seen_bytes" field to "total_bytes" field of #val record
* and returns true if the comparison indicates the full file was seen.
* If "total_bytes" hasn't been set yet, it returns false.
* Compares "seen_bytes" field to "total_bytes" field of #val record to
* determine if the full file has been seen.
* @return false if "total_bytes" hasn't been set yet or "seen_bytes" is
* less than it, else true.
*/
bool IsComplete() const;
@ -79,23 +92,30 @@ public:
/**
* Queues attaching an analyzer. Only one analyzer per type can be attached
* at a time unless the arguments differ.
* @param args an \c AnalyzerArgs value representing a file analyzer.
* @return false if analyzer can't be instantiated, else true.
*/
bool AddAnalyzer(RecordVal* args);
/**
* Queues removal of an analyzer.
* @param args an \c AnalyzerArgs value representing a file analyzer.
* @return true if analyzer was active at time of call, else false.
*/
bool RemoveAnalyzer(const RecordVal* args);
/**
* Pass in non-sequential data and deliver to attached analyzers.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk occurs.
*/
void DataIn(const u_char* data, uint64 len, uint64 offset);
/**
* Pass in sequential data and deliver to attached analyzers.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
*/
void DataIn(const u_char* data, uint64 len);
@ -106,10 +126,13 @@ public:
/**
* Inform attached analyzers about a gap in file stream.
* @param offset number of bytes in to file at which missing chunk starts.
* @param len length in bytes of the missing chunk of file data.
*/
void Gap(uint64 offset, uint64 len);
/**
* @param h pointer to an event handler.
* @return true if event has a handler and the file isn't ignored.
*/
bool FileEventAvailable(EventHandlerPtr h);
@ -117,11 +140,14 @@ public:
/**
* Raises an event related to the file's life-cycle, the only parameter
* to that event is the \c fa_file record..
* @param h pointer to an event handler.
*/
void FileEvent(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle.
* @param h pointer to an event handler.
* @param vl list of argument values to pass to event call.
*/
void FileEvent(EventHandlerPtr h, val_list* vl);
@ -130,35 +156,51 @@ protected:
/**
* Constructor; only file_analysis::Manager should be creating these.
* @param file_id an identifier string for the file in pretty hash form
* (similar to connection uids).
* @param conn a network connection over which the file is transferred.
* @param tag the network protocol over which the file is transferred.
* @param is_orig true if the file is being transferred from the originator
* of the connection to the responder. False indicates the other
* direction.
*/
File(const string& unique, Connection* conn = 0,
AnalyzerTag::Tag tag = AnalyzerTag::Error, bool is_orig = false);
File(const string& file_id, Connection* conn = 0,
analyzer::Tag tag = analyzer::Tag::Error, bool is_orig = false);
/**
* Updates the "conn_ids" and "conn_uids" fields in #val record with the
* \c conn_id and UID taken from \a conn.
* @param conn the connection over which a part of the file has been seen.
*/
void UpdateConnectionFields(Connection* conn);
/**
* Increment a byte count field of #val record by \a size.
* @param size number of bytes by which to increment.
* @param field_idx the index of the field in \c fa_file to increment.
*/
void IncrementByteCount(uint64 size, int field_idx);
/**
* Wrapper to RecordVal::LookupWithDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value.
* @param idx the index of a field of type "count" in \c fa_file.
* @return the value of the field, which may be it &default.
*/
uint64 LookupFieldDefaultCount(int idx) const;
/**
* Wrapper to RecordVal::LookupWithDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value.
* @param idx the index of a field of type "interval" in \c fa_file.
* @return the value of the field, which may be it &default.
*/
double LookupFieldDefaultInterval(int idx) const;
/**
* Buffers incoming data at the beginning of a file.
* @param data pointer to a data chunk to buffer.
* @param len number of bytes in the data chunk.
* @return true if buffering is still required, else false
*/
bool BufferBOF(const u_char* data, uint64 len);
@ -171,11 +213,15 @@ protected:
/**
* Does mime type detection and assigns type (if available) to \c mime_type
* field in #val.
* @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk.
* @return whether mime type was available.
*/
bool DetectMIME(const u_char* data, uint64 len);
/**
* Lookup a record field index/offset by name.
* @param field_name the name of the \c fa_file record field.
* @return the field offset in #val record corresponding to \a field_name.
*/
static int Idx(const string& field_name);
@ -186,15 +232,14 @@ protected:
static void StaticInit();
private:
FileID id; /**< A pretty hash that likely identifies file */
string unique; /**< A string that uniquely identifies file */
string id; /**< A pretty hash that likely identifies file */
RecordVal* val; /**< \c fa_file from script layer. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool first_chunk; /**< Track first non-linear chunk. */
bool missed_bof; /**< Flags that we missed start of file. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */
bool done; /**< If this object is about to be deleted. */
AnalyzerSet analyzers;
AnalyzerSet analyzers; /**< A set of attached file analyzer. */
struct BOF_Buffer {
BOF_Buffer() : full(false), replayed(false), size(0) {}
@ -207,8 +252,6 @@ private:
BroString::CVec chunks;
} bof_buffer; /**< Beginning of file buffer. */
static string salt;
static int id_idx;
static int parent_id_idx;
static int source_idx;

View file

@ -1,34 +0,0 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_FILEID_H
#define FILE_ANALYSIS_FILEID_H
namespace file_analysis {
/**
* A simple string wrapper class to help enforce some type safety between
* methods of FileAnalysis::Manager, some of which use a unique string to
* identify files, and others which use a pretty hash (the FileID) to identify
* files. A FileID is primarily used in methods which interface with the
* script-layer, while the unique strings are used for methods which interface
* with protocol analyzers or anything that sends data to the file analysis
* framework.
*/
struct FileID {
string id;
explicit FileID(const string arg_id) : id(arg_id) {}
FileID(const FileID& other) : id(other.id) {}
const char* c_str() const { return id.c_str(); }
bool operator==(const FileID& rhs) const { return id == rhs.id; }
bool operator<(const FileID& rhs) const { return id < rhs.id; }
FileID& operator=(const FileID& rhs) { id = rhs.id; return *this; }
FileID& operator=(const string& rhs) { id = rhs; return *this; }
};
} // namespace file_analysis
#endif

View file

@ -5,7 +5,7 @@
using namespace file_analysis;
FileTimer::FileTimer(double t, const FileID& id, double interval)
FileTimer::FileTimer(double t, const string& id, double interval)
: Timer(t + interval, TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id)
{
DBG_LOG(DBG_FILE_ANALYSIS, "New %f second timeout timer for %s",

View file

@ -5,7 +5,6 @@
#include <string>
#include "Timer.h"
#include "FileID.h"
namespace file_analysis {
@ -14,16 +13,25 @@ namespace file_analysis {
*/
class FileTimer : public Timer {
public:
FileTimer(double t, const FileID& id, double interval);
/**
* Constructor, nothing interesting about it.
* @param t unix time at which the timer should start ticking.
* @param id the file identifier which will be checked for inactivity.
* @param interval amount of time after \a t to check for inactivity.
*/
FileTimer(double t, const string& id, double interval);
/**
* Check inactivity of file_analysis::File corresponding to #file_id,
* reschedule if active, else call file_analysis::Manager::Timeout.
* @param t current unix time
* @param is_expire true if all pending timers are being expired.
*/
void Dispatch(double t, int is_expire);
private:
FileID file_id;
string file_id;
};
} // namespace file_analysis

View file

@ -1,74 +0,0 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_HASH_H
#define FILE_ANALYSIS_HASH_H
#include <string>
#include "Val.h"
#include "OpaqueVal.h"
#include "File.h"
#include "Analyzer.h"
namespace file_analysis {
/**
* An analyzer to produce a hash of file contents.
*/
class Hash : public file_analysis::Analyzer {
public:
virtual ~Hash();
virtual bool DeliverStream(const u_char* data, uint64 len);
virtual bool EndOfFile();
virtual bool Undelivered(uint64 offset, uint64 len);
protected:
Hash(RecordVal* args, File* file, HashVal* hv, const char* kind);
void Finalize();
private:
HashVal* hash;
bool fed;
const char* kind;
};
class MD5 : public Hash {
public:
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new MD5(args, file) : 0; }
protected:
MD5(RecordVal* args, File* file)
: Hash(args, file, new MD5Val(), "md5")
{}
};
class SHA1 : public Hash {
public:
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA1(args, file) : 0; }
protected:
SHA1(RecordVal* args, File* file)
: Hash(args, file, new SHA1Val(), "sha1")
{}
};
class SHA256 : public Hash {
public:
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA256(args, file) : 0; }
protected:
SHA256(RecordVal* args, File* file)
: Hash(args, file, new SHA256Val(), "sha256")
{}
};
} // namespace file_analysis
#endif

View file

@ -2,6 +2,7 @@
#include <vector>
#include <string>
#include <openssl/md5.h>
#include "Manager.h"
#include "File.h"
@ -9,12 +10,18 @@
#include "Var.h"
#include "Event.h"
#include "plugin/Manager.h"
using namespace file_analysis;
TableVal* Manager::disabled = 0;
string Manager::salt;
Manager::Manager()
{
tag_enum_type = new EnumType("FileAnalysis::Tag");
::ID* id = install_ID("Tag", "FileAnalysis", true, true);
add_type(id, tag_enum_type, 0, 0);
}
Manager::~Manager()
@ -22,9 +29,43 @@ Manager::~Manager()
Terminate();
}
void Manager::InitPreScript()
{
std::list<Component*> analyzers = plugin_mgr->Components<Component>();
for ( std::list<Component*>::const_iterator i = analyzers.begin();
i != analyzers.end(); ++i )
RegisterAnalyzerComponent(*i);
}
void Manager::RegisterAnalyzerComponent(Component* component)
{
const char* cname = component->CanonicalName();
if ( tag_enum_type->Lookup("FileAnalysis", cname) != -1 )
reporter->FatalError("File Analyzer %s defined more than once", cname);
DBG_LOG(DBG_FILE_ANALYSIS, "Registering analyzer %s (tag %s)",
component->Name(), component->Tag().AsString().c_str());
analyzers_by_name.insert(std::make_pair(cname, component));
analyzers_by_tag.insert(std::make_pair(component->Tag(), component));
analyzers_by_val.insert(std::make_pair(
component->Tag().AsEnumVal()->InternalInt(), component));
string id = fmt("ANALYZER_%s", cname);
tag_enum_type->AddName("FileAnalysis", id.c_str(),
component->Tag().AsEnumVal()->InternalInt(), true);
}
void Manager::InitPostScript()
{
}
void Manager::Terminate()
{
vector<FileID> keys;
vector<string> keys;
for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it )
keys.push_back(it->first);
@ -32,160 +73,139 @@ void Manager::Terminate()
Timeout(keys[i], true);
}
string Manager::HashHandle(const string& handle) const
{
if ( salt.empty() )
salt = BifConst::FileAnalysis::salt->CheckString();
char tmp[20];
uint64 hash[2];
string msg(handle + salt);
MD5(reinterpret_cast<const u_char*>(msg.data()), msg.size(),
reinterpret_cast<u_char*>(hash));
uitoa_n(hash[0], tmp, sizeof(tmp), 62);
return tmp;
}
void Manager::SetHandle(const string& handle)
{
current_handle = handle;
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
AnalyzerTag::Tag tag, Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
if ( handle.empty() )
return;
current_file_id = HashHandle(handle);
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig)
{
GetFileHandle(tag, conn, is_orig);
DataIn(data, len, offset, GetFile(current_handle, conn, tag, is_orig));
}
File* file = GetFile(current_file_id, conn, tag, is_orig);
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
const string& unique)
{
DataIn(data, len, offset, GetFile(unique));
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
File* file)
{
if ( ! file )
return;
file->DataIn(data, len, offset);
if ( file->IsComplete() )
RemoveFile(file->GetUnique());
RemoveFile(file->GetID());
}
void Manager::DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag,
void Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
// Sequential data input shouldn't be going over multiple conns, so don't
// do the check to update connection set.
DataIn(data, len, GetFile(current_handle, conn, tag, is_orig, false));
}
File* file = GetFile(current_file_id, conn, tag, is_orig, false);
void Manager::DataIn(const u_char* data, uint64 len, const string& unique)
{
DataIn(data, len, GetFile(unique));
}
void Manager::DataIn(const u_char* data, uint64 len, File* file)
{
if ( ! file )
return;
file->DataIn(data, len);
if ( file->IsComplete() )
RemoveFile(file->GetUnique());
RemoveFile(file->GetID());
}
void Manager::EndOfFile(AnalyzerTag::Tag tag, Connection* conn)
void Manager::DataIn(const u_char* data, uint64 len, const string& file_id,
const string& source)
{
File* file = GetFile(file_id);
if ( ! file )
return;
if ( file->GetSource().empty() )
file->SetSource(source);
file->DataIn(data, len);
if ( file->IsComplete() )
RemoveFile(file->GetID());
}
void Manager::EndOfFile(analyzer::Tag tag, Connection* conn)
{
EndOfFile(tag, conn, true);
EndOfFile(tag, conn, false);
}
void Manager::EndOfFile(AnalyzerTag::Tag tag, Connection* conn, bool is_orig)
void Manager::EndOfFile(analyzer::Tag tag, Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
return;
// Don't need to create a file if we're just going to remove it right away.
GetFileHandle(tag, conn, is_orig);
EndOfFile(current_handle);
RemoveFile(current_file_id);
}
void Manager::EndOfFile(const string& unique)
void Manager::EndOfFile(const string& file_id)
{
RemoveFile(unique);
RemoveFile(file_id);
}
void Manager::Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag,
void Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
Gap(offset, len, GetFile(current_handle, conn, tag, is_orig));
}
File* file = GetFile(current_file_id, conn, tag, is_orig);
void Manager::Gap(uint64 offset, uint64 len, const string& unique)
{
Gap(offset, len, GetFile(unique));
}
void Manager::Gap(uint64 offset, uint64 len, File* file)
{
if ( ! file )
return;
file->Gap(offset, len);
}
void Manager::SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn,
void Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
SetSize(size, GetFile(current_handle, conn, tag, is_orig));
}
File* file = GetFile(current_file_id, conn, tag, is_orig);
void Manager::SetSize(uint64 size, const string& unique)
{
SetSize(size, GetFile(unique));
}
void Manager::SetSize(uint64 size, File* file)
{
if ( ! file )
return;
file->SetTotalBytes(size);
if ( file->IsComplete() )
RemoveFile(file->GetUnique());
RemoveFile(file->GetID());
}
bool Manager::PostponeTimeout(const FileID& file_id) const
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
{
File* file = Lookup(file_id);
if ( ! file )
return false;
file->postpone_timeout = true;
return true;
}
bool Manager::SetTimeoutInterval(const FileID& file_id, double interval) const
{
File* file = Lookup(file_id);
if ( ! file )
return false;
if ( interval > 0 )
file->postpone_timeout = true;
file->SetTimeoutInterval(interval);
return true;
}
bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const
bool Manager::AddAnalyzer(const string& file_id, RecordVal* args) const
{
File* file = Lookup(file_id);
@ -195,7 +215,7 @@ bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const
return file->AddAnalyzer(args);
}
bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const
bool Manager::RemoveAnalyzer(const string& file_id, const RecordVal* args) const
{
File* file = Lookup(file_id);
@ -205,32 +225,23 @@ bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const
return file->RemoveAnalyzer(args);
}
File* Manager::GetFile(const string& unique, Connection* conn,
AnalyzerTag::Tag tag, bool is_orig, bool update_conn)
File* Manager::GetFile(const string& file_id, Connection* conn,
analyzer::Tag tag, bool is_orig, bool update_conn)
{
if ( unique.empty() )
if ( file_id.empty() )
return 0;
if ( IsIgnored(unique) )
if ( IsIgnored(file_id) )
return 0;
File* rval = str_map[unique];
File* rval = id_map[file_id];
if ( ! rval )
{
rval = str_map[unique] = new File(unique, conn, tag, is_orig);
FileID id = rval->GetID();
if ( id_map[id] )
{
reporter->Error("Evicted duplicate file ID: %s", id.c_str());
RemoveFile(unique);
}
id_map[id] = rval;
rval = id_map[file_id] = new File(file_id, conn, tag, is_orig);
rval->ScheduleInactivityTimer();
if ( IsIgnored(unique) )
if ( IsIgnored(file_id) )
return 0;
}
else
@ -244,7 +255,7 @@ File* Manager::GetFile(const string& unique, Connection* conn,
return rval;
}
File* Manager::Lookup(const FileID& file_id) const
File* Manager::Lookup(const string& file_id) const
{
IDMap::const_iterator it = id_map.find(file_id);
@ -254,7 +265,7 @@ File* Manager::Lookup(const FileID& file_id) const
return it->second;
}
void Manager::Timeout(const FileID& file_id, bool is_terminating)
void Manager::Timeout(const string& file_id, bool is_terminating)
{
File* file = Lookup(file_id);
@ -277,59 +288,59 @@ void Manager::Timeout(const FileID& file_id, bool is_terminating)
DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s",
file->GetID().c_str());
RemoveFile(file->GetUnique());
RemoveFile(file->GetID());
}
bool Manager::IgnoreFile(const FileID& file_id)
bool Manager::IgnoreFile(const string& file_id)
{
if ( id_map.find(file_id) == id_map.end() )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
ignored.insert(file_id);
return true;
}
bool Manager::RemoveFile(const string& file_id)
{
IDMap::iterator it = id_map.find(file_id);
if ( it == id_map.end() )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
ignored.insert(it->second->GetUnique());
return true;
}
bool Manager::RemoveFile(const string& unique)
{
StrMap::iterator it = str_map.find(unique);
if ( it == str_map.end() )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str());
it->second->EndOfFile();
FileID id = it->second->GetID();
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", id.c_str());
if ( ! id_map.erase(id) )
reporter->Error("No mapping for fileID %s", id.c_str());
ignored.erase(unique);
delete it->second;
str_map.erase(unique);
id_map.erase(file_id);
ignored.erase(file_id);
return true;
}
bool Manager::IsIgnored(const string& unique)
bool Manager::IsIgnored(const string& file_id)
{
return ignored.find(unique) != ignored.end();
return ignored.find(file_id) != ignored.end();
}
void Manager::GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig)
void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
{
current_handle.clear();
current_file_id.clear();
if ( IsDisabled(tag) )
return;
if ( ! get_file_handle )
return;
EnumVal* tagval = tag.AsEnumVal();
Ref(tagval);
val_list* vl = new val_list();
vl->append(new Val(tag, TYPE_COUNT));
vl->append(tagval);
vl->append(c->BuildConnVal());
vl->append(new Val(is_orig, TYPE_BOOL));
@ -337,7 +348,7 @@ void Manager::GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig)
mgr.Drain(); // need file handle immediately so we don't have to buffer data
}
bool Manager::IsDisabled(AnalyzerTag::Tag tag)
bool Manager::IsDisabled(analyzer::Tag tag)
{
if ( ! disabled )
disabled = internal_const_val("FileAnalysis::disable")->AsTableVal();
@ -354,3 +365,31 @@ bool Manager::IsDisabled(AnalyzerTag::Tag tag)
return rval;
}
Analyzer* Manager::InstantiateAnalyzer(int tag, RecordVal* args, File* f) const
{
analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag);
if ( it == analyzers_by_val.end() )
reporter->InternalError("cannot instantiate unknown file analyzer: %d",
tag);
Component* c = it->second;
if ( ! c->Factory() )
reporter->InternalError("file analyzer %s cannot be instantiated "
"dynamically", c->CanonicalName());
return c->Factory()(args, f);
}
const char* Manager::GetAnalyzerName(int tag) const
{
analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag);
if ( it == analyzers_by_val.end() )
reporter->InternalError("cannot get name of unknown file analyzer: %d",
tag);
return it->second->CanonicalName();
}

View file

@ -9,7 +9,6 @@
#include <queue>
#include "Net.h"
#include "AnalyzerTags.h"
#include "Conn.h"
#include "Val.h"
#include "Analyzer.h"
@ -18,7 +17,11 @@
#include "File.h"
#include "FileTimer.h"
#include "FileID.h"
#include "Component.h"
#include "analyzer/Tag.h"
#include "file_analysis/file_analysis.bif.h"
namespace file_analysis {
@ -27,152 +30,280 @@ namespace file_analysis {
*/
class Manager {
public:
/**
* Constructor.
*/
Manager();
/**
* Destructor. Times out any currently active file analyses.
*/
~Manager();
/**
* First-stage initializion of the manager. This is called early on
* during Bro's initialization, before any scripts are processed.
*/
void InitPreScript();
/**
* Second-stage initialization of the manager. This is called late
* during Bro's initialization after any scripts are processed.
*/
void InitPostScript();
/**
* Times out any active file analysis to prepare for shutdown.
*/
void Terminate();
/**
* Take in a unique file handle string to identifiy incoming file data.
* Creates a file identifier from a unique file handle string.
* @param handle a unique string which identifies a single file.
* @return a prettified MD5 hash of \a handle, truncated to 64-bits.
*/
string HashHandle(const string& handle) const;
/**
* Take in a unique file handle string to identify next piece of
* incoming file data/information.
* @param handle a unique string which identifies a single file.
*/
void SetHandle(const string& handle);
/**
* Pass in non-sequential file data.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file that data chunk occurs.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
*/
void DataIn(const u_char* data, uint64 len, uint64 offset,
AnalyzerTag::Tag tag, Connection* conn, bool is_orig);
void DataIn(const u_char* data, uint64 len, uint64 offset,
const string& unique);
void DataIn(const u_char* data, uint64 len, uint64 offset,
File* file);
analyzer::Tag tag, Connection* conn, bool is_orig);
/**
* Pass in sequential file data.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
*/
void DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag,
void DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig);
void DataIn(const u_char* data, uint64 len, const string& unique);
void DataIn(const u_char* data, uint64 len, File* file);
/**
* Signal the end of file data.
* Pass in sequential file data from external source (e.g. input framework).
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param file_id an identifier for the file (usually a hash of \a source).
* @param source uniquely identifies the file and should also describe
* in human-readable form where the file input is coming from (e.g.
* a local file path).
*/
void EndOfFile(AnalyzerTag::Tag tag, Connection* conn);
void EndOfFile(AnalyzerTag::Tag tag, Connection* conn, bool is_orig);
void EndOfFile(const string& unique);
void DataIn(const u_char* data, uint64 len, const string& file_id,
const string& source);
/**
* Signal the end of file data regardless of which direction it is being
* sent over the connection.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
*/
void EndOfFile(analyzer::Tag tag, Connection* conn);
/**
* Signal the end of file data being transferred over a connection in
* a particular direction.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
*/
void EndOfFile(analyzer::Tag tag, Connection* conn, bool is_orig);
/**
* Signal the end of file data being transferred using the file identifier.
* @param file_id the file identifier/hash.
*/
void EndOfFile(const string& file_id);
/**
* Signal a gap in the file data stream.
* @param offset number of bytes in to file at which missing chunk starts.
* @param len length in bytes of the missing chunk of file data.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
*/
void Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag, Connection* conn,
void Gap(uint64 offset, uint64 len, analyzer::Tag tag, Connection* conn,
bool is_orig);
void Gap(uint64 offset, uint64 len, const string& unique);
void Gap(uint64 offset, uint64 len, File* file);
/**
* Provide the expected number of bytes that comprise a file.
* @param size the number of bytes in the full file.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
*/
void SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn,
void SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig);
void SetSize(uint64 size, const string& unique);
void SetSize(uint64 size, File* file);
/**
* Starts ignoring a file, which will finally be removed from internal
* mappings on EOF or TIMEOUT.
* @param file_id the file identifier/hash.
* @return false if file identifier did not map to anything, else true.
*/
bool IgnoreFile(const FileID& file_id);
/**
* If called during a \c file_timeout event handler, requests deferral of
* analysis timeout.
*/
bool PostponeTimeout(const FileID& file_id) const;
bool IgnoreFile(const string& file_id);
/**
* Set's an inactivity threshold for the file.
* @param file_id the file identifier/hash.
* @param interval the amount of time in which no activity is seen for
* the file identified by \a file_id that will cause the file
* to be considered stale, timed out, and then resource reclaimed.
* @return false if file identifier did not map to anything, else true.
*/
bool SetTimeoutInterval(const FileID& file_id, double interval) const;
bool SetTimeoutInterval(const string& file_id, double interval) const;
/**
* Queue attachment of an analzer to the file identifier. Multiple
* analyzers of a given type can be attached per file identifier at a time
* as long as the arguments differ.
* @param file_id the file identifier/hash.
* @param args a \c AnalyzerArgs value which describes a file analyzer.
* @return false if the analyzer failed to be instantiated, else true.
*/
bool AddAnalyzer(const FileID& file_id, RecordVal* args) const;
bool AddAnalyzer(const string& file_id, RecordVal* args) const;
/**
* Queue removal of an analyzer for a given file identifier.
* @param file_id the file identifier/hash.
* @param args a \c AnalyzerArgs value which describes a file analyzer.
* @return true if the analyzer is active at the time of call, else false.
*/
bool RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const;
bool RemoveAnalyzer(const string& file_id, const RecordVal* args) const;
/**
* @return whether the file mapped to \a unique is being ignored.
* Tells whether analysis for a file is active or ignored.
* @param file_id the file identifier/hash.
* @return whether the file mapped to \a file_id is being ignored.
*/
bool IsIgnored(const string& unique);
bool IsIgnored(const string& file_id);
/**
* Instantiates a new file analyzer instance for the file.
* @param tag The file analyzer's tag.
* @param args The file analzer argument/option values.
* @param f The file analzer is to be associated with.
* @return The new analyzer instance or null if tag is invalid.
*/
Analyzer* InstantiateAnalyzer(int tag, RecordVal* args, File* f) const;
/**
* Translates a script-level file analyzer tag in to corresponding file
* analyzer name.
* @param tag The enum val of a file analyzer.
* @return The human-readable name of the file analyzer.
*/
const char* GetAnalyzerName(int tag) const;
protected:
friend class FileTimer;
typedef map<string, File*> StrMap;
typedef set<string> StrSet;
typedef map<FileID, File*> IDMap;
typedef set<string> IDSet;
typedef map<string, File*> IDMap;
/**
* @return the File object mapped to \a unique or a null pointer if analysis
* is being ignored for the associated file. An File object may be
* created if a mapping doesn't exist, and if it did exist, the
* activity time is refreshed along with any connection-related
* fields.
* Create a new file to be analyzed or retrieve an existing one.
* @param file_id the file identifier/hash.
* @param conn network connection, if any, over which the file is
* transferred.
* @param tag network protocol, if any, over which the file is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction (or if it
* this file isn't related to a connection).
* @param update_conn whether we need to update connection-related field
* in the \c fa_file record value associated with the file.
* @return the File object mapped to \a file_id or a null pointer if
* analysis is being ignored for the associated file. An File
* object may be created if a mapping doesn't exist, and if it did
* exist, the activity time is refreshed along with any
* connection-related fields.
*/
File* GetFile(const string& unique, Connection* conn = 0,
AnalyzerTag::Tag tag = AnalyzerTag::Error,
File* GetFile(const string& file_id, Connection* conn = 0,
analyzer::Tag tag = analyzer::Tag::Error,
bool is_orig = false, bool update_conn = true);
/**
* Try to retrieve a file that's being analyzed, using its identifier/hash.
* @param file_id the file identifier/hash.
* @return the File object mapped to \a file_id, or a null pointer if no
* mapping exists.
*/
File* Lookup(const FileID& file_id) const;
File* Lookup(const string& file_id) const;
/**
* Evaluate timeout policy for a file and remove the File object mapped to
* \a file_id if needed.
* @param file_id the file identifier/hash.
* @param is_termination whether the Manager (and probably Bro) is in a
* terminating state. If true, then the timeout cannot be postponed.
*/
void Timeout(const FileID& file_id, bool is_terminating = ::terminating);
void Timeout(const string& file_id, bool is_terminating = ::terminating);
/**
* Immediately remove file_analysis::File object associated with \a unique.
* @return false if file string did not map to anything, else true.
* Immediately remove file_analysis::File object associated with \a file_id.
* @param file_id the file identifier/hash.
* @return false if file id string did not map to anything, else true.
*/
bool RemoveFile(const string& unique);
bool RemoveFile(const string& file_id);
/**
* Sets #current_handle to a unique file handle string based on what the
* \c get_file_handle event derives from the connection params. The
* event queue is flushed so that we can get the handle value immediately.
* Sets #current_file_id to a hash of a unique file handle string based on
* what the \c get_file_handle event derives from the connection params.
* Event queue is flushed so that we can get the handle value immediately.
* @param tag network protocol over which the file is transferred.
* @param conn network connection over which the file is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
*/
void GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig);
void GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig);
/**
* @return whether file analysis is disabled for the given analyzer.
* Check if analysis is available for files transferred over a given
* network protocol.
* @param tag the network protocol over which files can be transferred and
* analyzed by the file analysis framework.
* @return whether file analysis is disabled for the analyzer given by
* \a tag.
*/
static bool IsDisabled(AnalyzerTag::Tag tag);
static bool IsDisabled(analyzer::Tag tag);
private:
StrMap str_map; /**< Map unique string to file_analysis::File. */
typedef map<string, Component*> analyzer_map_by_name;
typedef map<analyzer::Tag, Component*> analyzer_map_by_tag;
typedef map<int, Component*> analyzer_map_by_val;
void RegisterAnalyzerComponent(Component* component);
IDMap id_map; /**< Map file ID to file_analysis::File records. */
StrSet ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_handle; /**< Last file handle set by get_file_handle event.*/
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_file_id; /**< Hash of what get_file_handle event sets. */
EnumType* tag_enum_type; /**< File analyzer tag type. */
analyzer_map_by_name analyzers_by_name;
analyzer_map_by_tag analyzers_by_tag;
analyzer_map_by_val analyzers_by_val;
static TableVal* disabled; /**< Table of disabled analyzers. */
static string salt; /**< A salt added to file handles before hashing. */
};
} // namespace file_analysis

View file

@ -0,0 +1,3 @@
add_subdirectory(data_event)
add_subdirectory(extract)
add_subdirectory(hash)

View file

@ -0,0 +1,8 @@
include(BroPlugin)
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR})
bro_plugin_begin(Bro FileDataEvent)
bro_plugin_cc(DataEvent.cc Plugin.cc)
bro_plugin_end()

View file

@ -0,0 +1,69 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_DATAEVENT_H
#define FILE_ANALYSIS_DATAEVENT_H
#include <string>
#include "Val.h"
#include "File.h"
#include "Analyzer.h"
namespace file_analysis {
/**
* An analyzer to send file data to script-layer via events.
*/
class DataEvent : public file_analysis::Analyzer {
public:
/**
* Generates the event, if any, specified by the "chunk_event" field of this
* analyzer's \c AnalyzerArgs. This is for non-sequential file data input.
* @param data pointer to start of file data chunk.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk occurs.
* @return always true
*/
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
/**
* Generates the event, if any, specified by the "stream_event" field of
* this analyzer's \c AnalyzerArgs. This is for sequential file data input.
* @param data pointer to start of file data chunk.
* @param len number of bytes in the data chunk.
* @return always true
*/
virtual bool DeliverStream(const u_char* data, uint64 len);
/**
* Create a new instance of a DataEvent analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new DataEvent analyzer instance or a null pointer if
* no "chunk_event" or "stream_event" field was specfied in \a args.
*/
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param ce pointer to event handler which will be called to receive
* non-sequential file data.
* @param se pointer to event handler which will be called to receive
* sequential file data.
*/
DataEvent(RecordVal* args, File* file,
EventHandlerPtr ce, EventHandlerPtr se);
private:
EventHandlerPtr chunk_event;
EventHandlerPtr stream_event;
};
} // namespace file_analysis
#endif

View file

@ -0,0 +1,26 @@
#include "plugin/Plugin.h"
#include "file_analysis/Component.h"
#include "DataEvent.h"
namespace plugin { namespace Bro_FileDataEvent {
class Plugin : public plugin::Plugin {
protected:
void InitPreScript()
{
SetName("Bro::FileDataEvent");
SetVersion(-1);
SetAPIVersion(BRO_PLUGIN_API_VERSION);
SetDynamicPlugin(false);
SetDescription("Delivers file content via events");
AddComponent(new ::file_analysis::Component("DATA_EVENT",
::file_analysis::DataEvent::Instantiate));
}
};
Plugin __plugin;
} }

View file

@ -0,0 +1,8 @@
include(BroPlugin)
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR})
bro_plugin_begin(Bro FileExtract)
bro_plugin_cc(Extract.cc Plugin.cc)
bro_plugin_end()

View file

@ -0,0 +1,62 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_EXTRACT_H
#define FILE_ANALYSIS_EXTRACT_H
#include <string>
#include "Val.h"
#include "File.h"
#include "Analyzer.h"
namespace file_analysis {
/**
* An analyzer to extract content of files to local disk.
*/
class Extract : public file_analysis::Analyzer {
public:
/**
* Destructor. Will close the file that was used for data extraction.
*/
virtual ~Extract();
/**
* Write a chunk of file data to the local extraction file.
* @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk starts.
* @return false if there was no extraction file open and the data couldn't
* be written, else true.
*/
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
/**
* Create a new instance of an Extract analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new Extract analyzer instance or a null pointer if the
* the "extraction_file" field of \a args wasn't set.
*/
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param arg_filename a file system path which specifies the local file
* to which the contents of the file will be extracted/written.
*/
Extract(RecordVal* args, File* file, const string& arg_filename);
private:
string filename;
int fd;
};
} // namespace file_analysis
#endif

View file

@ -0,0 +1,26 @@
#include "plugin/Plugin.h"
#include "file_analysis/Component.h"
#include "Extract.h"
namespace plugin { namespace Bro_FileExtract {
class Plugin : public plugin::Plugin {
protected:
void InitPreScript()
{
SetName("Bro::FileExtract");
SetVersion(-1);
SetAPIVersion(BRO_PLUGIN_API_VERSION);
SetDynamicPlugin(false);
SetDescription("Extract file content to local file system");
AddComponent(new ::file_analysis::Component("EXTRACT",
::file_analysis::Extract::Instantiate));
}
};
Plugin __plugin;
} }

View file

@ -0,0 +1,9 @@
include(BroPlugin)
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR})
bro_plugin_begin(Bro FileHash)
bro_plugin_cc(Hash.cc Plugin.cc)
bro_plugin_bif(events.bif)
bro_plugin_end()

View file

@ -0,0 +1,160 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_HASH_H
#define FILE_ANALYSIS_HASH_H
#include <string>
#include "Val.h"
#include "OpaqueVal.h"
#include "File.h"
#include "Analyzer.h"
#include "events.bif.h"
namespace file_analysis {
/**
* An analyzer to produce a hash of file contents.
*/
class Hash : public file_analysis::Analyzer {
public:
/**
* Destructor.
*/
virtual ~Hash();
/**
* Incrementally hash next chunk of file contents.
* @param data pointer to start of a chunk of a file data.
* @param len number of bytes in the data chunk.
* @return false if the digest is in an invalid state, else true.
*/
virtual bool DeliverStream(const u_char* data, uint64 len);
/**
* Finalizes the hash and raises a "file_hash" event.
* @return always false so analyze will be deteched from file.
*/
virtual bool EndOfFile();
/**
* Missing data can't be handled, so just indicate the this analyzer should
* be removed from receiving further data. The hash will not be finalized.
* @param offset byte offset in file at which missing chunk starts.
* @param len number of missing bytes.
* @return always false so analyzer will detach from file.
*/
virtual bool Undelivered(uint64 offset, uint64 len);
protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param hv specific hash calculator object.
* @param kind human readable name of the hash algorithm to use.
*/
Hash(RecordVal* args, File* file, HashVal* hv, const char* kind);
/**
* If some file contents have been seen, finalizes the hash of them and
* raises the "file_hash" event with the results.
*/
void Finalize();
private:
HashVal* hash;
bool fed;
const char* kind;
};
/**
* An analyzer to produce an MD5 hash of file contents.
*/
class MD5 : public Hash {
public:
/**
* Create a new instance of the MD5 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new MD5 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new MD5(args, file) : 0; }
protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
MD5(RecordVal* args, File* file)
: Hash(args, file, new MD5Val(), "md5")
{}
};
/**
* An analyzer to produce a SHA1 hash of file contents.
*/
class SHA1 : public Hash {
public:
/**
* Create a new instance of the SHA1 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new MD5 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA1(args, file) : 0; }
protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
SHA1(RecordVal* args, File* file)
: Hash(args, file, new SHA1Val(), "sha1")
{}
};
/**
* An analyzer to produce a SHA256 hash of file contents.
*/
class SHA256 : public Hash {
public:
/**
* Create a new instance of the SHA256 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new MD5 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA256(args, file) : 0; }
protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
SHA256(RecordVal* args, File* file)
: Hash(args, file, new SHA256Val(), "sha256")
{}
};
} // namespace file_analysis
#endif

View file

@ -0,0 +1,33 @@
#include "plugin/Plugin.h"
#include "file_analysis/Component.h"
#include "Hash.h"
namespace plugin { namespace Bro_FileHash {
class Plugin : public plugin::Plugin {
protected:
void InitPreScript()
{
SetName("Bro::FileHash");
SetVersion(-1);
SetAPIVersion(BRO_PLUGIN_API_VERSION);
SetDynamicPlugin(false);
SetDescription("Hash file content");
AddComponent(new ::file_analysis::Component("MD5",
::file_analysis::MD5::Instantiate));
AddComponent(new ::file_analysis::Component("SHA1",
::file_analysis::SHA1::Instantiate));
AddComponent(new ::file_analysis::Component("SHA256",
::file_analysis::SHA256::Instantiate));
extern std::list<std::pair<const char*, int> > __bif_events_init();
AddBifInitFunction(&__bif_events_init);
}
};
Plugin __plugin;
} }

View file

@ -0,0 +1,12 @@
## This event is generated each time file analysis generates a digest of the
## file contents.
##
## f: The file.
##
## kind: The type of digest algorithm.
##
## hash: The result of the hashing.
##
## .. bro:see:: FileAnalysis::add_analyzer FileAnalysis::ANALYZER_MD5
## FileAnalysis::ANALYZER_SHA1 FileAnalysis::ANALYZER_SHA256
event file_hash%(f: fa_file, kind: string, hash: string%);

View file

@ -0,0 +1,61 @@
##! Internal functions and types used by the file analysis framework.
module FileAnalysis;
%%{
#include "file_analysis/Manager.h"
%%}
type AnalyzerArgs: record;
## :bro:see:`FileAnalysis::set_timeout_interval`.
function FileAnalysis::__set_timeout_interval%(file_id: string, t: interval%): bool
%{
bool result = file_mgr->SetTimeoutInterval(file_id->CheckString(), t);
return new Val(result, TYPE_BOOL);
%}
## :bro:see:`FileAnalysis::add_analyzer`.
function FileAnalysis::__add_analyzer%(file_id: string, args: any%): bool
%{
using BifType::Record::FileAnalysis::AnalyzerArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
bool result = file_mgr->AddAnalyzer(file_id->CheckString(), rv);
Unref(rv);
return new Val(result, TYPE_BOOL);
%}
## :bro:see:`FileAnalysis::remove_analyzer`.
function FileAnalysis::__remove_analyzer%(file_id: string, args: any%): bool
%{
using BifType::Record::FileAnalysis::AnalyzerArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(), rv);
Unref(rv);
return new Val(result, TYPE_BOOL);
%}
## :bro:see:`FileAnalysis::stop`.
function FileAnalysis::__stop%(file_id: string%): bool
%{
bool result = file_mgr->IgnoreFile(file_id->CheckString());
return new Val(result, TYPE_BOOL);
%}
module GLOBAL;
## For use within a :bro:see:`get_file_handle` handler to set a unique
## identifier to associate with the current input to the file analysis
## framework. Using an empty string for the handle signifies that the
## input will be ignored/discarded.
##
## handle: A string that uniquely identifies a file.
##
## .. bro:see:: get_file_handle
function set_file_handle%(handle: string%): any
%{
file_mgr->SetHandle(handle->CheckString());
return 0;
%}
const FileAnalysis::salt: string;