Improve file analysis doxygen comments.

This commit is contained in:
Jon Siwek 2013-05-23 10:22:49 -05:00
parent 705a84d688
commit f82167d067
8 changed files with 421 additions and 24 deletions

View file

@ -17,6 +17,11 @@ class File;
*/ */
class Analyzer { class Analyzer {
public: public:
/**
* Destructor. Nothing special about it. Virtual since we definitely expect
* to delete instances of derived classes via pointers to this class.
*/
virtual ~Analyzer() virtual ~Analyzer()
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %d", tag); DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %d", tag);
@ -24,7 +29,10 @@ public:
} }
/** /**
* Subclasses may override this to receive file data non-sequentially. * Subclasses may override this method to receive file data non-sequentially.
* @param data points to start of a chunk of file data.
* @param len length in bytes of the chunk of data pointed to by \a data.
* @param offset the byte offset within full file that data chunk starts.
* @return true if the analyzer is still in a valid state to continue * @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done". * receiving data/events or false if it's essentially "done".
*/ */
@ -32,7 +40,9 @@ public:
{ return true; } { return true; }
/** /**
* Subclasses may override this to receive file sequentially. * Subclasses may override this method to receive file sequentially.
* @param data points to start of the next chunk of file data.
* @param len length in bytes of the chunk of data pointed to by \a data.
* @return true if the analyzer is still in a valid state to continue * @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done". * receiving data/events or false if it's essentially "done".
*/ */
@ -40,7 +50,7 @@ public:
{ return true; } { return true; }
/** /**
* Subclasses may override this to specifically handle an EOF signal, * Subclasses may override this method to specifically handle an EOF signal,
* which means no more data is going to be incoming and the analyzer * which means no more data is going to be incoming and the analyzer
* may be deleted/cleaned up soon. * may be deleted/cleaned up soon.
* @return true if the analyzer is still in a valid state to continue * @return true if the analyzer is still in a valid state to continue
@ -50,7 +60,10 @@ public:
{ return true; } { return true; }
/** /**
* Subclasses may override this to handle missing data in a file stream. * Subclasses may override this method to handle missing data in a file.
* @param offset the byte offset within full file at which the missing
* data chunk occurs.
* @param len the number of missing bytes.
* @return true if the analyzer is still in a valid state to continue * @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done". * receiving data/events or false if it's essentially "done".
*/ */
@ -73,8 +86,10 @@ public:
File* GetFile() const { return file; } File* GetFile() const { return file; }
/** /**
* Retrieves an analyzer tag field from full analyzer argument record.
* @param args an \c AnalyzerArgs (script-layer type) value.
* @return the analyzer tag equivalent of the 'tag' field from the * @return the analyzer tag equivalent of the 'tag' field from the
* AnalyzerArgs value \a args. * \c AnalyzerArgs value \a args.
*/ */
static FA_Tag ArgsTag(const RecordVal* args) static FA_Tag ArgsTag(const RecordVal* args)
{ {
@ -84,6 +99,13 @@ public:
} }
protected: protected:
/**
* Constructor. Only derived classes are meant to be instantiated.
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifying
* tunable options, if any, related to a particular analyzer type.
* @param arg_file the file to which the analyzer is being attached.
*/
Analyzer(RecordVal* arg_args, File* arg_file) Analyzer(RecordVal* arg_args, File* arg_file)
: tag(file_analysis::Analyzer::ArgsTag(arg_args)), : tag(file_analysis::Analyzer::ArgsTag(arg_args)),
args(arg_args->Ref()->AsRecordVal()), args(arg_args->Ref()->AsRecordVal()),
@ -91,9 +113,10 @@ protected:
{} {}
private: private:
FA_Tag tag;
RecordVal* args; FA_Tag tag; /**< The particular analyzer type of the analyzer instance. */
File* file; RecordVal* args; /**< \c AnalyzerArgs val gives tunable analyzer params. */
File* file; /**< The file to which the analyzer is attached. */
}; };
typedef file_analysis::Analyzer* (*AnalyzerInstantiator)(RecordVal* args, typedef file_analysis::Analyzer* (*AnalyzerInstantiator)(RecordVal* args,

View file

@ -16,67 +16,144 @@ class File;
declare(PDict,Analyzer); declare(PDict,Analyzer);
/** /**
* A set of file analysis analyzers indexed by AnalyzerArgs. Allows queueing * A set of file analysis analyzers indexed by an \c AnalyzerArgs (script-layer
* of addition/removals so that those modifications can happen at well-defined * type) value. Allows queueing of addition/removals so that those
* times (e.g. to make sure a loop iterator isn't invalidated). * modifications can happen at well-defined times (e.g. to make sure a loop
* iterator isn't invalidated).
*/ */
class AnalyzerSet { class AnalyzerSet {
public: public:
/**
* Constructor. Nothing special.
* @param arg_file the file to which all analyzers in the set are attached.
*/
AnalyzerSet(File* arg_file); AnalyzerSet(File* arg_file);
/**
* Destructor. Any queued analyzer additions/removals are aborted and
* will not occur.
*/
~AnalyzerSet(); ~AnalyzerSet();
/** /**
* Attach an analyzer to #file immediately.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer was instantiated/attached, else false. * @return true if analyzer was instantiated/attached, else false.
*/ */
bool Add(RecordVal* args); bool Add(RecordVal* args);
/** /**
* Queue the attachment of an analyzer to #file.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer was able to be instantiated, else false. * @return true if analyzer was able to be instantiated, else false.
*/ */
bool QueueAdd(RecordVal* args); bool QueueAdd(RecordVal* args);
/** /**
* Remove an analyzer from #file immediately.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return false if analyzer didn't exist and so wasn't removed, else true. * @return false if analyzer didn't exist and so wasn't removed, else true.
*/ */
bool Remove(const RecordVal* args); bool Remove(const RecordVal* args);
/** /**
* Queue the removal of an analyzer from #file.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer exists at time of call, else false; * @return true if analyzer exists at time of call, else false;
*/ */
bool QueueRemove(const RecordVal* args); bool QueueRemove(const RecordVal* args);
/** /**
* Perform all queued modifications to the currently active analyzers. * Perform all queued modifications to the current analyzer set.
*/ */
void DrainModifications(); void DrainModifications();
/**
* Prepare the analyzer set to be iterated over.
* @see Dictionary#InitForIteration
* @return an iterator that may be used to loop over analyzers in the set.
*/
IterCookie* InitForIteration() const IterCookie* InitForIteration() const
{ return analyzer_map.InitForIteration(); } { return analyzer_map.InitForIteration(); }
/**
* Get next entry in the analyzer set.
* @see Dictionary#NextEntry
* @param c a set iterator.
* @return the next analyzer in the set or a null pointer if there is no
* more left (in that case the cookie is also deleted).
*/
file_analysis::Analyzer* NextEntry(IterCookie* c) file_analysis::Analyzer* NextEntry(IterCookie* c)
{ return analyzer_map.NextEntry(c); } { return analyzer_map.NextEntry(c); }
protected: protected:
/**
* Get a hash key which represents an analyzer instance.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return the hash key calculated from \a args
*/
HashKey* GetKey(const RecordVal* args) const; HashKey* GetKey(const RecordVal* args) const;
/**
* Create an instance of a file analyzer.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return a new file analyzer instance.
*/
file_analysis::Analyzer* InstantiateAnalyzer(RecordVal* args) const; file_analysis::Analyzer* InstantiateAnalyzer(RecordVal* args) const;
/**
* Insert an analyzer instance in to the set.
* @param a an analyzer instance.
* @param key the hash key which represents the analyzer's \c AnalyzerArgs.
*/
void Insert(file_analysis::Analyzer* a, HashKey* key); void Insert(file_analysis::Analyzer* a, HashKey* key);
/**
* Remove an analyzer instance from the set.
* @param tag enumerator which specifies type of the analyzer to remove,
* just used for debugging messages.
* @param key the hash key which represents the analyzer's \c AnalyzerArgs.
*/
bool Remove(FA_Tag tag, HashKey* key); bool Remove(FA_Tag tag, HashKey* key);
private: private:
File* file;
File* file; /**< File which owns the set */
CompositeHash* analyzer_hash; /**< AnalyzerArgs hashes. */ CompositeHash* analyzer_hash; /**< AnalyzerArgs hashes. */
PDict(file_analysis::Analyzer) analyzer_map; /**< Indexed by AnalyzerArgs. */ PDict(file_analysis::Analyzer) analyzer_map; /**< Indexed by AnalyzerArgs. */
/**
* Abstract base class for analyzer set modifications.
*/
class Modification { class Modification {
public: public:
virtual ~Modification() {} virtual ~Modification() {}
/**
* Perform the modification on an analyzer set.
* @param set the analyzer set on which the modification will happen.
* @return true if the modification altered \a set.
*/
virtual bool Perform(AnalyzerSet* set) = 0; virtual bool Perform(AnalyzerSet* set) = 0;
/**
* Don't perform the modification on the analyzer set and clean up.
*/
virtual void Abort() = 0; virtual void Abort() = 0;
}; };
/**
* Represents a request to add an analyzer to an analyzer set.
*/
class AddMod : public Modification { class AddMod : public Modification {
public: public:
/**
* Construct request which can add an analyzer to an analyzer set.
* @param arg_a an analyzer instance to add to an analyzer set.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
*/
AddMod(file_analysis::Analyzer* arg_a, HashKey* arg_key) AddMod(file_analysis::Analyzer* arg_a, HashKey* arg_key)
: Modification(), a(arg_a), key(arg_key) {} : Modification(), a(arg_a), key(arg_key) {}
virtual ~AddMod() {} virtual ~AddMod() {}
@ -88,8 +165,16 @@ private:
HashKey* key; HashKey* key;
}; };
/**
* Represents a request to remove an analyzer from an analyzer set.
*/
class RemoveMod : public Modification { class RemoveMod : public Modification {
public: public:
/**
* Construct request which can remove an analyzer from an analyzer set.
* @param arg_tag enumerator which specifies the type of the analyzer to remove.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
*/
RemoveMod(FA_Tag arg_tag, HashKey* arg_key) RemoveMod(FA_Tag arg_tag, HashKey* arg_key)
: Modification(), tag(arg_tag), key(arg_key) {} : Modification(), tag(arg_tag), key(arg_key) {}
virtual ~RemoveMod() {} virtual ~RemoveMod() {}
@ -102,7 +187,7 @@ private:
}; };
typedef queue<Modification*> ModQueue; typedef queue<Modification*> ModQueue;
ModQueue mod_queue; ModQueue mod_queue; /**< A queue of analyzer addition/removal requests. */
}; };
} // namespace file_analysis } // namespace file_analysis

View file

@ -12,17 +12,50 @@
namespace file_analysis { namespace file_analysis {
/** /**
* An analyzer to send file data to script-layer events. * An analyzer to send file data to script-layer via events.
*/ */
class DataEvent : public file_analysis::Analyzer { class DataEvent : public file_analysis::Analyzer {
public: public:
/**
* Generates the event, if any, specified by the "chunk_event" field of this
* analyzer's \c AnalyzerArgs. This is for non-sequential file data input.
* @param data pointer to start of file data chunk.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk occurs.
* @return always true
*/
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset); virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
/**
* Generates the event, if any, specified by the "stream_event" field of
* this analyzer's \c AnalyzerArgs. This is for sequential file data input.
* @param data pointer to start of file data chunk.
* @param len number of bytes in the data chunk.
* @return always true
*/
virtual bool DeliverStream(const u_char* data, uint64 len); virtual bool DeliverStream(const u_char* data, uint64 len);
/**
* Create a new instance of a DataEvent analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new DataEvent analyzer instance or a null pointer if
* no "chunk_event" or "stream_event" field was specified in \a args.
*/
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file); static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
protected: protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param ce pointer to event handler which will be called to receive
* non-sequential file data.
* @param se pointer to event handler which will be called to receive
* sequential file data.
*/
DataEvent(RecordVal* args, File* file, DataEvent(RecordVal* args, File* file,
EventHandlerPtr ce, EventHandlerPtr se); EventHandlerPtr ce, EventHandlerPtr se);

View file

@ -12,17 +12,44 @@
namespace file_analysis { namespace file_analysis {
/** /**
* An analyzer to extract files to disk. * An analyzer to extract content of files to local disk.
*/ */
class Extract : public file_analysis::Analyzer { class Extract : public file_analysis::Analyzer {
public: public:
/**
* Destructor. Will close the file that was used for data extraction.
*/
virtual ~Extract(); virtual ~Extract();
/**
* Write a chunk of file data to the local extraction file.
* @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk starts.
* @return false if there was no extraction file open and the data couldn't
* be written, else true.
*/
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset); virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
/**
* Create a new instance of an Extract analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new Extract analyzer instance or a null pointer if the
* "extraction_file" field of \a args wasn't set.
*/
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file); static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
protected: protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param arg_filename a file system path which specifies the local file
* to which the contents of the file will be extracted/written.
*/
Extract(RecordVal* args, File* file, const string& arg_filename); Extract(RecordVal* args, File* file, const string& arg_filename);
private: private:

View file

@ -19,10 +19,15 @@ namespace file_analysis {
*/ */
class File { class File {
public: public:
/**
* Destructor. Nothing fancy, releases a reference to the wrapped
* \c fa_file value.
*/
~File(); ~File();
/** /**
* @return the #val record. * @return the wrapped \c fa_file record value, #val.
*/ */
RecordVal* GetVal() const { return val; } RecordVal* GetVal() const { return val; }
@ -34,6 +39,7 @@ public:
/** /**
* Set the "source" field from #val record to \a source. * Set the "source" field from #val record to \a source.
* @param source the new value of the "source" field.
*/ */
void SetSource(const string& source); void SetSource(const string& source);
@ -44,6 +50,7 @@ public:
/** /**
* Set the "timeout_interval" field from #val record to \a interval seconds. * Set the "timeout_interval" field from #val record to \a interval seconds.
* @param interval the new value of the "timeout_interval" field.
*/ */
void SetTimeoutInterval(double interval); void SetTimeoutInterval(double interval);
@ -64,13 +71,15 @@ public:
/** /**
* Set "total_bytes" field of #val record to \a size. * Set "total_bytes" field of #val record to \a size.
* @param size the new value of the "total_bytes" field.
*/ */
void SetTotalBytes(uint64 size); void SetTotalBytes(uint64 size);
/** /**
* Compares "seen_bytes" field to "total_bytes" field of #val record * Compares "seen_bytes" field to "total_bytes" field of #val record to
* and returns true if the comparison indicates the full file was seen. * determine if the full file has been seen.
* If "total_bytes" hasn't been set yet, it returns false. * @return false if "total_bytes" hasn't been set yet or "seen_bytes" is
* less than it, else true.
*/ */
bool IsComplete() const; bool IsComplete() const;
@ -84,23 +93,30 @@ public:
/** /**
* Queues attaching an analyzer. Only one analyzer per type can be attached * Queues attaching an analyzer. Only one analyzer per type can be attached
* at a time unless the arguments differ. * at a time unless the arguments differ.
* @param args an \c AnalyzerArgs value representing a file analyzer.
* @return false if analyzer can't be instantiated, else true. * @return false if analyzer can't be instantiated, else true.
*/ */
bool AddAnalyzer(RecordVal* args); bool AddAnalyzer(RecordVal* args);
/** /**
* Queues removal of an analyzer. * Queues removal of an analyzer.
* @param args an \c AnalyzerArgs value representing a file analyzer.
* @return true if analyzer was active at time of call, else false. * @return true if analyzer was active at time of call, else false.
*/ */
bool RemoveAnalyzer(const RecordVal* args); bool RemoveAnalyzer(const RecordVal* args);
/** /**
* Pass in non-sequential data and deliver to attached analyzers. * Pass in non-sequential data and deliver to attached analyzers.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk occurs.
*/ */
void DataIn(const u_char* data, uint64 len, uint64 offset); void DataIn(const u_char* data, uint64 len, uint64 offset);
/** /**
* Pass in sequential data and deliver to attached analyzers. * Pass in sequential data and deliver to attached analyzers.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
*/ */
void DataIn(const u_char* data, uint64 len); void DataIn(const u_char* data, uint64 len);
@ -111,10 +127,13 @@ public:
/** /**
* Inform attached analyzers about a gap in file stream. * Inform attached analyzers about a gap in file stream.
* @param offset number of bytes in to file at which missing chunk starts.
* @param len length in bytes of the missing chunk of file data.
*/ */
void Gap(uint64 offset, uint64 len); void Gap(uint64 offset, uint64 len);
/** /**
* @param h pointer to an event handler.
* @return true if event has a handler and the file isn't ignored. * @return true if event has a handler and the file isn't ignored.
*/ */
bool FileEventAvailable(EventHandlerPtr h); bool FileEventAvailable(EventHandlerPtr h);
@ -122,11 +141,14 @@ public:
/** /**
* Raises an event related to the file's life-cycle, the only parameter * Raises an event related to the file's life-cycle, the only parameter
* to that event is the \c fa_file record. * to that event is the \c fa_file record.
* @param h pointer to an event handler.
*/ */
void FileEvent(EventHandlerPtr h); void FileEvent(EventHandlerPtr h);
/** /**
* Raises an event related to the file's life-cycle. * Raises an event related to the file's life-cycle.
* @param h pointer to an event handler.
* @param vl list of argument values to pass to event call.
*/ */
void FileEvent(EventHandlerPtr h, val_list* vl); void FileEvent(EventHandlerPtr h, val_list* vl);
@ -135,6 +157,13 @@ protected:
/** /**
* Constructor; only file_analysis::Manager should be creating these. * Constructor; only file_analysis::Manager should be creating these.
* @param file_id an identifier string for the file in pretty hash form
* (similar to connection uids).
* @param conn a network connection over which the file is transferred.
* @param tag the network protocol over which the file is transferred.
* @param is_orig true if the file is being transferred from the originator
* of the connection to the responder. False indicates the other
* direction.
*/ */
File(const string& file_id, Connection* conn = 0, File(const string& file_id, Connection* conn = 0,
AnalyzerTag::Tag tag = AnalyzerTag::Error, bool is_orig = false); AnalyzerTag::Tag tag = AnalyzerTag::Error, bool is_orig = false);
@ -142,28 +171,37 @@ protected:
/** /**
* Updates the "conn_ids" and "conn_uids" fields in #val record with the * Updates the "conn_ids" and "conn_uids" fields in #val record with the
* \c conn_id and UID taken from \a conn. * \c conn_id and UID taken from \a conn.
* @param conn the connection over which a part of the file has been seen.
*/ */
void UpdateConnectionFields(Connection* conn); void UpdateConnectionFields(Connection* conn);
/** /**
* Increment a byte count field of #val record by \a size. * Increment a byte count field of #val record by \a size.
* @param size number of bytes by which to increment.
* @param field_idx the index of the field in \c fa_file to increment.
*/ */
void IncrementByteCount(uint64 size, int field_idx); void IncrementByteCount(uint64 size, int field_idx);
/** /**
* Wrapper to RecordVal::LookupWithDefault for the field in #val at index * Wrapper to RecordVal::LookupWithDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value. * \a idx which automatically unrefs the Val and returns a converted value.
* @param idx the index of a field of type "count" in \c fa_file.
* @return the value of the field, which may be its &default.
*/ */
uint64 LookupFieldDefaultCount(int idx) const; uint64 LookupFieldDefaultCount(int idx) const;
/** /**
* Wrapper to RecordVal::LookupWithDefault for the field in #val at index * Wrapper to RecordVal::LookupWithDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value. * \a idx which automatically unrefs the Val and returns a converted value.
* @param idx the index of a field of type "interval" in \c fa_file.
* @return the value of the field, which may be its &default.
*/ */
double LookupFieldDefaultInterval(int idx) const; double LookupFieldDefaultInterval(int idx) const;
/** /**
* Buffers incoming data at the beginning of a file. * Buffers incoming data at the beginning of a file.
* @param data pointer to a data chunk to buffer.
* @param len number of bytes in the data chunk.
* @return true if buffering is still required, else false * @return true if buffering is still required, else false
*/ */
bool BufferBOF(const u_char* data, uint64 len); bool BufferBOF(const u_char* data, uint64 len);
@ -176,11 +214,15 @@ protected:
/** /**
* Does mime type detection and assigns type (if available) to \c mime_type * Does mime type detection and assigns type (if available) to \c mime_type
* field in #val. * field in #val.
* @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk.
* @return whether mime type was available. * @return whether mime type was available.
*/ */
bool DetectMIME(const u_char* data, uint64 len); bool DetectMIME(const u_char* data, uint64 len);
/** /**
* Lookup a record field index/offset by name.
* @param field_name the name of the \c fa_file record field.
* @return the field offset in #val record corresponding to \a field_name. * @return the field offset in #val record corresponding to \a field_name.
*/ */
static int Idx(const string& field_name); static int Idx(const string& field_name);
@ -198,7 +240,7 @@ private:
bool missed_bof; /**< Flags that we missed start of file. */ bool missed_bof; /**< Flags that we missed start of file. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */ bool need_reassembly; /**< Whether file stream reassembly is needed. */
bool done; /**< If this object is about to be deleted. */ bool done; /**< If this object is about to be deleted. */
AnalyzerSet analyzers; AnalyzerSet analyzers; /**< A set of attached file analyzers. */
struct BOF_Buffer { struct BOF_Buffer {
BOF_Buffer() : full(false), replayed(false), size(0) {} BOF_Buffer() : full(false), replayed(false), size(0) {}

View file

@ -13,11 +13,20 @@ namespace file_analysis {
*/ */
class FileTimer : public Timer { class FileTimer : public Timer {
public: public:
/**
* Constructor, nothing interesting about it.
* @param t unix time at which the timer should start ticking.
* @param id the file identifier which will be checked for inactivity.
* @param interval amount of time after \a t to check for inactivity.
*/
FileTimer(double t, const string& id, double interval); FileTimer(double t, const string& id, double interval);
/** /**
* Check inactivity of file_analysis::File corresponding to #file_id, * Check inactivity of file_analysis::File corresponding to #file_id,
* reschedule if active, else call file_analysis::Manager::Timeout. * reschedule if active, else call file_analysis::Manager::Timeout.
* @param t current unix time
* @param is_expire true if all pending timers are being expired.
*/ */
void Dispatch(double t, int is_expire); void Dispatch(double t, int is_expire);

View file

@ -17,17 +17,50 @@ namespace file_analysis {
*/ */
class Hash : public file_analysis::Analyzer { class Hash : public file_analysis::Analyzer {
public: public:
/**
* Destructor.
*/
virtual ~Hash(); virtual ~Hash();
/**
* Incrementally hash next chunk of file contents.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @return false if the digest is in an invalid state, else true.
*/
virtual bool DeliverStream(const u_char* data, uint64 len); virtual bool DeliverStream(const u_char* data, uint64 len);
/**
* Finalizes the hash and raises a "file_hash" event.
* @return always false so analyzer will be detached from file.
*/
virtual bool EndOfFile(); virtual bool EndOfFile();
/**
* Missing data can't be handled, so just indicate that this analyzer should
* be removed from receiving further data. The hash will not be finalized.
* @param offset byte offset in file at which missing chunk starts.
* @param len number of missing bytes.
* @return always false so analyzer will detach from file.
*/
virtual bool Undelivered(uint64 offset, uint64 len); virtual bool Undelivered(uint64 offset, uint64 len);
protected: protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param hv specific hash calculator object.
* @param kind human readable name of the hash algorithm to use.
*/
Hash(RecordVal* args, File* file, HashVal* hv, const char* kind); Hash(RecordVal* args, File* file, HashVal* hv, const char* kind);
/**
* If some file contents have been seen, finalizes the hash of them and
* raises the "file_hash" event with the results.
*/
void Finalize(); void Finalize();
private: private:
@ -36,34 +69,85 @@ private:
const char* kind; const char* kind;
}; };
/**
* An analyzer to produce an MD5 hash of file contents.
*/
class MD5 : public Hash { class MD5 : public Hash {
public: public:
/**
* Create a new instance of the MD5 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new MD5 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file) static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new MD5(args, file) : 0; } { return file_hash ? new MD5(args, file) : 0; }
protected: protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
MD5(RecordVal* args, File* file) MD5(RecordVal* args, File* file)
: Hash(args, file, new MD5Val(), "md5") : Hash(args, file, new MD5Val(), "md5")
{} {}
}; };
/**
* An analyzer to produce a SHA1 hash of file contents.
*/
class SHA1 : public Hash { class SHA1 : public Hash {
public: public:
/**
* Create a new instance of the SHA1 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new SHA1 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file) static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA1(args, file) : 0; } { return file_hash ? new SHA1(args, file) : 0; }
protected: protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
SHA1(RecordVal* args, File* file) SHA1(RecordVal* args, File* file)
: Hash(args, file, new SHA1Val(), "sha1") : Hash(args, file, new SHA1Val(), "sha1")
{} {}
}; };
/**
* An analyzer to produce a SHA256 hash of file contents.
*/
class SHA256 : public Hash { class SHA256 : public Hash {
public: public:
/**
* Create a new instance of the SHA256 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new SHA256 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file) static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA256(args, file) : 0; } { return file_hash ? new SHA256(args, file) : 0; }
protected: protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
SHA256(RecordVal* args, File* file) SHA256(RecordVal* args, File* file)
: Hash(args, file, new SHA256Val(), "sha256") : Hash(args, file, new SHA256Val(), "sha256")
{} {}

View file

@ -26,7 +26,15 @@ namespace file_analysis {
*/ */
class Manager { class Manager {
public: public:
/**
* Constructor.
*/
Manager(); Manager();
/**
* Destructor. Times out any currently active file analyses.
*/
~Manager(); ~Manager();
/** /**
@ -35,48 +43,97 @@ public:
void Terminate(); void Terminate();
/** /**
* Creates a file identifier from a unique file handle string.
* @param handle a unique string which identifies a single file.
* @return a prettified MD5 hash of \a handle, truncated to 64-bits. * @return a prettified MD5 hash of \a handle, truncated to 64-bits.
*/ */
string HashHandle(const string& handle) const; string HashHandle(const string& handle) const;
/** /**
* Take in a unique file handle string to identify incoming file data. * Take in a unique file handle string to identify next piece of
* incoming file data/information.
* @param handle a unique string which identifies a single file.
*/ */
void SetHandle(const string& handle); void SetHandle(const string& handle);
/** /**
* Pass in non-sequential file data. * Pass in non-sequential file data.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file that data chunk occurs.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
*/ */
void DataIn(const u_char* data, uint64 len, uint64 offset, void DataIn(const u_char* data, uint64 len, uint64 offset,
AnalyzerTag::Tag tag, Connection* conn, bool is_orig); AnalyzerTag::Tag tag, Connection* conn, bool is_orig);
/** /**
* Pass in sequential file data. * Pass in sequential file data.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if it is being sent in the opposite direction.
*/ */
void DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag, void DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag,
Connection* conn, bool is_orig); Connection* conn, bool is_orig);
/** /**
* Pass in sequential file data from external source (e.g. input framework). * Pass in sequential file data from external source (e.g. input framework).
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param file_id an identifier for the file (usually a hash of \a source).
* @param source uniquely identifies the file and should also describe
* in human-readable form where the file input is coming from (e.g.
* a local file path).
*/ */
void DataIn(const u_char* data, uint64 len, const string& file_id, void DataIn(const u_char* data, uint64 len, const string& file_id,
const string& source); const string& source);
/** /**
* Signal the end of file data. * Signal the end of file data regardless of which direction it is being
* sent over the connection.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
*/ */
void EndOfFile(AnalyzerTag::Tag tag, Connection* conn); void EndOfFile(AnalyzerTag::Tag tag, Connection* conn);
/**
* Signal the end of file data being transferred over a connection in
* a particular direction.
* @param tag network protocol over which the file data is transferred.
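* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if it is being sent in the opposite direction.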
*/
void EndOfFile(AnalyzerTag::Tag tag, Connection* conn, bool is_orig); void EndOfFile(AnalyzerTag::Tag tag, Connection* conn, bool is_orig);
/**
* Signal the end of file data being transferred using the file identifier.
* @param file_id the file identifier/hash.
*/
void EndOfFile(const string& file_id); void EndOfFile(const string& file_id);
/** /**
* Signal a gap in the file data stream. * Signal a gap in the file data stream.
* @param offset number of bytes into the file at which the missing chunk starts.
* @param len length in bytes of the missing chunk of file data.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if it is being sent in the opposite direction.
*/ */
void Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag, Connection* conn, void Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag, Connection* conn,
bool is_orig); bool is_orig);
/** /**
* Provide the expected number of bytes that comprise a file. * Provide the expected number of bytes that comprise a file.
* @param size the number of bytes in the full file.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if it is being sent in the opposite direction.
*/ */
void SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn, void SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn,
bool is_orig); bool is_orig);
@ -84,12 +141,18 @@ public:
/** /**
* Starts ignoring a file, which will finally be removed from internal * Starts ignoring a file, which will finally be removed from internal
* mappings on EOF or TIMEOUT. * mappings on EOF or TIMEOUT.
* @param file_id the file identifier/hash.
* @return false if file identifier did not map to anything, else true. * @return false if file identifier did not map to anything, else true.
*/ */
bool IgnoreFile(const string& file_id); bool IgnoreFile(const string& file_id);
/** /**
* Sets an inactivity threshold for the file. * Sets an inactivity threshold for the file.
* @param file_id the file identifier/hash.
* @param interval the amount of time in which no activity is seen for
* the file identified by \a file_id that will cause the file
* to be considered stale, timed out, and then have its resources reclaimed.
* @return false if file identifier did not map to anything, else true.
*/ */
bool SetTimeoutInterval(const string& file_id, double interval) const; bool SetTimeoutInterval(const string& file_id, double interval) const;
@ -97,17 +160,23 @@ public:
* Queue attachment of an analyzer to the file identifier. Multiple * Queue attachment of an analyzer to the file identifier. Multiple
* analyzers of a given type can be attached per file identifier at a time * analyzers of a given type can be attached per file identifier at a time
* as long as the arguments differ. * as long as the arguments differ.
* @param file_id the file identifier/hash.
* @param args an \c AnalyzerArgs value which describes a file analyzer.
* @return false if the analyzer failed to be instantiated, else true. * @return false if the analyzer failed to be instantiated, else true.
*/ */
bool AddAnalyzer(const string& file_id, RecordVal* args) const; bool AddAnalyzer(const string& file_id, RecordVal* args) const;
/** /**
* Queue removal of an analyzer for a given file identifier. * Queue removal of an analyzer for a given file identifier.
* @param file_id the file identifier/hash.
* @param args an \c AnalyzerArgs value which describes a file analyzer.
* @return true if the analyzer is active at the time of call, else false. * @return true if the analyzer is active at the time of call, else false.
*/ */
bool RemoveAnalyzer(const string& file_id, const RecordVal* args) const; bool RemoveAnalyzer(const string& file_id, const RecordVal* args) const;
/** /**
* Tells whether analysis for a file is active or ignored.
* @param file_id the file identifier/hash.
* @return whether the file mapped to \a file_id is being ignored. * @return whether the file mapped to \a file_id is being ignored.
*/ */
bool IsIgnored(const string& file_id); bool IsIgnored(const string& file_id);
@ -119,6 +188,16 @@ protected:
typedef map<string, File*> IDMap; typedef map<string, File*> IDMap;
/** /**
* Create a new file to be analyzed or retrieve an existing one.
* @param file_id the file identifier/hash.
* @param conn network connection, if any, over which the file is
* transferred.
* @param tag network protocol, if any, over which the file is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if it is being sent in the opposite direction (or if
* this file isn't related to a connection).
* @param update_conn whether we need to update connection-related fields
* in the \c fa_file record value associated with the file.
* @return the File object mapped to \a file_id or a null pointer if * @return the File object mapped to \a file_id or a null pointer if
* analysis is being ignored for the associated file. A File * analysis is being ignored for the associated file. A File
* object may be created if a mapping doesn't exist, and if it did * object may be created if a mapping doesn't exist, and if it did
@ -130,6 +209,8 @@ protected:
bool is_orig = false, bool update_conn = true); bool is_orig = false, bool update_conn = true);
/** /**
* Try to retrieve a file that's being analyzed, using its identifier/hash.
* @param file_id the file identifier/hash.
* @return the File object mapped to \a file_id, or a null pointer if no * @return the File object mapped to \a file_id, or a null pointer if no
* mapping exists. * mapping exists.
*/ */
@ -138,11 +219,15 @@ protected:
/** /**
* Evaluate timeout policy for a file and remove the File object mapped to * Evaluate timeout policy for a file and remove the File object mapped to
* \a file_id if needed. * \a file_id if needed.
* @param file_id the file identifier/hash.
* @param is_terminating whether the Manager (and probably Bro) is in a
* terminating state. If true, then the timeout cannot be postponed.
*/ */
void Timeout(const string& file_id, bool is_terminating = ::terminating); void Timeout(const string& file_id, bool is_terminating = ::terminating);
/** /**
* Immediately remove file_analysis::File object associated with \a file_id. * Immediately remove file_analysis::File object associated with \a file_id.
* @param file_id the file identifier/hash.
* @return false if file id string did not map to anything, else true. * @return false if file id string did not map to anything, else true.
*/ */
bool RemoveFile(const string& file_id); bool RemoveFile(const string& file_id);
@ -151,11 +236,20 @@ protected:
* Sets #current_file_id to a hash of a unique file handle string based on * Sets #current_file_id to a hash of a unique file handle string based on
* what the \c get_file_handle event derives from the connection params. * what the \c get_file_handle event derives from the connection params.
* Event queue is flushed so that we can get the handle value immediately. * Event queue is flushed so that we can get the handle value immediately.
* @param tag network protocol over which the file is transferred.
* @param c network connection over which the file is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if it is being sent in the opposite direction.
*/ */
void GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig); void GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig);
/** /**
* @return whether file analysis is disabled for the given analyzer. * Check if analysis is available for files transferred over a given
* network protocol.
* @param tag the network protocol over which files can be transferred and
* analyzed by the file analysis framework.
* @return whether file analysis is disabled for the analyzer given by
* \a tag.
*/ */
static bool IsDisabled(AnalyzerTag::Tag tag); static bool IsDisabled(AnalyzerTag::Tag tag);