mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
A set of file analysis extensions.
- Enable manager to associate analyzers with a MIME type. With that, one can now say "enable all analyzers for, e.g., image/gif". This is exposed to script-land as Files::add_analyzers_for_mime_type(f: fa_file, mtype: string). For MIME types identified via libmagic, this happens automatically (via the file_new() handler in files/main.bro). - Extend the analyzer API to better match that of protocol analyzers: - Adding unique analyzer IDs so that we can refer to instances from script-land. - Adding subtypes to Components so that a single analyzer implementation can support different types of analyzers internally. - Add an analyzer method SetAnalyzerTag() that allows setting the tag after construction. - Adding Init() and Done() methods for consistency with what other classes offer. - Add debug logging to the file_analysis stream. TODO: test cases missing for the new script-land functionality.
This commit is contained in:
parent
f0fe270029
commit
d34f23c8d4
12 changed files with 337 additions and 30 deletions
|
@ -56,7 +56,7 @@ export {
|
||||||
## local file path which was read, or some other input source.
|
## local file path which was read, or some other input source.
|
||||||
source: string &log &optional;
|
source: string &log &optional;
|
||||||
|
|
||||||
## A value to represent the depth of this file in relation
|
## A value to represent the depth of this file in relation
|
||||||
## to its source. In SMTP, it is the depth of the MIME
|
## to its source. In SMTP, it is the depth of the MIME
|
||||||
## attachment on the message. In HTTP, it is the depth of the
|
## attachment on the message. In HTTP, it is the depth of the
|
||||||
## request within the TCP connection.
|
## request within the TCP connection.
|
||||||
|
@ -72,7 +72,7 @@ export {
|
||||||
mime_type: string &log &optional;
|
mime_type: string &log &optional;
|
||||||
|
|
||||||
## A filename for the file if one is available from the source
|
## A filename for the file if one is available from the source
|
||||||
## for the file. These will frequently come from
|
## for the file. These will frequently come from
|
||||||
## "Content-Disposition" headers in network protocols.
|
## "Content-Disposition" headers in network protocols.
|
||||||
filename: string &log &optional;
|
filename: string &log &optional;
|
||||||
|
|
||||||
|
@ -148,9 +148,18 @@ export {
|
||||||
## Returns: true if the analyzer will be added, or false if analysis
|
## Returns: true if the analyzer will be added, or false if analysis
|
||||||
## for the file isn't currently active or the *args*
|
## for the file isn't currently active or the *args*
|
||||||
## were invalid for the analyzer type.
|
## were invalid for the analyzer type.
|
||||||
global add_analyzer: function(f: fa_file,
|
global add_analyzer: function(f: fa_file,
|
||||||
tag: Files::Tag,
|
tag: Files::Tag,
|
||||||
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
|
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
|
||||||
|
|
||||||
|
## Adds all analyzers associated with a given MIME type to the analysis of
|
||||||
|
## a file. Note that analyzers added via MIME types cannot take further
|
||||||
|
## arguments.
|
||||||
|
##
|
||||||
|
## f: the file.
|
||||||
|
##
|
||||||
|
## mtype: the MIME type; it will be compared case-insensitively.
|
||||||
|
global add_analyzers_for_mime_type: function(f: fa_file, mtype: string);
|
||||||
|
|
||||||
## Removes an analyzer from the analysis of a given file.
|
## Removes an analyzer from the analysis of a given file.
|
||||||
##
|
##
|
||||||
|
@ -195,7 +204,7 @@ export {
|
||||||
## A callback to generate a file handle on demand when
|
## A callback to generate a file handle on demand when
|
||||||
## one is needed by the core.
|
## one is needed by the core.
|
||||||
get_file_handle: function(c: connection, is_orig: bool): string;
|
get_file_handle: function(c: connection, is_orig: bool): string;
|
||||||
|
|
||||||
## A callback to "describe" a file. In the case of an HTTP
|
## A callback to "describe" a file. In the case of an HTTP
|
||||||
## transfer the most obvious description would be the URL.
|
## transfer the most obvious description would be the URL.
|
||||||
## It's like an extremely compressed version of the normal log.
|
## It's like an extremely compressed version of the normal log.
|
||||||
|
@ -206,7 +215,7 @@ export {
|
||||||
## Register callbacks for protocols that work with the Files framework.
|
## Register callbacks for protocols that work with the Files framework.
|
||||||
## The callbacks must uniquely identify a file and each protocol can
|
## The callbacks must uniquely identify a file and each protocol can
|
||||||
## only have a single callback registered for it.
|
## only have a single callback registered for it.
|
||||||
##
|
##
|
||||||
## tag: Tag for the protocol analyzer having a callback being registered.
|
## tag: Tag for the protocol analyzer having a callback being registered.
|
||||||
##
|
##
|
||||||
## reg: A :bro:see:`Files::ProtoRegistration` record.
|
## reg: A :bro:see:`Files::ProtoRegistration` record.
|
||||||
|
@ -258,13 +267,13 @@ function set_info(f: fa_file)
|
||||||
f$info$source = f$source;
|
f$info$source = f$source;
|
||||||
f$info$duration = f$last_active - f$info$ts;
|
f$info$duration = f$last_active - f$info$ts;
|
||||||
f$info$seen_bytes = f$seen_bytes;
|
f$info$seen_bytes = f$seen_bytes;
|
||||||
if ( f?$total_bytes )
|
if ( f?$total_bytes )
|
||||||
f$info$total_bytes = f$total_bytes;
|
f$info$total_bytes = f$total_bytes;
|
||||||
f$info$missing_bytes = f$missing_bytes;
|
f$info$missing_bytes = f$missing_bytes;
|
||||||
f$info$overflow_bytes = f$overflow_bytes;
|
f$info$overflow_bytes = f$overflow_bytes;
|
||||||
if ( f?$is_orig )
|
if ( f?$is_orig )
|
||||||
f$info$is_orig = f$is_orig;
|
f$info$is_orig = f$is_orig;
|
||||||
if ( f?$mime_type )
|
if ( f?$mime_type )
|
||||||
f$info$mime_type = f$mime_type;
|
f$info$mime_type = f$mime_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -288,6 +297,15 @@ function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
|
||||||
return T;
|
return T;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function add_analyzers_for_mime_type(f: fa_file, mtype: string)
	{
	# Analyzers attached via a MIME type cannot take arguments, so a
	# default-constructed AnalyzerArgs record is handed to the core.
	local no_args: AnalyzerArgs;
	local added = __add_analyzers_for_mime_type(f$id, mtype, no_args);

	# Note the name of each analyzer in the returned set in the file's
	# log record.
	for ( t in added )
		add f$info$analyzers[Files::analyzer_name(t)];
	}
|
||||||
|
|
||||||
function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs))
|
function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs))
|
||||||
{
|
{
|
||||||
analyzer_add_callbacks[tag] = callback;
|
analyzer_add_callbacks[tag] = callback;
|
||||||
|
@ -311,6 +329,9 @@ function analyzer_name(tag: Files::Tag): string
|
||||||
event file_new(f: fa_file) &priority=10
	{
	set_info(f);

	# Without a MIME type there is nothing to attach automatically.
	if ( ! f?$mime_type )
		return;

	add_analyzers_for_mime_type(f, f$mime_type);
	}
|
||||||
|
|
||||||
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10
|
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10
|
||||||
|
|
|
@ -60,6 +60,13 @@ type addr_vec: vector of addr;
|
||||||
## directly and then remove this alias.
|
## directly and then remove this alias.
|
||||||
type table_string_of_string: table[string] of string;
|
type table_string_of_string: table[string] of string;
|
||||||
|
|
||||||
|
## A set of file analyzer tags.
|
||||||
|
##
|
||||||
|
## .. todo:: We need this type definition only for declaring builtin functions
|
||||||
|
## via ``bifcl``. We should extend ``bifcl`` to understand composite types
|
||||||
|
## directly and then remove this alias.
|
||||||
|
type files_tag_set: set[Files::Tag];
|
||||||
|
|
||||||
## A connection's transport-layer protocol. Note that Bro uses the term
|
## A connection's transport-layer protocol. Note that Bro uses the term
|
||||||
## "connection" broadly, using flow semantics for ICMP and UDP.
|
## "connection" broadly, using flow semantics for ICMP and UDP.
|
||||||
type transport_proto: enum {
|
type transport_proto: enum {
|
||||||
|
|
|
@ -3,9 +3,17 @@
|
||||||
#include "Analyzer.h"
|
#include "Analyzer.h"
|
||||||
#include "Manager.h"
|
#include "Manager.h"
|
||||||
|
|
||||||
|
file_analysis::ID file_analysis::Analyzer::id_counter = 0;
|
||||||
|
|
||||||
// Destructor: logs the teardown to the file-analysis debug stream and drops
// the reference taken on the AnalyzerArgs record at construction time.
file_analysis::Analyzer::~Analyzer()
	{
	DBG_LOG(DBG_FILE_ANALYSIS,
		"Destroy file analyzer %s",
		file_mgr->GetComponentName(tag));

	Unref(args);
	}
|
||||||
|
|
||||||
|
// Assigns the analyzer's type tag after construction.
void file_analysis::Analyzer::SetAnalyzerTag(const file_analysis::Tag& arg_tag)
	{
	// An already-set tag must not be changed; re-assigning the same tag
	// is tolerated.
	assert(! tag || tag == arg_tag);

	tag = arg_tag;
	}
|
||||||
|
|
|
@ -13,6 +13,8 @@ namespace file_analysis {
|
||||||
|
|
||||||
class File;
|
class File;
|
||||||
|
|
||||||
|
typedef uint32 ID;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class for analyzers that can be attached to file_analysis::File objects.
|
* Base class for analyzers that can be attached to file_analysis::File objects.
|
||||||
*/
|
*/
|
||||||
|
@ -25,6 +27,18 @@ public:
|
||||||
*/
|
*/
|
||||||
virtual ~Analyzer();
|
virtual ~Analyzer();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes the analyzer before input processing starts.
|
||||||
|
*/
|
||||||
|
virtual void Init()
|
||||||
|
{ };
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finishes the analyzer's operation after all input has been parsed.
|
||||||
|
*/
|
||||||
|
virtual void Done()
|
||||||
|
{ };
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Subclasses may override this method to receive file data non-sequentially.
|
* Subclasses may override this method to receive file data non-sequentially.
|
||||||
* @param data points to start of a chunk of file data.
|
* @param data points to start of a chunk of file data.
|
||||||
|
@ -72,6 +86,13 @@ public:
|
||||||
*/
|
*/
|
||||||
file_analysis::Tag Tag() const { return tag; }
|
file_analysis::Tag Tag() const { return tag; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the analyzer instance's internal ID. These IDs are unique
|
||||||
|
* across all analyzers instantiated and can thus be used to
|
||||||
|
* identify a specific instance.
|
||||||
|
*/
|
||||||
|
ID GetID() const { return id; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the AnalyzerArgs associated with the analyzer.
|
* @return the AnalyzerArgs associated with the analyzer.
|
||||||
*/
|
*/
|
||||||
|
@ -82,10 +103,19 @@ public:
|
||||||
*/
|
*/
|
||||||
File* GetFile() const { return file; }
|
File* GetFile() const { return file; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the tag associated with the analyzer's type. Note that this
|
||||||
|
* can be called only right after construction, if the constructor
|
||||||
|
* did not receive a name or tag. The method cannot be used to change
|
||||||
|
* an existing tag.
|
||||||
|
*/
|
||||||
|
void SetAnalyzerTag(const file_analysis::Tag& tag);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor. Only derived classes are meant to be instantiated.
|
* Constructor. Only derived classes are meant to be instantiated.
|
||||||
|
* @param arg_tag the tag defining the analyzer's type.
|
||||||
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifying
|
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifying
|
||||||
* tunable options, if any, related to a particular analyzer type.
|
* tunable options, if any, related to a particular analyzer type.
|
||||||
* @param arg_file the file to which the analyzer is being attached.
|
* @param arg_file the file to which the analyzer is being attached.
|
||||||
|
@ -94,13 +124,35 @@ protected:
|
||||||
: tag(arg_tag),
|
: tag(arg_tag),
|
||||||
args(arg_args->Ref()->AsRecordVal()),
|
args(arg_args->Ref()->AsRecordVal()),
|
||||||
file(arg_file)
|
file(arg_file)
|
||||||
{}
|
{
|
||||||
|
id = ++id_counter;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor. Only derived classes are meant to be instantiated.
|
||||||
|
* As this version of the constructor does not receive a name or tag,
|
||||||
|
* SetAnalyzerTag() must be called before the instance can be used.
|
||||||
|
*
|
||||||
|
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifying
|
||||||
|
* tunable options, if any, related to a particular analyzer type.
|
||||||
|
* @param arg_file the file to which the analyzer is being attached.
|
||||||
|
*/
|
||||||
|
Analyzer(RecordVal* arg_args, File* arg_file)
|
||||||
|
: tag(),
|
||||||
|
args(arg_args->Ref()->AsRecordVal()),
|
||||||
|
file(arg_file)
|
||||||
|
{
|
||||||
|
id = ++id_counter;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
ID id; /**< Unique instance ID. */
|
||||||
file_analysis::Tag tag; /**< The particular type of the analyzer instance. */
|
file_analysis::Tag tag; /**< The particular type of the analyzer instance. */
|
||||||
RecordVal* args; /**< \c AnalyzerArgs val gives tunable analyzer params. */
|
RecordVal* args; /**< \c AnalyzerArgs val gives tunable analyzer params. */
|
||||||
File* file; /**< The file to which the analyzer is attached. */
|
File* file; /**< The file to which the analyzer is attached. */
|
||||||
|
|
||||||
|
static ID id_counter;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace file_analysis
|
} // namespace file_analysis
|
||||||
|
|
|
@ -9,7 +9,10 @@ using namespace file_analysis;
|
||||||
|
|
||||||
static void analyzer_del_func(void* v)
|
static void analyzer_del_func(void* v)
|
||||||
{
|
{
|
||||||
delete (file_analysis::Analyzer*) v;
|
file_analysis::Analyzer* a = (file_analysis::Analyzer*)v;
|
||||||
|
|
||||||
|
a->Done();
|
||||||
|
delete a;
|
||||||
}
|
}
|
||||||
|
|
||||||
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file)
|
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file)
|
||||||
|
@ -98,6 +101,7 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
|
||||||
}
|
}
|
||||||
|
|
||||||
set->Insert(a, key);
|
set->Insert(a, key);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,7 +128,9 @@ bool AnalyzerSet::Remove(file_analysis::Tag tag, HashKey* key)
|
||||||
file_mgr->GetComponentName(tag),
|
file_mgr->GetComponentName(tag),
|
||||||
file->GetID().c_str());
|
file->GetID().c_str());
|
||||||
|
|
||||||
|
a->Done();
|
||||||
delete a;
|
delete a;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -176,6 +182,8 @@ void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key)
|
||||||
file_mgr->GetComponentName(a->Tag()), file->GetID().c_str());
|
file_mgr->GetComponentName(a->Tag()), file->GetID().c_str());
|
||||||
analyzer_map.Insert(key, a);
|
analyzer_map.Insert(key, a);
|
||||||
delete key;
|
delete key;
|
||||||
|
|
||||||
|
a->Init();
|
||||||
}
|
}
|
||||||
|
|
||||||
void AnalyzerSet::DrainModifications()
|
void AnalyzerSet::DrainModifications()
|
||||||
|
|
|
@ -8,13 +8,15 @@
|
||||||
|
|
||||||
using namespace file_analysis;
|
using namespace file_analysis;
|
||||||
|
|
||||||
// Builds a file analyzer component from its descriptive name, the factory
// used to instantiate analyzers, and an optional tag subtype, then registers
// the component with the file analysis manager.
Component::Component(const char* arg_name, factory_callback arg_factory, Tag::subtype_t subtype)
	: plugin::Component(plugin::component::FILE_ANALYZER),
	  plugin::TaggedComponent<file_analysis::Tag>(subtype)
	{
	name = copy_string(arg_name);
	canon_name = canonify_name(arg_name);
	factory = arg_factory;

	// NOTE(review): this relies on file_mgr already existing whenever a
	// Component is constructed -- confirm against initialization order.
	file_mgr->RegisterComponent(this, "ANALYZER_");
	}
|
||||||
|
|
||||||
Component::Component(const Component& other)
|
Component::Component(const Component& other)
|
||||||
|
@ -24,6 +26,8 @@ Component::Component(const Component& other)
|
||||||
name = copy_string(other.name);
|
name = copy_string(other.name);
|
||||||
canon_name = copy_string(other.canon_name);
|
canon_name = copy_string(other.canon_name);
|
||||||
factory = other.factory;
|
factory = other.factory;
|
||||||
|
|
||||||
|
// TODO: Do we need the RegisterComponent() call here?
|
||||||
}
|
}
|
||||||
|
|
||||||
Component::~Component()
|
Component::~Component()
|
||||||
|
|
|
@ -40,8 +40,14 @@ public:
|
||||||
* from file_analysis::Analyzer. This is typically a static \c
|
* from file_analysis::Analyzer. This is typically a static \c
|
||||||
* Instantiate() method inside the class that just allocates and
|
* Instantiate() method inside the class that just allocates and
|
||||||
* returns a new instance.
|
* returns a new instance.
|
||||||
|
*
|
||||||
|
* @param subtype A subtype associated with this component that
|
||||||
|
* further distinguishes it. The subtype will be integrated into the
|
||||||
|
* file_analysis::Tag that the manager associates with this analyzer, and
|
||||||
|
* analyzer instances can accordingly access it via Analyzer::Tag().
|
||||||
|
* If not used, leave at zero.
|
||||||
*/
|
*/
|
||||||
Component(const char* name, factory_callback factory);
|
Component(const char* name, factory_callback factory, Tag::subtype_t subtype = 0);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy constructor.
|
* Copy constructor.
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// See the file "COPYING" in the main distribution directory for copyright.
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
#include "File.h"
|
#include "File.h"
|
||||||
#include "FileTimer.h"
|
#include "FileTimer.h"
|
||||||
|
@ -82,7 +83,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
|
||||||
{
|
{
|
||||||
StaticInit();
|
StaticInit();
|
||||||
|
|
||||||
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s", file_id.c_str());
|
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Creating new File object", file_id.c_str());
|
||||||
|
|
||||||
val = new RecordVal(fa_file_type);
|
val = new RecordVal(fa_file_type);
|
||||||
val->Assign(id_idx, new StringVal(file_id.c_str()));
|
val->Assign(id_idx, new StringVal(file_id.c_str()));
|
||||||
|
@ -100,7 +101,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
|
||||||
|
|
||||||
File::~File()
|
File::~File()
|
||||||
{
|
{
|
||||||
DBG_LOG(DBG_FILE_ANALYSIS, "Destroying File object %s", id.c_str());
|
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Destroying File object", id.c_str());
|
||||||
Unref(val);
|
Unref(val);
|
||||||
|
|
||||||
// Queue may not be empty in the case where only content gaps were seen.
|
// Queue may not be empty in the case where only content gaps were seen.
|
||||||
|
@ -229,6 +230,7 @@ void File::IncrementByteCount(uint64 size, int field_idx)
|
||||||
|
|
||||||
// Records the expected total size of the file in the script-layer record.
void File::SetTotalBytes(uint64 size)
	{
	DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Total bytes %" PRIu64, id.c_str(), size);

	Val* total = new Val(size, TYPE_COUNT);
	val->Assign(total_bytes_idx, total);
	}
|
||||||
|
|
||||||
|
@ -251,11 +253,17 @@ void File::ScheduleInactivityTimer() const
|
||||||
|
|
||||||
// Queues the attachment of an analyzer of the given type to this file.
// Returns false if the file is already done, otherwise the result of
// queuing the addition.
bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args)
	{
	DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing addition of %s analyzer",
		id.c_str(), file_mgr->GetComponentName(tag));

	// A finished file accepts no further analyzer changes.
	if ( done )
		return false;

	return analyzers.QueueAdd(tag, args);
	}
|
||||||
|
|
||||||
// Queues the removal of an analyzer of the given type from this file.
// Returns false if the file is already done, otherwise the result of
// queuing the removal.
bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)
	{
	DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing remove of %s analyzer",
		id.c_str(), file_mgr->GetComponentName(tag));

	// A finished file accepts no further analyzer changes.
	if ( done )
		return false;

	return analyzers.QueueRemove(tag, args);
	}
|
||||||
|
|
||||||
|
@ -284,16 +292,18 @@ bool File::DetectMIME(const u_char* data, uint64 len)
|
||||||
|
|
||||||
if ( mime )
|
if ( mime )
|
||||||
{
|
{
|
||||||
|
// strip off charset
|
||||||
const char* mime_end = strchr(mime, ';');
|
const char* mime_end = strchr(mime, ';');
|
||||||
|
|
||||||
if ( mime_end )
|
StringVal* mime_val = mime_end ?
|
||||||
// strip off charset
|
new StringVal(mime_end - mime, mime) :
|
||||||
val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime));
|
new StringVal(mime);
|
||||||
else
|
|
||||||
val->Assign(mime_type_idx, new StringVal(mime));
|
val->Assign(mime_type_idx, mime_val);
|
||||||
|
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Detected MIME type %s", id.c_str(), mime_val->CheckString());
|
||||||
}
|
}
|
||||||
|
|
||||||
return mime;
|
// Report whether a MIME type was actually detected, as the header documents.
return mime != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void File::ReplayBOF()
|
void File::ReplayBOF()
|
||||||
|
@ -314,7 +324,6 @@ void File::ReplayBOF()
|
||||||
val->Assign(bof_buffer_idx, new StringVal(bs));
|
val->Assign(bof_buffer_idx, new StringVal(bs));
|
||||||
|
|
||||||
DetectMIME(bs->Bytes(), bs->Len());
|
DetectMIME(bs->Bytes(), bs->Len());
|
||||||
|
|
||||||
FileEvent(file_new);
|
FileEvent(file_new);
|
||||||
|
|
||||||
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
|
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
|
||||||
|
@ -333,6 +342,11 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
|
||||||
first_chunk = false;
|
first_chunk = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Every argument needs its own conversion: the offset was missing its "%", and the trailing "..." marker had no "%s".
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in at offset %" PRIu64 "; %s [%s%s]",
|
||||||
|
id.c_str(), len, offset,
|
||||||
|
IsComplete() ? "complete" : "incomplete",
|
||||||
|
fmt_bytes((const char*) data, min((uint64)40, len)), len > 40 ? "..." : "");
|
||||||
|
|
||||||
file_analysis::Analyzer* a = 0;
|
file_analysis::Analyzer* a = 0;
|
||||||
IterCookie* c = analyzers.InitForIteration();
|
IterCookie* c = analyzers.InitForIteration();
|
||||||
|
|
||||||
|
@ -367,6 +381,11 @@ void File::DataIn(const u_char* data, uint64 len)
|
||||||
missed_bof = false;
|
missed_bof = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// "%s%s" so that the trailing "..." marker for truncated data is actually printed.
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in; %s [%s%s]",
|
||||||
|
id.c_str(), len,
|
||||||
|
IsComplete() ? "complete" : "incomplete",
|
||||||
|
fmt_bytes((const char*) data, min((uint64)40, len)), len > 40 ? "..." : "");
|
||||||
|
|
||||||
file_analysis::Analyzer* a = 0;
|
file_analysis::Analyzer* a = 0;
|
||||||
IterCookie* c = analyzers.InitForIteration();
|
IterCookie* c = analyzers.InitForIteration();
|
||||||
|
|
||||||
|
@ -391,6 +410,8 @@ void File::DataIn(const u_char* data, uint64 len)
|
||||||
|
|
||||||
void File::EndOfFile()
|
void File::EndOfFile()
|
||||||
{
|
{
|
||||||
|
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] End of file", id.c_str());
|
||||||
|
|
||||||
if ( done )
|
if ( done )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -417,6 +438,9 @@ void File::EndOfFile()
|
||||||
|
|
||||||
void File::Gap(uint64 offset, uint64 len)
|
void File::Gap(uint64 offset, uint64 len)
|
||||||
{
|
{
|
||||||
|
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Gap of size %" PRIu64 " at offset %" PRIu64,
|
||||||
|
// Argument order follows the format string: gap size (len) first, then offset.
id.c_str(), len, offset);
|
||||||
|
|
||||||
analyzers.DrainModifications();
|
analyzers.DrainModifications();
|
||||||
|
|
||||||
// If we were buffering the beginning of the file, a gap means we've got
|
// If we were buffering the beginning of the file, a gap means we've got
|
||||||
|
|
|
@ -229,7 +229,7 @@ protected:
|
||||||
* field in #val.
|
* field in #val.
|
||||||
* @param data pointer to a chunk of file data.
|
* @param data pointer to a chunk of file data.
|
||||||
* @param len number of bytes in the data chunk.
|
* @param len number of bytes in the data chunk.
|
||||||
* @return whether mime type was available.
|
* @return true if mime type available.
|
||||||
*/
|
*/
|
||||||
bool DetectMIME(const u_char* data, uint64 len);
|
bool DetectMIME(const u_char* data, uint64 len);
|
||||||
|
|
||||||
|
|
|
@ -12,10 +12,12 @@
|
||||||
#include "UID.h"
|
#include "UID.h"
|
||||||
|
|
||||||
#include "plugin/Manager.h"
|
#include "plugin/Manager.h"
|
||||||
|
#include "analyzer/Manager.h"
|
||||||
|
|
||||||
using namespace file_analysis;
|
using namespace file_analysis;
|
||||||
|
|
||||||
TableVal* Manager::disabled = 0;
|
TableVal* Manager::disabled = 0;
|
||||||
|
TableType* Manager::tag_set_type = 0;
|
||||||
string Manager::salt;
|
string Manager::salt;
|
||||||
|
|
||||||
Manager::Manager()
|
Manager::Manager()
|
||||||
|
@ -27,15 +29,13 @@ Manager::Manager()
|
||||||
// Destructor: shuts file analysis down and frees the per-MIME-type tag sets
// that LookupMIMEType() allocated.
Manager::~Manager()
	{
	Terminate();

	MIMEMap::iterator it = mime_types.begin();

	while ( it != mime_types.end() )
		{
		delete it->second;
		++it;
		}
	}
|
||||||
|
|
||||||
// Pre-script initialization hook.
void Manager::InitPreScript()
	{
	// Intentionally empty: components register themselves with the
	// manager from the Component constructor.
	}
|
||||||
|
|
||||||
void Manager::InitPostScript()
|
void Manager::InitPostScript()
|
||||||
|
@ -72,6 +72,7 @@ void Manager::SetHandle(const string& handle)
|
||||||
if ( handle.empty() )
|
if ( handle.empty() )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
DBG_LOG(DBG_FILE_ANALYSIS, "Set current handle to %s", handle.c_str());
|
||||||
current_file_id = HashHandle(handle);
|
current_file_id = HashHandle(handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -205,6 +206,28 @@ bool Manager::AddAnalyzer(const string& file_id, file_analysis::Tag tag,
|
||||||
return file->AddAnalyzer(tag, args);
|
return file->AddAnalyzer(tag, args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Queues attachment of every analyzer registered for the given MIME type to
// the file identified by file_id. Returns a newly allocated set[Files::Tag]
// holding the tags for which queuing succeeded; the set is empty if nothing
// is registered for the MIME type.
TableVal* Manager::AddAnalyzersForMIMEType(const string& file_id, const string& mtype,
					   RecordVal* args)
	{
	// Resolve the script-layer set type lazily on first use.
	if ( ! tag_set_type )
		tag_set_type = internal_type("files_tag_set")->AsTableType();

	TableVal* added = new TableVal(tag_set_type);
	TagSet* registered = LookupMIMEType(mtype, false);

	if ( registered )
		{
		for ( TagSet::const_iterator it = registered->begin();
		      it != registered->end(); ++it )
			{
			file_analysis::Tag tag = *it;

			if ( AddAnalyzer(file_id, tag, args) )
				added->Assign(tag.AsEnumVal(), 0);
			}
		}

	return added;
	}
|
||||||
|
|
||||||
bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag,
|
bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag,
|
||||||
RecordVal* args) const
|
RecordVal* args) const
|
||||||
{
|
{
|
||||||
|
@ -327,6 +350,9 @@ void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
|
||||||
if ( ! get_file_handle )
|
if ( ! get_file_handle )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
DBG_LOG(DBG_FILE_ANALYSIS, "Raise get_file_handle() for protocol analyzer %s",
|
||||||
|
analyzer_mgr->GetComponentName(tag));
|
||||||
|
|
||||||
EnumVal* tagval = tag.AsEnumVal();
|
EnumVal* tagval = tag.AsEnumVal();
|
||||||
Ref(tagval);
|
Ref(tagval);
|
||||||
|
|
||||||
|
@ -376,5 +402,72 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return c->Factory()(args, f);
|
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s for file %s",
|
||||||
|
GetComponentName(tag), f->id.c_str());
|
||||||
|
|
||||||
|
Analyzer* a = c->Factory()(args, f);
|
||||||
|
|
||||||
|
if ( ! a )
|
||||||
|
reporter->InternalError("file analyzer instantiation failed");
|
||||||
|
|
||||||
|
a->SetAnalyzerTag(tag);
|
||||||
|
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the set of analyzer tags registered for a MIME type.
//
// mtype: the MIME type; it is upper-cased before lookup, so matching is
// case-insensitive.
// add_if_not_found: if true, an empty set is created, stored and returned
// when the type is unknown; if false, 0 is returned in that case.
Manager::TagSet* Manager::LookupMIMEType(const string& mtype, bool add_if_not_found)
	{
	// Normalize the key once and reuse it for both lookup and insertion.
	const string key = to_upper(mtype);

	MIMEMap::const_iterator i = mime_types.find(key);

	if ( i != mime_types.end() )
		return i->second;

	if ( ! add_if_not_found )
		return 0;

	TagSet* l = new TagSet;
	mime_types.insert(std::make_pair(key, l));
	return l;
	}
|
||||||
|
|
||||||
|
// Script-value overload: resolves the Files::Tag enum value to its component
// and forwards to the Tag/string overload. Returns false if no component is
// known for the tag.
bool Manager::RegisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype)
	{
	Component* component = Lookup(tag);

	if ( component )
		return RegisterAnalyzerForMIMEType(component->Tag(), mtype->CheckString());

	return false;
	}
|
||||||
|
|
||||||
|
// Associates an analyzer tag with a MIME type, creating the type's tag set
// on first use. Always returns true.
bool Manager::RegisterAnalyzerForMIMEType(Tag tag, const string& mtype)
	{
	TagSet* tags = LookupMIMEType(mtype, true);

	DBG_LOG(DBG_FILE_ANALYSIS, "Register analyzer %s for MIME type %s",
		GetComponentName(tag), mtype.c_str());

	tags->insert(tag);

	return true;
	}
|
||||||
|
|
||||||
|
// Script-value overload: resolves the Files::Tag enum value to its component
// and forwards to the Tag/string overload. Returns false if no component is
// known for the tag.
bool Manager::UnregisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype)
	{
	Component* component = Lookup(tag);

	if ( component )
		return UnregisterAnalyzerForMIMEType(component->Tag(), mtype->CheckString());

	return false;
	}
|
||||||
|
|
||||||
|
// Removes the association between an analyzer tag and a MIME type.
// Always returns true, including when the tag (or the MIME type) was never
// registered.
bool Manager::UnregisterAnalyzerForMIMEType(Tag tag, const string& mtype)
	{
	// Look up without creating: unregistering an unknown MIME type must
	// not allocate and store an empty tag set as a side effect.
	TagSet* l = LookupMIMEType(mtype, false);

	DBG_LOG(DBG_FILE_ANALYSIS, "Unregister analyzer %s for MIME type %s",
		GetComponentName(tag), mtype.c_str());

	if ( l )
		l->erase(tag);

	return true;
	}
|
||||||
|
|
|
@ -198,6 +198,18 @@ public:
|
||||||
bool AddAnalyzer(const string& file_id, file_analysis::Tag tag,
|
bool AddAnalyzer(const string& file_id, file_analysis::Tag tag,
|
||||||
RecordVal* args) const;
|
RecordVal* args) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Queue attachment of all analyzers associated with a given MIME
|
||||||
|
* type to the file identifier.
|
||||||
|
*
|
||||||
|
* @param file_id the file identifier/hash.
|
||||||
|
* @param mtype the MIME type; comparisons will be performed case-insensitively.
|
||||||
|
* @param args a \c AnalyzerArgs value which describes a file analyzer.
|
||||||
|
* @return A ref'ed \c set[Tag] with all added analyzers.
|
||||||
|
*/
|
||||||
|
TableVal* AddAnalyzersForMIMEType(const string& file_id, const string& mtype,
|
||||||
|
RecordVal* args);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Queue removal of an analyzer for a given file identifier.
|
* Queue removal of an analyzer for a given file identifier.
|
||||||
* @param file_id the file identifier/hash.
|
* @param file_id the file identifier/hash.
|
||||||
|
@ -224,6 +236,62 @@ public:
|
||||||
*/
|
*/
|
||||||
Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const;
|
Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Registers a MIME type for an analyzer. Once registered, files of
|
||||||
|
* that MIME type will automatically get a corresponding analyzer
|
||||||
|
* assigned.
|
||||||
|
*
|
||||||
|
* @param tag The analyzer's tag as an enum of script type \c
|
||||||
|
* Files::Tag.
|
||||||
|
*
|
||||||
|
* @param mtype The MIME type. It will be matched case-insensitively.
|
||||||
|
*
|
||||||
|
* @return True if successful.
|
||||||
|
*/
|
||||||
|
bool RegisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Registers a MIME type for an analyzer. Once registered, files of
|
||||||
|
* that MIME type will automatically get a corresponding analyzer
|
||||||
|
* assigned.
|
||||||
|
*
|
||||||
|
* @param tag The analyzer's tag as an enum of script type \c
|
||||||
|
* Files::Tag.
|
||||||
|
*
|
||||||
|
* @param mtype The MIME type. It will be matched case-insensitively.
|
||||||
|
*
|
||||||
|
* @return True if successful.
|
||||||
|
*/
|
||||||
|
bool RegisterAnalyzerForMIMEType(Tag tag, const string& mtype);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unregisters a MIME type for an analyzer.
|
||||||
|
*
|
||||||
|
* @param tag The analyzer's tag as an enum of script type \c
|
||||||
|
* Files::Tag.
|
||||||
|
*
|
||||||
|
* @param mtype The MIME type. It will be matched case-insensitively.
|
||||||
|
*
|
||||||
|
* @return True if successful (incl. when the type wasn't actually
|
||||||
|
* registered for the analyzer).
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
bool UnregisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unregisters a MIME type for an analyzer.
|
||||||
|
*
|
||||||
|
* @param tag The analyzer's tag as an enum of script type \c
|
||||||
|
* Files::Tag.
|
||||||
|
*
|
||||||
|
* @param mtype The MIME type. It will be matched case-insensitively.
|
||||||
|
*
|
||||||
|
* @return True if successful (incl. when the type wasn't actually
|
||||||
|
* registered for the analyzer).
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
bool UnregisterAnalyzerForMIMEType(Tag tag, const string& mtype);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
friend class FileTimer;
|
friend class FileTimer;
|
||||||
|
|
||||||
|
@ -297,12 +365,18 @@ protected:
|
||||||
static bool IsDisabled(analyzer::Tag tag);
|
static bool IsDisabled(analyzer::Tag tag);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
typedef set<Tag> TagSet;
|
||||||
|
typedef map<string, TagSet*> MIMEMap;
|
||||||
|
|
||||||
|
TagSet* LookupMIMEType(const string& mtype, bool add_if_not_found);
|
||||||
|
|
||||||
IDMap id_map; /**< Map file ID to file_analysis::File records. */
|
IDMap id_map; /**< Map file ID to file_analysis::File records. */
|
||||||
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
|
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
|
||||||
string current_file_id; /**< Hash of what get_file_handle event sets. */
|
string current_file_id; /**< Hash of what get_file_handle event sets. */
|
||||||
|
MIMEMap mime_types;/**< Mapping of MIME types to analyzers. */
|
||||||
|
|
||||||
static TableVal* disabled; /**< Table of disabled analyzers. */
|
static TableVal* disabled; /**< Table of disabled analyzers. */
|
||||||
|
static TableType* tag_set_type; /**< Type for set[tag]. */
|
||||||
static string salt; /**< A salt added to file handles before hashing. */
|
static string salt; /**< A salt added to file handles before hashing. */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,16 @@ function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): b
|
||||||
return new Val(result, TYPE_BOOL);
|
return new Val(result, TYPE_BOOL);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
## :bro:see:`Files::add_analyzers_for_mime_type`.
|
||||||
|
function Files::__add_analyzers_for_mime_type%(file_id: string, mtype: string, args: any%): files_tag_set
|
||||||
|
%{
|
||||||
|
using BifType::Record::Files::AnalyzerArgs;
|
||||||
|
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
|
||||||
|
Val* analyzers = file_mgr->AddAnalyzersForMIMEType(file_id->CheckString(), mtype->CheckString(), rv);
|
||||||
|
Unref(rv);
|
||||||
|
return analyzers;
|
||||||
|
%}
|
||||||
|
|
||||||
## :bro:see:`Files::remove_analyzer`.
|
## :bro:see:`Files::remove_analyzer`.
|
||||||
function Files::__remove_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
|
function Files::__remove_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
|
||||||
%{
|
%{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue