Merge remote-tracking branch 'origin/master' into topic/seth/sumstats-updates

commit d6edbd27b1
Author: Seth Hall
Date: 2013-08-02 13:17:48 -04:00
96 changed files with 3085 additions and 839 deletions

CHANGES

@ -1,4 +1,54 @@
2.1-1007 | 2013-08-01 15:41:54 -0700
* More function documentation. (Bernhard Amann)
2.1-1004 | 2013-08-01 14:37:43 -0700
* Adding a probabilistic data structure for computing "top k"
elements. (Bernhard Amann)
The corresponding functions are:
topk_init(size: count): opaque of topk
topk_add(handle: opaque of topk, value: any)
topk_get_top(handle: opaque of topk, k: count)
topk_count(handle: opaque of topk, value: any): count
topk_epsilon(handle: opaque of topk, value: any): count
topk_size(handle: opaque of topk): count
topk_sum(handle: opaque of topk): count
topk_merge(handle1: opaque of topk, handle2: opaque of topk)
topk_merge_prune(handle1: opaque of topk, handle2: opaque of topk)
2.1-971 | 2013-08-01 13:28:32 -0700
* Fix some build errors. (Jon Siwek)
* Internal refactoring of how plugin components are tagged/managed.
(Jon Siwek)
* Fix various documentation, mostly related to file analysis. (Jon
Siwek)
* Changing the Bloom filter hashing so that it's independent of
CompositeHash. (Robin Sommer)
2.1-951 | 2013-08-01 11:19:23 -0400
* Small fix to deal with a bug in the SSL log delay mechanism.
2.1-948 | 2013-07-31 20:08:28 -0700
* Fix segfault caused by merging an empty bloom-filter with a
bloom-filter already containing values. (Bernhard Amann)
2.1-945 | 2013-07-30 10:05:10 -0700
* Make hashers serializable. (Matthias Vallentin)
* Add docs and use default value for hasher names. (Matthias
Vallentin)
2.1-939 | 2013-07-29 15:42:38 -0700
* Added Exec, Dir, and ActiveHTTP modules. (Seth Hall)

NEWS

@ -113,6 +113,7 @@ New Functionality
the frequency of elements. The corresponding functions are:
bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter
bloomfilter_basic_init2(k: count, cells: count, name: string &default=""): opaque of bloomfilter
bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter
bloomfilter_add(bf: opaque of bloomfilter, x: any)
bloomfilter_lookup(bf: opaque of bloomfilter, x: any): count
@ -121,6 +122,21 @@ New Functionality
See <INSERT LINK> for full documentation.
- Bro now provides a probabilistic data structure for computing
"top k" elements. The corresponding functions are:
topk_init(size: count): opaque of topk
topk_add(handle: opaque of topk, value: any)
topk_get_top(handle: opaque of topk, k: count)
topk_count(handle: opaque of topk, value: any): count
topk_epsilon(handle: opaque of topk, value: any): count
topk_size(handle: opaque of topk): count
topk_sum(handle: opaque of topk): count
topk_merge(handle1: opaque of topk, handle2: opaque of topk)
topk_merge_prune(handle1: opaque of topk, handle2: opaque of topk)
See <INSERT LINK> for full documentation.
- base/utils/exec.bro provides a module to start external processes
asynchronously and retrieve their output on termination.
base/utils/dir.bro uses it to monitor a directory for changes, and
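As a rough illustration of the new top-k BIFs listed above, the sketch below keeps an approximate top 10 of responder addresses seen in established connections. The table size of 100 and the choice of connection_established/bro_done as feed and report points are illustrative assumptions, not part of the API.

    global resp_topk: opaque of topk;

    event bro_init()
        {
        # Track (approximately) the 100 heaviest hitters.
        resp_topk = topk_init(100);
        }

    event connection_established(c: connection)
        {
        topk_add(resp_topk, c$id$resp_h);
        }

    event bro_done()
        {
        # topk_get_top() is assumed here to return an iterable vector of the
        # observed elements; estimated counts and error bounds come from the handle.
        local top = topk_get_top(resp_topk, 10);
        for ( i in top )
            print top[i], topk_count(resp_topk, top[i]), topk_epsilon(resp_topk, top[i]);
        }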


@ -1 +1 @@
2.1-939 2.1-1007

@ -1 +1 @@
Subproject commit 91d258cc8b2f74cd02fc93dfe61f73ec9f0dd489 Subproject commit d9963983c0b4d426b24836f8d154d014d5aecbba

@ -1 +1 @@
Subproject commit ce366206e3407e534a786ad572c342e9f9fef26b Subproject commit 69606f8f3cc84d694ca1da14868a5fecd4abbc96


@ -82,9 +82,9 @@ attached, they start receiving the contents of the file as Bro extracts
it from an ongoing network connection. What they do with the file it from an ongoing network connection. What they do with the file
contents is up to the particular file analyzer implementation, but contents is up to the particular file analyzer implementation, but
they'll typically either report further information about the file via they'll typically either report further information about the file via
events (e.g. :bro:see:`FileAnalysis::ANALYZER_MD5` will report the events (e.g. :bro:see:`Files::ANALYZER_MD5` will report the
file's MD5 checksum via :bro:see:`file_hash` once calculated) or they'll file's MD5 checksum via :bro:see:`file_hash` once calculated) or they'll
have some side effect (e.g. :bro:see:`FileAnalysis::ANALYZER_EXTRACT` have some side effect (e.g. :bro:see:`Files::ANALYZER_EXTRACT`
will write the contents of the file out to the local file system). will write the contents of the file out to the local file system).
In the future there may be file analyzers that automatically attach to In the future there may be file analyzers that automatically attach to
@ -98,7 +98,7 @@ explicit attachment decision:
{ {
print "new file", f$id; print "new file", f$id;
if ( f?$mime_type && f$mime_type == "text/plain" ) if ( f?$mime_type && f$mime_type == "text/plain" )
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); Files::add_analyzer(f, Files::ANALYZER_MD5);
} }
event file_hash(f: fa_file, kind: string, hash: string) event file_hash(f: fa_file, kind: string, hash: string)
@ -113,26 +113,27 @@ output::
file_hash, Cx92a0ym5R8, md5, 397168fd09991a0e712254df7bc639ac file_hash, Cx92a0ym5R8, md5, 397168fd09991a0e712254df7bc639ac
Some file analyzers might have tunable parameters that need to be Some file analyzers might have tunable parameters that need to be
specified in the call to :bro:see:`FileAnalysis::add_analyzer`: specified in the call to :bro:see:`Files::add_analyzer`:
.. code:: bro .. code:: bro
event file_new(f: fa_file) event file_new(f: fa_file)
{ {
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, Files::add_analyzer(f, Files::ANALYZER_EXTRACT,
$extract_filename="./myfile"]); [$extract_filename="myfile"]);
} }
In this case, the file extraction analyzer doesn't generate any further In this case, the file extraction analyzer doesn't generate any further
events, but does have the side effect of writing out the file contents events, but does have the effect of writing out the file contents to the
to the local file system at the specified location of ``./myfile``. Of local file system at the location resulting from the concatenation of
course, for a network with more than a single file being transferred, the path specified by :bro:see:`FileExtract::prefix` and the string,
it's probably preferable to specify a different extraction path for each ``myfile``. Of course, for a network with more than a single file being
file, unlike this example. transferred, it's probably preferable to specify a different extraction
path for each file, unlike this example.
Regardless of which file analyzers end up acting on a file, general Regardless of which file analyzers end up acting on a file, general
information about the file (e.g. size, time of last data transferred, information about the file (e.g. size, time of last data transferred,
MIME type, etc.) are logged in ``file_analysis.log``. MIME type, etc.) are logged in ``files.log``.
Input Framework Integration Input Framework Integration
=========================== ===========================
@ -150,7 +151,7 @@ a network interface it's monitoring. It only requires a call to
event file_new(f: fa_file) event file_new(f: fa_file)
{ {
print "new file", f$id; print "new file", f$id;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); Files::add_analyzer(f, Files::ANALYZER_MD5);
} }
event file_state_remove(f: fa_file) event file_state_remove(f: fa_file)


@ -47,6 +47,7 @@ Script Reference
scripts/index scripts/index
scripts/builtins scripts/builtins
scripts/proto-analyzers scripts/proto-analyzers
scripts/file-analyzers
Other Bro Components Other Bro Components
-------------------- --------------------


@ -124,8 +124,10 @@ endmacro(REST_TARGET)
# Schedule Bro scripts for which to generate documentation. # Schedule Bro scripts for which to generate documentation.
include(DocSourcesList.cmake) include(DocSourcesList.cmake)
# This reST target is independent of a particular Bro script... # Macro for generating reST docs that are independent of any particular Bro
add_custom_command(OUTPUT proto-analyzers.rst # script.
macro(INDEPENDENT_REST_TARGET reST_file)
add_custom_command(OUTPUT ${reST_file}
# delete any leftover state from previous bro runs # delete any leftover state from previous bro runs
COMMAND "${CMAKE_COMMAND}" COMMAND "${CMAKE_COMMAND}"
ARGS -E remove_directory .state ARGS -E remove_directory .state
@ -137,15 +139,19 @@ add_custom_command(OUTPUT proto-analyzers.rst
COMMAND "${CMAKE_COMMAND}" COMMAND "${CMAKE_COMMAND}"
ARGS -E make_directory ${dstDir} ARGS -E make_directory ${dstDir}
COMMAND "${CMAKE_COMMAND}" COMMAND "${CMAKE_COMMAND}"
ARGS -E copy proto-analyzers.rst ${dstDir} ARGS -E copy ${reST_file} ${dstDir}
# clean up the build directory # clean up the build directory
COMMAND rm COMMAND rm
ARGS -rf .state *.log *.rst ARGS -rf .state *.log *.rst
DEPENDS bro DEPENDS bro
WORKING_DIRECTORY ${CMAKE_BINARY_DIR} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMENT "[Bro] Generating reST docs for proto-analyzers.rst" COMMENT "[Bro] Generating reST docs for ${reST_file}"
) )
list(APPEND ALL_REST_OUTPUTS proto-analyzers.rst) list(APPEND ALL_REST_OUTPUTS ${reST_file})
endmacro(INDEPENDENT_REST_TARGET)
independent_rest_target(proto-analyzers.rst)
independent_rest_target(file-analyzers.rst)
# create temporary list of all docs to include in the master policy/index file # create temporary list of all docs to include in the master policy/index file
file(WRITE ${MASTER_POLICY_INDEX} "${MASTER_POLICY_INDEX_TEXT}") file(WRITE ${MASTER_POLICY_INDEX} "${MASTER_POLICY_INDEX_TEXT}")


@ -73,6 +73,7 @@ rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_UDP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_ZIP.events.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_ZIP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/reporter.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/reporter.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/strings.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/strings.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/top-k.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/types.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/types.bif.bro)
rest_target(${psd} base/files/extract/main.bro) rest_target(${psd} base/files/extract/main.bro)
rest_target(${psd} base/files/hash/main.bro) rest_target(${psd} base/files/hash/main.bro)
@ -129,6 +130,7 @@ rest_target(${psd} base/frameworks/sumstats/plugins/min.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/sample.bro) rest_target(${psd} base/frameworks/sumstats/plugins/sample.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/std-dev.bro) rest_target(${psd} base/frameworks/sumstats/plugins/std-dev.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/sum.bro) rest_target(${psd} base/frameworks/sumstats/plugins/sum.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/topk.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/unique.bro) rest_target(${psd} base/frameworks/sumstats/plugins/unique.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/variance.bro) rest_target(${psd} base/frameworks/sumstats/plugins/variance.bro)
rest_target(${psd} base/frameworks/tunnels/main.bro) rest_target(${psd} base/frameworks/tunnels/main.bro)
@ -141,6 +143,7 @@ rest_target(${psd} base/protocols/dns/consts.bro)
rest_target(${psd} base/protocols/dns/main.bro) rest_target(${psd} base/protocols/dns/main.bro)
rest_target(${psd} base/protocols/ftp/files.bro) rest_target(${psd} base/protocols/ftp/files.bro)
rest_target(${psd} base/protocols/ftp/gridftp.bro) rest_target(${psd} base/protocols/ftp/gridftp.bro)
rest_target(${psd} base/protocols/ftp/info.bro)
rest_target(${psd} base/protocols/ftp/main.bro) rest_target(${psd} base/protocols/ftp/main.bro)
rest_target(${psd} base/protocols/ftp/utils-commands.bro) rest_target(${psd} base/protocols/ftp/utils-commands.bro)
rest_target(${psd} base/protocols/ftp/utils.bro) rest_target(${psd} base/protocols/ftp/utils.bro)


@ -204,7 +204,7 @@ export {
## ##
## tag: Tag for the protocol analyzer having a callback being registered. ## tag: Tag for the protocol analyzer having a callback being registered.
## ##
## reg: A :bro:see:`ProtoRegistration` record. ## reg: A :bro:see:`Files::ProtoRegistration` record.
## ##
## Returns: true if the protocol being registered was not previously registered. ## Returns: true if the protocol being registered was not previously registered.
global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool; global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool;
@ -228,11 +228,6 @@ redef record fa_file += {
info: Info &optional; info: Info &optional;
}; };
redef record AnalyzerArgs += {
# This is used interally for the core file analyzer api.
tag: Files::Tag &optional;
};
# Store the callbacks for protocol analyzers that have files. # Store the callbacks for protocol analyzers that have files.
global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table(); global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table();
@ -275,14 +270,12 @@ function set_timeout_interval(f: fa_file, t: interval): bool
function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
{ {
# This is to construct the correct args for the core API.
args$tag = tag;
add f$info$analyzers[Files::analyzer_name(tag)]; add f$info$analyzers[Files::analyzer_name(tag)];
if ( tag in analyzer_add_callbacks ) if ( tag in analyzer_add_callbacks )
analyzer_add_callbacks[tag](f, args); analyzer_add_callbacks[tag](f, args);
if ( ! __add_analyzer(f$id, args) ) if ( ! __add_analyzer(f$id, tag, args) )
{ {
Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id)); Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id));
return F; return F;
@ -297,8 +290,7 @@ function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: f
function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
{ {
args$tag = tag; return __remove_analyzer(f$id, tag, args);
return __remove_analyzer(f$id, args);
} }
function stop(f: fa_file): bool function stop(f: fa_file): bool


@ -5,5 +5,6 @@
@load ./sample @load ./sample
@load ./std-dev @load ./std-dev
@load ./sum @load ./sum
@load ./topk
@load ./unique @load ./unique
@load ./variance @load ./variance


@ -0,0 +1,50 @@
@load base/frameworks/sumstats
module SumStats;
export {
redef record Reducer += {
## number of elements to keep in the top-k list
topk_size: count &default=500;
};
redef enum Calculation += {
TOPK
};
redef record ResultVal += {
topk: opaque of topk &optional;
};
}
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
if ( TOPK in r$apply && ! rv?$topk )
rv$topk = topk_init(r$topk_size);
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( TOPK in r$apply )
topk_add(rv$topk, obs);
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$topk )
{
result$topk = topk_init(topk_size(rv1$topk));
topk_merge(result$topk, rv1$topk);
if ( rv2?$topk )
topk_merge(result$topk, rv2$topk);
}
else if ( rv2?$topk )
{
result$topk = topk_init(topk_size(rv2$topk));
topk_merge(result$topk, rv2$topk);
}
}
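A sketch of how the new TOPK calculation could be requested through a SumStats reducer. The stream name, key, epoch, and the exact shape of SumStats::create/epoch_result follow the 2.2-era SumStats documentation and may differ on this branch; they are assumptions, not taken from this change.

    @load base/frameworks/sumstats

    event bro_init()
        {
        local r = SumStats::Reducer($stream="top.hosts",
                                    $apply=set(SumStats::TOPK),
                                    $topk_size=100);

        SumStats::create([$name="find-top-hosts",
                          $epoch=1hr,
                          $reducers=set(r),
                          $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
                              {
                              local rv = result["top.hosts"];
                              # The plugin above stores whole Observation records in rv$topk,
                              # so the printed elements are Observations, not plain strings.
                              local top = topk_get_top(rv$topk, 10);
                              for ( i in top )
                                  print top[i];
                              }]);
        }

    event connection_established(c: connection)
        {
        SumStats::observe("top.hosts", SumStats::Key($str="all"),
                          SumStats::Observation($str=cat(c$id$orig_h)));
        }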


@ -531,22 +531,19 @@ type record_field_table: table[string] of record_field;
# dependent on the names remaining as they are now. # dependent on the names remaining as they are now.
## Set of BPF capture filters to use for capturing, indexed by a user-definable ## Set of BPF capture filters to use for capturing, indexed by a user-definable
## ID (which must be unique). If Bro is *not* configured to examine ## ID (which must be unique). If Bro is *not* configured with
## :bro:id:`PacketFilter::all_packets`, all packets matching at least ## :bro:id:`PacketFilter::enable_auto_protocol_capture_filters`,
## one of the filters in this table (and all in :bro:id:`restrict_filters`) ## all packets matching at least one of the filters in this table (and all in
## will be analyzed. ## :bro:id:`restrict_filters`) will be analyzed.
## ##
## .. bro:see:: PacketFilter PacketFilter::all_packets ## .. bro:see:: PacketFilter PacketFilter::enable_auto_protocol_capture_filters
## PacketFilter::unrestricted_filter restrict_filters ## PacketFilter::unrestricted_filter restrict_filters
global capture_filters: table[string] of string &redef; global capture_filters: table[string] of string &redef;
## Set of BPF filters to restrict capturing, indexed by a user-definable ID (which ## Set of BPF filters to restrict capturing, indexed by a user-definable ID (which
## must be unique). If Bro is *not* configured to examine ## must be unique).
## :bro:id:`PacketFilter::all_packets`, only packets matching *all* of the
## filters in this table (and any in :bro:id:`capture_filters`) will be
## analyzed.
## ##
## .. bro:see:: PacketFilter PacketFilter::all_packets ## .. bro:see:: PacketFilter PacketFilter::enable_auto_protocol_capture_filters
## PacketFilter::unrestricted_filter capture_filters ## PacketFilter::unrestricted_filter capture_filters
global restrict_filters: table[string] of string &redef; global restrict_filters: table[string] of string &redef;
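For reference, both tables are meant to be extended via redef; a small sketch (the filter strings and IDs here are made up):

    redef capture_filters += {
        ["dns"] = "port 53",
        ["http"] = "tcp port 80"
    };

    redef restrict_filters += {
        ["skip-backup-host"] = "not host 10.0.0.1"
    };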
@ -3042,6 +3039,11 @@ module GLOBAL;
## Number of bytes per packet to capture from live interfaces. ## Number of bytes per packet to capture from live interfaces.
const snaplen = 8192 &redef; const snaplen = 8192 &redef;
## Seed for hashes computed internally for probabilistic data structures. Using
## the same value here will make the hashes compatible between independent Bro
## instances. If left unset, Bro will use a temporary local seed.
const global_hash_seed: string = "" &redef;
# Load BiFs defined by plugins. # Load BiFs defined by plugins.
@load base/bif/plugins @load base/bif/plugins
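To make these internal hashes (and thus Bloom filter and top-k state) comparable across independent Bro instances, every instance would redefine the new option to the same arbitrary string, e.g.:

    redef global_hash_seed = "my-cluster-wide-seed";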


@ -1,4 +1,5 @@
@load ./utils-commands @load ./utils-commands
@load ./info
@load ./main @load ./main
@load ./utils @load ./utils
@load ./files @load ./files


@ -1,3 +1,4 @@
@load ./info
@load ./main @load ./main
@load ./utils @load ./utils
@load base/utils/conn-ids @load base/utils/conn-ids


@ -19,6 +19,7 @@
##! sizes are not logged, but at the benefit of saving CPU cycles that ##! sizes are not logged, but at the benefit of saving CPU cycles that
##! otherwise go to analyzing the large (and likely benign) connections. ##! otherwise go to analyzing the large (and likely benign) connections.
@load ./info
@load ./main @load ./main
@load base/protocols/conn @load base/protocols/conn
@load base/protocols/ssl @load base/protocols/ssl


@ -0,0 +1,72 @@
##! Defines data structures for tracking and logging FTP sessions.
module FTP;
@load ./utils-commands
export {
## This setting changes if passwords used in FTP sessions are
## captured or not.
const default_capture_password = F &redef;
## The expected endpoints of an FTP data channel.
type ExpectedDataChannel: record {
## Whether PASV mode is toggled for control channel.
passive: bool &log;
## The host that will be initiating the data connection.
orig_h: addr &log;
## The host that will be accepting the data connection.
resp_h: addr &log;
## The port at which the acceptor is listening for the data connection.
resp_p: port &log;
};
type Info: record {
## Time when the command was sent.
ts: time &log;
## Unique ID for the connection.
uid: string &log;
## The connection's 4-tuple of endpoint addresses/ports.
id: conn_id &log;
## User name for the current FTP session.
user: string &log &default="<unknown>";
## Password for the current FTP session if captured.
password: string &log &optional;
## Command given by the client.
command: string &log &optional;
## Argument for the command if one is given.
arg: string &log &optional;
## Libmagic "sniffed" file type if the command indicates a file transfer.
mime_type: string &log &optional;
## Size of the file if the command indicates a file transfer.
file_size: count &log &optional;
## Reply code from the server in response to the command.
reply_code: count &log &optional;
## Reply message from the server in response to the command.
reply_msg: string &log &optional;
## Expected FTP data channel.
data_channel: ExpectedDataChannel &log &optional;
## Current working directory that this session is in. By making
## the default value '.', we can indicate that, unless something
## more concrete is discovered, the existing but unknown
## directory is ok to use.
cwd: string &default=".";
## Command that is currently waiting for a response.
cmdarg: CmdArg &optional;
## Queue for commands that have been sent but not yet responded to
## are tracked here.
pending_commands: PendingCmds;
## Indicates if the session is in active or passive mode.
passive: bool &default=F;
## Determines if the password will be captured for this request.
capture_password: bool &default=default_capture_password;
};
}
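The record itself is unchanged by the move into info.bro; a short sketch of typical use from a site script (the handler body is illustrative):

    # Passwords are not captured by default; flip the constant defined above to change that.
    redef FTP::default_capture_password = T;

    event FTP::log_ftp(rec: FTP::Info)
        {
        if ( rec?$data_channel && rec$data_channel$passive )
            print fmt("expecting passive data channel for %s at %s:%s",
                      rec$user, rec$data_channel$resp_h, rec$data_channel$resp_p);
        }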


@ -3,6 +3,8 @@
##! will take on the full path that the client is at along with the requested ##! will take on the full path that the client is at along with the requested
##! file name. ##! file name.
@load ./info
@load ./utils
@load ./utils-commands @load ./utils-commands
@load base/utils/paths @load base/utils/paths
@load base/utils/numbers @load base/utils/numbers
@ -20,72 +22,9 @@ export {
"EPSV" "EPSV"
} &redef; } &redef;
## This setting changes if passwords used in FTP sessions are captured or not.
const default_capture_password = F &redef;
## User IDs that can be considered "anonymous". ## User IDs that can be considered "anonymous".
const guest_ids = { "anonymous", "ftp", "ftpuser", "guest" } &redef; const guest_ids = { "anonymous", "ftp", "ftpuser", "guest" } &redef;
## The expected endpoints of an FTP data channel.
type ExpectedDataChannel: record {
## Whether PASV mode is toggled for control channel.
passive: bool &log;
## The host that will be initiating the data connection.
orig_h: addr &log;
## The host that will be accepting the data connection.
resp_h: addr &log;
## The port at which the acceptor is listening for the data connection.
resp_p: port &log;
};
type Info: record {
## Time when the command was sent.
ts: time &log;
## Unique ID for the connection.
uid: string &log;
## The connection's 4-tuple of endpoint addresses/ports.
id: conn_id &log;
## User name for the current FTP session.
user: string &log &default="<unknown>";
## Password for the current FTP session if captured.
password: string &log &optional;
## Command given by the client.
command: string &log &optional;
## Argument for the command if one is given.
arg: string &log &optional;
## Libmagic "sniffed" file type if the command indicates a file transfer.
mime_type: string &log &optional;
## Size of the file if the command indicates a file transfer.
file_size: count &log &optional;
## Reply code from the server in response to the command.
reply_code: count &log &optional;
## Reply message from the server in response to the command.
reply_msg: string &log &optional;
## Expected FTP data channel.
data_channel: ExpectedDataChannel &log &optional;
## Current working directory that this session is in. By making
## the default value '.', we can indicate that unless something
## more concrete is discovered that the existing but unknown
## directory is ok to use.
cwd: string &default=".";
## Command that is currently waiting for a response.
cmdarg: CmdArg &optional;
## Queue for commands that have been sent but not yet responded to
## are tracked here.
pending_commands: PendingCmds;
## Indicates if the session is in active or passive mode.
passive: bool &default=F;
## Determines if the password will be captured for this request.
capture_password: bool &default=default_capture_password;
};
## This record is to hold a parsed FTP reply code. For example, for the ## This record is to hold a parsed FTP reply code. For example, for the
## 201 status code, the digits would be parsed as: x->2, y->0, z=>1. ## 201 status code, the digits would be parsed as: x->2, y->0, z=>1.
type ReplyCode: record { type ReplyCode: record {
@ -102,8 +41,6 @@ export {
global log_ftp: event(rec: Info); global log_ftp: event(rec: Info);
} }
@load ./utils
# Add the state tracking information variable to the connection record # Add the state tracking information variable to the connection record
redef record connection += { redef record connection += {
ftp: Info &optional; ftp: Info &optional;


@ -1,7 +1,8 @@
##! Utilities specific for FTP processing. ##! Utilities specific for FTP processing.
@load ./main @load ./info
@load base/utils/addrs @load base/utils/addrs
@load base/utils/paths
module FTP; module FTP;


@ -67,11 +67,8 @@ export {
## (especially with large file transfers). ## (especially with large file transfers).
const disable_analyzer_after_detection = T &redef; const disable_analyzer_after_detection = T &redef;
## The maximum amount of time a script can delay records from being logged.
const max_log_delay = 15secs &redef;
## Delays an SSL record for a specific token: the record will not be logged ## Delays an SSL record for a specific token: the record will not be logged
## as long as the token exists or until :bro:id:`SSL::max_log_delay` elapses. ## as long as the token exists or until 15 seconds elapses.
global delay_log: function(info: Info, token: string); global delay_log: function(info: Info, token: string);
## Undelays an SSL record for a previously inserted token, allowing the ## Undelays an SSL record for a previously inserted token, allowing the
@ -90,7 +87,7 @@ redef record connection += {
redef record Info += { redef record Info += {
# Adding a string "token" to this set will cause the SSL script # Adding a string "token" to this set will cause the SSL script
# to delay logging the record until either the token has been removed or # to delay logging the record until either the token has been removed or
# the record has been delayed for :bro:id:`SSL::max_log_delay`. # the record has been delayed.
delay_tokens: set[string] &optional; delay_tokens: set[string] &optional;
}; };
@ -138,7 +135,7 @@ function log_record(info: Info)
{ {
log_record(info); log_record(info);
} }
timeout SSL::max_log_delay timeout 15secs
{ {
Reporter::info(fmt("SSL delay tokens not released in time (%s tokens remaining)", Reporter::info(fmt("SSL delay tokens not released in time (%s tokens remaining)",
|info$delay_tokens|)); |info$delay_tokens|));
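With SSL::max_log_delay gone, delayed records are now bounded by the fixed 15 second timeout shown above. A sketch of the token API from a user script (the token name and the asynchronous lookup are illustrative):

    event ssl_established(c: connection)
        {
        # Hold the ssl.log entry until a reverse lookup of the server completes.
        SSL::delay_log(c$ssl, "resp-hostname");

        when ( local name = lookup_addr(c$id$resp_h) )
            {
            # ... do something with the name ...
            SSL::undelay_log(c$ssl, "resp-hostname");
            }
        timeout 10secs
            {
            SSL::undelay_log(c$ssl, "resp-hostname");
            }
        }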


@ -34,8 +34,8 @@ export {
global current_shunted_host_pairs: function(): set[conn_id]; global current_shunted_host_pairs: function(): set[conn_id];
redef enum Notice::Type += { redef enum Notice::Type += {
## Indicative that :bro:id:`max_bpf_shunts` connections are already ## Indicative that :bro:id:`PacketFilter::max_bpf_shunts` connections
## being shunted with BPF filters and no more are allowed. ## are already being shunted with BPF filters and no more are allowed.
No_More_Conn_Shunts_Available, No_More_Conn_Shunts_Available,
## Limitations in BPF make shunting some connections with BPF impossible. ## Limitations in BPF make shunting some connections with BPF impossible.


@ -12,12 +12,12 @@ export {
## Apply BPF filters to each worker in a way that causes them to ## Apply BPF filters to each worker in a way that causes them to
## automatically flow balance traffic between them. ## automatically flow balance traffic between them.
AUTO_BPF, AUTO_BPF,
## Load balance traffic across the workers by making each one apply # Load balance traffic across the workers by making each one apply
## a restrict filter to only listen to a single MAC address. This # a restrict filter to only listen to a single MAC address. This
## is a somewhat common deployment option for sites doing network # is a somewhat common deployment option for sites doing network
## based load balancing with MAC address rewriting and passing the # based load balancing with MAC address rewriting and passing the
## traffic to a single interface. Multiple MAC addresses will show # traffic to a single interface. Multiple MAC addresses will show
## up on the same interface and need filtered to a single address. # up on the same interface and need filtered to a single address.
#MAC_ADDR_BPF, #MAC_ADDR_BPF,
}; };


@ -1,10 +1,10 @@
## Capture TCP fragments, but not UDP (or ICMP), since those are a lot more # Capture TCP fragments, but not UDP (or ICMP), since those are a lot more
## common due to high-volume, fragmenting protocols such as NFS :-(. # common due to high-volume, fragmenting protocols such as NFS :-(.
## This normally isn't used because of the default open packet filter # This normally isn't used because of the default open packet filter
## but we set it anyway in case the user is using a packet filter. # but we set it anyway in case the user is using a packet filter.
## Note: This was removed because the default model now is to have a wide # Note: This was removed because the default model now is to have a wide
## open packet filter. # open packet filter.
#redef capture_filters += { ["frag"] = "(ip[6:2] & 0x3fff != 0) and tcp" }; #redef capture_filters += { ["frag"] = "(ip[6:2] & 0x3fff != 0) and tcp" };
## Shorten the fragment timeout from never expiring to expiring fragments after ## Shorten the fragment timeout from never expiring to expiring fragments after


@ -11,6 +11,7 @@
#include "plugin/Manager.h" #include "plugin/Manager.h"
#include "analyzer/Manager.h" #include "analyzer/Manager.h"
#include "analyzer/Component.h" #include "analyzer/Component.h"
#include "file_analysis/Manager.h"
BroDoc::BroDoc(const std::string& rel, const std::string& abs) BroDoc::BroDoc(const std::string& rel, const std::string& abs)
{ {
@ -479,6 +480,17 @@ static void WriteAnalyzerComponent(FILE* f, const analyzer::Component* c)
fprintf(f, ":bro:enum:`Analyzer::%s`\n\n", tag.c_str()); fprintf(f, ":bro:enum:`Analyzer::%s`\n\n", tag.c_str());
} }
static void WriteAnalyzerComponent(FILE* f, const file_analysis::Component* c)
{
EnumType* atag = file_mgr->GetTagEnumType();
string tag = fmt("ANALYZER_%s", c->CanonicalName());
if ( atag->Lookup("Files", tag.c_str()) < 0 )
reporter->InternalError("missing analyzer tag for %s", tag.c_str());
fprintf(f, ":bro:enum:`Files::%s`\n\n", tag.c_str());
}
static void WritePluginComponents(FILE* f, const plugin::Plugin* p) static void WritePluginComponents(FILE* f, const plugin::Plugin* p)
{ {
plugin::Plugin::component_list components = p->Components(); plugin::Plugin::component_list components = p->Components();
@ -494,6 +506,10 @@ static void WritePluginComponents(FILE* f, const plugin::Plugin* p)
WriteAnalyzerComponent(f, WriteAnalyzerComponent(f,
dynamic_cast<const analyzer::Component*>(*it)); dynamic_cast<const analyzer::Component*>(*it));
break; break;
case plugin::component::FILE_ANALYZER:
WriteAnalyzerComponent(f,
dynamic_cast<const file_analysis::Component*>(*it));
break;
case plugin::component::READER: case plugin::component::READER:
reporter->InternalError("docs for READER component unimplemented"); reporter->InternalError("docs for READER component unimplemented");
case plugin::component::WRITER: case plugin::component::WRITER:
@ -537,30 +553,35 @@ static void WritePluginBifItems(FILE* f, const plugin::Plugin* p,
} }
} }
static void WriteAnalyzerTagDefn(FILE* f, EnumType* e) static void WriteAnalyzerTagDefn(FILE* f, EnumType* e, const string& module)
{ {
string tag_id= module + "::Tag";
e = new CommentedEnumType(e); e = new CommentedEnumType(e);
e->SetTypeID(copy_string("Analyzer::Tag")); e->SetTypeID(copy_string(tag_id.c_str()));
ID* dummy_id = new ID(copy_string("Analyzer::Tag"), SCOPE_GLOBAL, true); ID* dummy_id = new ID(copy_string(tag_id.c_str()), SCOPE_GLOBAL, true);
dummy_id->SetType(e); dummy_id->SetType(e);
dummy_id->MakeType(); dummy_id->MakeType();
list<string>* r = new list<string>(); list<string>* r = new list<string>();
r->push_back("Unique identifiers for protocol analyzers."); r->push_back("Unique identifiers for analyzers.");
BroDocObj bdo(dummy_id, r, true); BroDocObj bdo(dummy_id, r, true);
bdo.WriteReST(f); bdo.WriteReST(f);
} }
static bool IsAnalyzerPlugin(const plugin::Plugin* p) static bool ComponentsMatch(const plugin::Plugin* p, plugin::component::Type t,
bool match_empty = false)
{ {
plugin::Plugin::component_list components = p->Components(); plugin::Plugin::component_list components = p->Components();
plugin::Plugin::component_list::const_iterator it; plugin::Plugin::component_list::const_iterator it;
if ( components.empty() )
return match_empty;
for ( it = components.begin(); it != components.end(); ++it ) for ( it = components.begin(); it != components.end(); ++it )
if ( (*it)->Type() != plugin::component::ANALYZER ) if ( (*it)->Type() != t )
return false; return false;
return true; return true;
@ -573,14 +594,44 @@ void CreateProtoAnalyzerDoc(const char* filename)
fprintf(f, "Protocol Analyzer Reference\n"); fprintf(f, "Protocol Analyzer Reference\n");
fprintf(f, "===========================\n\n"); fprintf(f, "===========================\n\n");
WriteAnalyzerTagDefn(f, analyzer_mgr->GetTagEnumType()); WriteAnalyzerTagDefn(f, analyzer_mgr->GetTagEnumType(), "Analyzer");
plugin::Manager::plugin_list plugins = plugin_mgr->Plugins(); plugin::Manager::plugin_list plugins = plugin_mgr->Plugins();
plugin::Manager::plugin_list::const_iterator it; plugin::Manager::plugin_list::const_iterator it;
for ( it = plugins.begin(); it != plugins.end(); ++it ) for ( it = plugins.begin(); it != plugins.end(); ++it )
{ {
if ( ! IsAnalyzerPlugin(*it) ) if ( ! ComponentsMatch(*it, plugin::component::ANALYZER, true) )
continue;
WritePluginSectionHeading(f, *it);
WritePluginComponents(f, *it);
WritePluginBifItems(f, *it, plugin::BifItem::CONSTANT,
"Options/Constants");
WritePluginBifItems(f, *it, plugin::BifItem::GLOBAL, "Globals");
WritePluginBifItems(f, *it, plugin::BifItem::TYPE, "Types");
WritePluginBifItems(f, *it, plugin::BifItem::EVENT, "Events");
WritePluginBifItems(f, *it, plugin::BifItem::FUNCTION, "Functions");
}
fclose(f);
}
void CreateFileAnalyzerDoc(const char* filename)
{
FILE* f = fopen(filename, "w");
fprintf(f, "File Analyzer Reference\n");
fprintf(f, "=======================\n\n");
WriteAnalyzerTagDefn(f, file_mgr->GetTagEnumType(), "Files");
plugin::Manager::plugin_list plugins = plugin_mgr->Plugins();
plugin::Manager::plugin_list::const_iterator it;
for ( it = plugins.begin(); it != plugins.end(); ++it )
{
if ( ! ComponentsMatch(*it, plugin::component::FILE_ANALYZER) )
continue; continue;
WritePluginSectionHeading(f, *it); WritePluginSectionHeading(f, *it);


@ -413,4 +413,10 @@ private:
*/ */
void CreateProtoAnalyzerDoc(const char* filename); void CreateProtoAnalyzerDoc(const char* filename);
/**
* Writes out plugin index documentation for all file analyzer plugins.
* @param filename the name of the file to write.
*/
void CreateFileAnalyzerDoc(const char* filename);
#endif #endif


@ -319,6 +319,7 @@ set(bro_SRCS
StateAccess.cc StateAccess.cc
Stats.cc Stats.cc
Stmt.cc Stmt.cc
Tag.cc
Timer.cc Timer.cc
Traverse.cc Traverse.cc
Trigger.cc Trigger.cc
@ -362,6 +363,8 @@ set(bro_SRCS
3rdparty/sqlite3.c 3rdparty/sqlite3.c
plugin/Component.cc plugin/Component.cc
plugin/ComponentManager.h
plugin/TaggedComponent.h
plugin/Manager.cc plugin/Manager.cc
plugin/Plugin.cc plugin/Plugin.cc
plugin/Macros.h plugin/Macros.h


@ -16,7 +16,8 @@ DebugLogger::Stream DebugLogger::streams[NUM_DBGS] = {
{ "notifiers", 0, false }, { "main-loop", 0, false }, { "notifiers", 0, false }, { "main-loop", 0, false },
{ "dpd", 0, false }, { "tm", 0, false }, { "dpd", 0, false }, { "tm", 0, false },
{ "logging", 0, false }, {"input", 0, false }, { "logging", 0, false }, {"input", 0, false },
{ "threading", 0, false }, { "file_analysis", 0, false } { "threading", 0, false }, { "file_analysis", 0, false },
{ "plugins", 0, false}
}; };
DebugLogger::DebugLogger(const char* filename) DebugLogger::DebugLogger(const char* filename)


@ -27,6 +27,7 @@ enum DebugStream {
DBG_INPUT, // Input streams DBG_INPUT, // Input streams
DBG_THREADING, // Threading system DBG_THREADING, // Threading system
DBG_FILE_ANALYSIS, // File analysis DBG_FILE_ANALYSIS, // File analysis
DBG_PLUGINS,
NUM_DBGS // Has to be last NUM_DBGS // Has to be last
}; };


@ -238,10 +238,13 @@ TableType* record_field_table;
StringVal* cmd_line_bpf_filter; StringVal* cmd_line_bpf_filter;
StringVal* global_hash_seed;
OpaqueType* md5_type; OpaqueType* md5_type;
OpaqueType* sha1_type; OpaqueType* sha1_type;
OpaqueType* sha256_type; OpaqueType* sha256_type;
OpaqueType* entropy_type; OpaqueType* entropy_type;
OpaqueType* topk_type;
OpaqueType* bloomfilter_type; OpaqueType* bloomfilter_type;
#include "const.bif.netvar_def" #include "const.bif.netvar_def"
@ -304,10 +307,13 @@ void init_general_global_var()
cmd_line_bpf_filter = cmd_line_bpf_filter =
internal_val("cmd_line_bpf_filter")->AsStringVal(); internal_val("cmd_line_bpf_filter")->AsStringVal();
global_hash_seed = opt_internal_string("global_hash_seed");
md5_type = new OpaqueType("md5"); md5_type = new OpaqueType("md5");
sha1_type = new OpaqueType("sha1"); sha1_type = new OpaqueType("sha1");
sha256_type = new OpaqueType("sha256"); sha256_type = new OpaqueType("sha256");
entropy_type = new OpaqueType("entropy"); entropy_type = new OpaqueType("entropy");
topk_type = new OpaqueType("topk");
bloomfilter_type = new OpaqueType("bloomfilter"); bloomfilter_type = new OpaqueType("bloomfilter");
} }


@ -242,11 +242,14 @@ extern TableType* record_field_table;
extern StringVal* cmd_line_bpf_filter; extern StringVal* cmd_line_bpf_filter;
extern StringVal* global_hash_seed;
class OpaqueType; class OpaqueType;
extern OpaqueType* md5_type; extern OpaqueType* md5_type;
extern OpaqueType* sha1_type; extern OpaqueType* sha1_type;
extern OpaqueType* sha256_type; extern OpaqueType* sha256_type;
extern OpaqueType* entropy_type; extern OpaqueType* entropy_type;
extern OpaqueType* topk_type;
extern OpaqueType* bloomfilter_type; extern OpaqueType* bloomfilter_type;
// Initializes globals that don't pertain to network/event analysis. // Initializes globals that don't pertain to network/event analysis.


@ -566,14 +566,14 @@ BroType* BloomFilterVal::Type() const
void BloomFilterVal::Add(const Val* val) void BloomFilterVal::Add(const Val* val)
{ {
HashKey* key = hash->ComputeHash(val, 1); HashKey* key = hash->ComputeHash(val, 1);
bloom_filter->Add(key->Hash()); bloom_filter->Add(key);
delete key; delete key;
} }
size_t BloomFilterVal::Count(const Val* val) const size_t BloomFilterVal::Count(const Val* val) const
{ {
HashKey* key = hash->ComputeHash(val, 1); HashKey* key = hash->ComputeHash(val, 1);
size_t cnt = bloom_filter->Count(key->Hash()); size_t cnt = bloom_filter->Count(key);
delete key; delete key;
return cnt; return cnt;
} }
@ -588,10 +588,17 @@ bool BloomFilterVal::Empty() const
return bloom_filter->Empty(); return bloom_filter->Empty();
} }
string BloomFilterVal::InternalState() const
{
return bloom_filter->InternalState();
}
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
const BloomFilterVal* y) const BloomFilterVal* y)
{ {
if ( ! same_type(x->Type(), y->Type()) ) if ( x->Type() && // any one 0 is ok here
y->Type() &&
! same_type(x->Type(), y->Type()) )
{ {
reporter->Error("cannot merge Bloom filters with different types"); reporter->Error("cannot merge Bloom filters with different types");
return 0; return 0;
@ -613,7 +620,7 @@ BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
BloomFilterVal* merged = new BloomFilterVal(copy); BloomFilterVal* merged = new BloomFilterVal(copy);
if ( ! merged->Typify(x->Type()) ) if ( x->Type() && ! merged->Typify(x->Type()) )
{ {
reporter->Error("failed to set type on merged Bloom filter"); reporter->Error("failed to set type on merged Bloom filter");
return 0; return 0;
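The relaxed type check above allows merging a filter that has never seen an element (and therefore has no element type yet) with a populated one. At the script level that corresponds to something like this sketch (parameters are arbitrary):

    event bro_init()
        {
        local bf1 = bloomfilter_basic_init(0.01, 1000);
        local bf2 = bloomfilter_basic_init(0.01, 1000);

        bloomfilter_add(bf1, "foo");
        # bf2 stays empty, so it never acquires an element type.

        local merged = bloomfilter_merge(bf1, bf2);
        print bloomfilter_lookup(merged, "foo");   # expect >= 1
        }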


@ -127,6 +127,7 @@ public:
size_t Count(const Val* val) const; size_t Count(const Val* val) const;
void Clear(); void Clear();
bool Empty() const; bool Empty() const;
string InternalState() const;
static BloomFilterVal* Merge(const BloomFilterVal* x, static BloomFilterVal* Merge(const BloomFilterVal* x,
const BloomFilterVal* y); const BloomFilterVal* y);


@ -40,7 +40,7 @@ RuleActionAnalyzer::RuleActionAnalyzer(const char* arg_analyzer)
string str(arg_analyzer); string str(arg_analyzer);
string::size_type pos = str.find(':'); string::size_type pos = str.find(':');
string arg = str.substr(0, pos); string arg = str.substr(0, pos);
analyzer = analyzer_mgr->GetAnalyzerTag(arg.c_str()); analyzer = analyzer_mgr->GetComponentTag(arg.c_str());
if ( ! analyzer ) if ( ! analyzer )
reporter->Warning("unknown analyzer '%s' specified in rule", arg.c_str()); reporter->Warning("unknown analyzer '%s' specified in rule", arg.c_str());
@ -48,7 +48,7 @@ RuleActionAnalyzer::RuleActionAnalyzer(const char* arg_analyzer)
if ( pos != string::npos ) if ( pos != string::npos )
{ {
arg = str.substr(pos + 1); arg = str.substr(pos + 1);
child_analyzer = analyzer_mgr->GetAnalyzerTag(arg.c_str()); child_analyzer = analyzer_mgr->GetComponentTag(arg.c_str());
if ( ! child_analyzer ) if ( ! child_analyzer )
reporter->Warning("unknown analyzer '%s' specified in rule", arg.c_str()); reporter->Warning("unknown analyzer '%s' specified in rule", arg.c_str());
@ -60,11 +60,11 @@ RuleActionAnalyzer::RuleActionAnalyzer(const char* arg_analyzer)
void RuleActionAnalyzer::PrintDebug() void RuleActionAnalyzer::PrintDebug()
{ {
if ( ! child_analyzer ) if ( ! child_analyzer )
fprintf(stderr, "|%s|\n", analyzer_mgr->GetAnalyzerName(analyzer)); fprintf(stderr, "|%s|\n", analyzer_mgr->GetComponentName(analyzer));
else else
fprintf(stderr, "|%s:%s|\n", fprintf(stderr, "|%s:%s|\n",
analyzer_mgr->GetAnalyzerName(analyzer), analyzer_mgr->GetComponentName(analyzer),
analyzer_mgr->GetAnalyzerName(child_analyzer)); analyzer_mgr->GetComponentName(child_analyzer));
} }


@ -52,6 +52,7 @@ SERIAL_IS(RE_MATCHER, 0x1400)
SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(BITVECTOR, 0x1500)
SERIAL_IS(COUNTERVECTOR, 0x1600) SERIAL_IS(COUNTERVECTOR, 0x1600)
SERIAL_IS(BLOOMFILTER, 0x1700) SERIAL_IS(BLOOMFILTER, 0x1700)
SERIAL_IS(HASHER, 0x1800)
// These are the externally visible types. // These are the externally visible types.
const SerialType SER_NONE = 0; const SerialType SER_NONE = 0;
@ -107,7 +108,8 @@ SERIAL_VAL(MD5_VAL, 16)
SERIAL_VAL(SHA1_VAL, 17) SERIAL_VAL(SHA1_VAL, 17)
SERIAL_VAL(SHA256_VAL, 18) SERIAL_VAL(SHA256_VAL, 18)
SERIAL_VAL(ENTROPY_VAL, 19) SERIAL_VAL(ENTROPY_VAL, 19)
SERIAL_VAL(BLOOMFILTER_VAL, 20) SERIAL_VAL(TOPK_VAL, 20)
SERIAL_VAL(BLOOMFILTER_VAL, 21)
#define SERIAL_EXPR(name, val) SERIAL_CONST(name, val, EXPR) #define SERIAL_EXPR(name, val) SERIAL_CONST(name, val, EXPR)
SERIAL_EXPR(EXPR, 1) SERIAL_EXPR(EXPR, 1)
@ -206,6 +208,11 @@ SERIAL_BLOOMFILTER(BLOOMFILTER, 1)
SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2) SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2)
SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3) SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3)
#define SERIAL_HASHER(name, val) SERIAL_CONST(name, val, HASHER)
SERIAL_HASHER(HASHER, 1)
SERIAL_HASHER(DEFAULTHASHER, 2)
SERIAL_HASHER(DOUBLEHASHER, 3)
SERIAL_CONST2(ID) SERIAL_CONST2(ID)
SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(STATE_ACCESS)
SERIAL_CONST2(CASE) SERIAL_CONST2(CASE)

src/Tag.cc

@ -0,0 +1,82 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "Tag.h"
#include "Val.h"
Tag::Tag(EnumType* etype, type_t arg_type, subtype_t arg_subtype)
{
assert(arg_type > 0);
type = arg_type;
subtype = arg_subtype;
int64_t i = (int64)(type) | ((int64)subtype << 31);
Ref(etype);
val = new EnumVal(i, etype);
}
Tag::Tag(EnumVal* arg_val)
{
assert(arg_val);
val = arg_val;
Ref(val);
int64 i = val->InternalInt();
type = i & 0xffffffff;
subtype = (i >> 31) & 0xffffffff;
}
Tag::Tag(const Tag& other)
{
type = other.type;
subtype = other.subtype;
val = other.val;
if ( val )
Ref(val);
}
Tag::Tag()
{
type = 0;
subtype = 0;
val = 0;
}
Tag::~Tag()
{
Unref(val);
val = 0;
}
Tag& Tag::operator=(const Tag& other)
{
if ( this != &other )
{
type = other.type;
subtype = other.subtype;
val = other.val;
if ( val )
Ref(val);
}
return *this;
}
EnumVal* Tag::AsEnumVal(EnumType* etype) const
{
if ( ! val )
{
assert(type == 0 && subtype == 0);
Ref(etype);
val = new EnumVal(0, etype);
}
return val;
}
std::string Tag::AsString() const
{
return fmt("%" PRIu32 "/%" PRIu32, type, subtype);
}

src/Tag.h

@ -0,0 +1,138 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef TAG_H
#define TAG_H
#include "config.h"
#include "util.h"
#include "Type.h"
class EnumVal;
/**
* Class to identify an analyzer type.
*
* Each analyzer type gets a tag consisting of a main type and subtype. The
* former is an identifier that's unique across all analyzer classes. The latter is
* passed through to the analyzer instances for their use, yet not further
* interpreted by the analyzer infrastructure; it allows an analyzer to
* branch out into a set of sub-analyzers internally. Jointly, main type and
* subtype form an analyzer "tag". Each unique tag corresponds to a single
* "analyzer" from the user's perspective. At the script layer, these tags
* are mapped into enums of type \c Analyzer::Tag or Files::Tag. Internally,
* the analyzer::Manager and file_analysis::Manager maintain the mapping of tag
* to analyzer (and it also assigns them their main types), and
* analyzer::Component and file_analysis::Component create new tags.
*
* The Tag class supports all operations necessary to act as an index in a
* \c std::map.
*/
class Tag {
public:
/**
* Type for the analyzer's main type.
*/
typedef uint32 type_t;
/**
* Type for the analyzer's subtype.
*/
typedef uint32 subtype_t;
/**
* Returns the tag's main type.
*/
type_t Type() const { return type; }
/**
* Returns the tag's subtype.
*/
subtype_t Subtype() const { return subtype; }
/**
* Returns the numerical values for main and subtype inside a string
* suitable for printing. This is primarily for debugging.
*/
std::string AsString() const;
protected:
/*
* Copy constructor.
*/
Tag(const Tag& other);
/**
* Default constructor. This initializes the tag with an error value
* that will make \c operator \c bool return false.
*/
Tag();
/**
* Destructor.
*/
~Tag();
/**
* Assignment operator.
*/
Tag& operator=(const Tag& other);
/**
* Compares two tags for equality.
*/
bool operator==(const Tag& other) const
{
return type == other.type && subtype == other.subtype;
}
/**
* Compares two tags for inequality.
*/
bool operator!=(const Tag& other) const
{
return type != other.type || subtype != other.subtype;
}
/**
* Compares two tags for less-than relationship.
*/
bool operator<(const Tag& other) const
{
return type != other.type ? type < other.type : (subtype < other.subtype);
}
/**
* Returns the script-layer enum that corresponds to this tag.
* The returned value does not have its ref-count increased.
*
* @param etype the script-layer enum type associated with the tag.
*/
EnumVal* AsEnumVal(EnumType* etype) const;
/**
* Constructor.
*
* @param etype the script-layer enum type associated with the tag.
*
* @param type The main type. Note that the manager class manages the
* value space internally, so no one else should assign main types.
*
* @param subtype The sub type, which is left to an analyzer for
* interpretation. By default it's set to zero.
*/
Tag(EnumType* etype, type_t type, subtype_t subtype = 0);
/**
* Constructor.
*
* @param val An enum value of script type \c Analyzer::Tag.
*/
Tag(EnumVal* val);
private:
type_t type; // Main type.
subtype_t subtype; // Subtype.
mutable EnumVal* val; // Script-layer value.
};
#endif
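At the script layer, the two enum namespaces mentioned in the class comment look like this (a sketch using standard events; nothing here is specific to this header):

    # Protocol analyzer tags live in the Analyzer:: namespace.
    event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count)
        {
        print "confirmed analyzer", atype;
        }

    # File analyzer tags live in the Files:: namespace.
    event file_new(f: fa_file)
        {
        Files::add_analyzer(f, Files::ANALYZER_MD5);
        }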


@ -70,12 +70,12 @@ void AnalyzerTimer::Init(Analyzer* arg_analyzer, analyzer_timer_func arg_timer,
Ref(analyzer->Conn()); Ref(analyzer->Conn());
} }
analyzer::ID Analyzer::id_counter = 0;; analyzer::ID Analyzer::id_counter = 0;
const char* Analyzer::GetAnalyzerName() const const char* Analyzer::GetAnalyzerName() const
{ {
assert(tag); assert(tag);
return analyzer_mgr->GetAnalyzerName(tag); return analyzer_mgr->GetComponentName(tag);
} }
void Analyzer::SetAnalyzerTag(const Tag& arg_tag) void Analyzer::SetAnalyzerTag(const Tag& arg_tag)
@ -87,7 +87,7 @@ void Analyzer::SetAnalyzerTag(const Tag& arg_tag)
bool Analyzer::IsAnalyzer(const char* name) bool Analyzer::IsAnalyzer(const char* name)
{ {
assert(tag); assert(tag);
return strcmp(analyzer_mgr->GetAnalyzerName(tag), name) == 0; return strcmp(analyzer_mgr->GetComponentName(tag), name) == 0;
} }
// Used in debugging output. // Used in debugging output.
@ -98,7 +98,7 @@ static string fmt_analyzer(Analyzer* a)
Analyzer::Analyzer(const char* name, Connection* conn) Analyzer::Analyzer(const char* name, Connection* conn)
{ {
Tag tag = analyzer_mgr->GetAnalyzerTag(name); Tag tag = analyzer_mgr->GetComponentTag(name);
if ( ! tag ) if ( ! tag )
reporter->InternalError("unknown analyzer name %s; mismatch with tag analyzer::Component?", name); reporter->InternalError("unknown analyzer name %s; mismatch with tag analyzer::Component?", name);
@ -494,7 +494,7 @@ Analyzer* Analyzer::FindChild(Tag arg_tag)
Analyzer* Analyzer::FindChild(const char* name) Analyzer* Analyzer::FindChild(const char* name)
{ {
Tag tag = analyzer_mgr->GetAnalyzerTag(name); Tag tag = analyzer_mgr->GetComponentTag(name);
return tag ? FindChild(tag) : 0; return tag ? FindChild(tag) : 0;
} }


@ -8,29 +8,26 @@
using namespace analyzer; using namespace analyzer;
Tag::type_t Component::type_counter = 0;
Component::Component(const char* arg_name, factory_callback arg_factory, Tag::subtype_t arg_subtype, bool arg_enabled, bool arg_partial) Component::Component(const char* arg_name, factory_callback arg_factory, Tag::subtype_t arg_subtype, bool arg_enabled, bool arg_partial)
: plugin::Component(plugin::component::ANALYZER) : plugin::Component(plugin::component::ANALYZER),
plugin::TaggedComponent<analyzer::Tag>(arg_subtype)
{ {
name = copy_string(arg_name); name = copy_string(arg_name);
canon_name = canonify_name(arg_name); canon_name = canonify_name(arg_name);
factory = arg_factory; factory = arg_factory;
enabled = arg_enabled; enabled = arg_enabled;
partial = arg_partial; partial = arg_partial;
tag = analyzer::Tag(++type_counter, arg_subtype);
} }
Component::Component(const Component& other) Component::Component(const Component& other)
: plugin::Component(Type()) : plugin::Component(Type()),
plugin::TaggedComponent<analyzer::Tag>(other)
{ {
name = copy_string(other.name); name = copy_string(other.name);
canon_name = copy_string(other.canon_name); canon_name = copy_string(other.canon_name);
factory = other.factory; factory = other.factory;
enabled = other.enabled; enabled = other.enabled;
partial = other.partial; partial = other.partial;
tag = other.tag;
} }
Component::~Component() Component::~Component()
@ -39,11 +36,6 @@ Component::~Component()
delete [] canon_name; delete [] canon_name;
} }
analyzer::Tag Component::Tag() const
{
return tag;
}
void Component::Describe(ODesc* d) const void Component::Describe(ODesc* d) const
{ {
plugin::Component::Describe(d); plugin::Component::Describe(d);
@ -63,13 +55,14 @@ void Component::Describe(ODesc* d) const
Component& Component::operator=(const Component& other) Component& Component::operator=(const Component& other)
{ {
plugin::TaggedComponent<analyzer::Tag>::operator=(other);
if ( &other != this ) if ( &other != this )
{ {
name = copy_string(other.name); name = copy_string(other.name);
factory = other.factory; factory = other.factory;
enabled = other.enabled; enabled = other.enabled;
partial = other.partial; partial = other.partial;
tag = other.tag;
} }
return *this; return *this;


@ -5,6 +5,7 @@
#include "Tag.h" #include "Tag.h"
#include "plugin/Component.h" #include "plugin/Component.h"
#include "plugin/TaggedComponent.h"
#include "../config.h" #include "../config.h"
#include "../util.h" #include "../util.h"
@ -21,7 +22,8 @@ class Analyzer;
* A plugin can provide a specific protocol analyzer by registering this * A plugin can provide a specific protocol analyzer by registering this
* analyzer component, describing the analyzer. * analyzer component, describing the analyzer.
*/ */
class Component : public plugin::Component { class Component : public plugin::Component,
public plugin::TaggedComponent<analyzer::Tag> {
public: public:
typedef Analyzer* (*factory_callback)(Connection* conn); typedef Analyzer* (*factory_callback)(Connection* conn);
@ -100,13 +102,6 @@ public:
*/ */
bool Enabled() const { return enabled; } bool Enabled() const { return enabled; }
/**
* Returns the analyzer's tag. Note that this is automatically
* generated for each new Components, and hence unique across all of
* them.
*/
analyzer::Tag Tag() const;
/** /**
* Enables or disables this analyzer. * Enables or disables this analyzer.
* *
@ -128,11 +123,7 @@ private:
const char* canon_name; // The analyzer's canonical name. const char* canon_name; // The analyzer's canonical name.
factory_callback factory; // The analyzer's factory callback. factory_callback factory; // The analyzer's factory callback.
bool partial; // True if the analyzer supports partial connections. bool partial; // True if the analyzer supports partial connections.
analyzer::Tag tag; // The automatically assigned analyzer tag.
bool enabled; // True if the analyzer is enabled. bool enabled; // True if the analyzer is enabled.
// Global counter used to generate unique tags.
static analyzer::Tag::type_t type_counter;
}; };
} }


@ -60,10 +60,8 @@ bool Manager::ConnIndex::operator<(const ConnIndex& other) const
} }
Manager::Manager() Manager::Manager()
: plugin::ComponentManager<analyzer::Tag, analyzer::Component>("Analyzer")
{ {
tag_enum_type = new EnumType("Analyzer::Tag");
::ID* id = install_ID("Tag", "Analyzer", true, true);
add_type(id, tag_enum_type, 0, 0);
} }
Manager::~Manager() Manager::~Manager()
@ -91,14 +89,14 @@ void Manager::InitPreScript()
std::list<Component*> analyzers = plugin_mgr->Components<Component>(); std::list<Component*> analyzers = plugin_mgr->Components<Component>();
for ( std::list<Component*>::const_iterator i = analyzers.begin(); i != analyzers.end(); i++ ) for ( std::list<Component*>::const_iterator i = analyzers.begin(); i != analyzers.end(); i++ )
RegisterAnalyzerComponent(*i); RegisterComponent(*i, "ANALYZER_");
// Cache these tags. // Cache these tags.
analyzer_backdoor = GetAnalyzerTag("BACKDOOR"); analyzer_backdoor = GetComponentTag("BACKDOOR");
analyzer_connsize = GetAnalyzerTag("CONNSIZE"); analyzer_connsize = GetComponentTag("CONNSIZE");
analyzer_interconn = GetAnalyzerTag("INTERCONN"); analyzer_interconn = GetComponentTag("INTERCONN");
analyzer_stepping = GetAnalyzerTag("STEPPINGSTONE"); analyzer_stepping = GetComponentTag("STEPPINGSTONE");
analyzer_tcpstats = GetAnalyzerTag("TCPSTATS"); analyzer_tcpstats = GetComponentTag("TCPSTATS");
} }
void Manager::InitPostScript() void Manager::InitPostScript()
@ -109,8 +107,9 @@ void Manager::DumpDebug()
{ {
#ifdef DEBUG #ifdef DEBUG
DBG_LOG(DBG_ANALYZER, "Available analyzers after bro_init():"); DBG_LOG(DBG_ANALYZER, "Available analyzers after bro_init():");
for ( analyzer_map_by_name::const_iterator i = analyzers_by_name.begin(); i != analyzers_by_name.end(); i++ ) list<Component*> all_analyzers = GetComponents();
DBG_LOG(DBG_ANALYZER, " %s (%s)", i->second->Name(), IsEnabled(i->second->Tag()) ? "enabled" : "disabled"); for ( list<Component*>::const_iterator i = all_analyzers.begin(); i != all_analyzers.end(); ++i )
DBG_LOG(DBG_ANALYZER, " %s (%s)", (*i)->Name(), IsEnabled((*i)->Tag()) ? "enabled" : "disabled");
DBG_LOG(DBG_ANALYZER, ""); DBG_LOG(DBG_ANALYZER, "");
DBG_LOG(DBG_ANALYZER, "Analyzers by port:"); DBG_LOG(DBG_ANALYZER, "Analyzers by port:");
@ -120,7 +119,7 @@ void Manager::DumpDebug()
string s; string s;
for ( tag_set::const_iterator j = i->second->begin(); j != i->second->end(); j++ ) for ( tag_set::const_iterator j = i->second->begin(); j != i->second->end(); j++ )
s += string(GetAnalyzerName(*j)) + " "; s += string(GetComponentName(*j)) + " ";
DBG_LOG(DBG_ANALYZER, " %d/tcp: %s", i->first, s.c_str()); DBG_LOG(DBG_ANALYZER, " %d/tcp: %s", i->first, s.c_str());
} }
@ -130,7 +129,7 @@ void Manager::DumpDebug()
string s; string s;
for ( tag_set::const_iterator j = i->second->begin(); j != i->second->end(); j++ ) for ( tag_set::const_iterator j = i->second->begin(); j != i->second->end(); j++ )
s += string(GetAnalyzerName(*j)) + " "; s += string(GetComponentName(*j)) + " ";
DBG_LOG(DBG_ANALYZER, " %d/udp: %s", i->first, s.c_str()); DBG_LOG(DBG_ANALYZER, " %d/udp: %s", i->first, s.c_str());
} }
@ -142,25 +141,6 @@ void Manager::Done()
{ {
} }
void Manager::RegisterAnalyzerComponent(Component* component)
{
const char* cname = component->CanonicalName();
if ( Lookup(cname) )
reporter->FatalError("Analyzer %s defined more than once", cname);
DBG_LOG(DBG_ANALYZER, "Registering analyzer %s (tag %s)",
component->Name(), component->Tag().AsString().c_str());
analyzers_by_name.insert(std::make_pair(cname, component));
analyzers_by_tag.insert(std::make_pair(component->Tag(), component));
analyzers_by_val.insert(std::make_pair(component->Tag().AsEnumVal()->InternalInt(), component));
// Install enum "Analyzer::ANALYZER_*"
string id = fmt("ANALYZER_%s", cname);
tag_enum_type->AddName("Analyzer", id.c_str(), component->Tag().AsEnumVal()->InternalInt(), true);
}
bool Manager::EnableAnalyzer(Tag tag) bool Manager::EnableAnalyzer(Tag tag)
{ {
Component* p = Lookup(tag); Component* p = Lookup(tag);
@ -217,8 +197,9 @@ void Manager::DisableAllAnalyzers()
{ {
DBG_LOG(DBG_ANALYZER, "Disabling all analyzers"); DBG_LOG(DBG_ANALYZER, "Disabling all analyzers");
for ( analyzer_map_by_tag::const_iterator i = analyzers_by_tag.begin(); i != analyzers_by_tag.end(); i++ ) list<Component*> all_analyzers = GetComponents();
i->second->SetEnabled(false); for ( list<Component*>::const_iterator i = all_analyzers.begin(); i != all_analyzers.end(); ++i )
(*i)->SetEnabled(false);
} }
bool Manager::IsEnabled(Tag tag) bool Manager::IsEnabled(Tag tag)
@ -270,7 +251,7 @@ bool Manager::RegisterAnalyzerForPort(Tag tag, TransportProto proto, uint32 port
tag_set* l = LookupPort(proto, port, true); tag_set* l = LookupPort(proto, port, true);
#ifdef DEBUG #ifdef DEBUG
const char* name = GetAnalyzerName(tag); const char* name = GetComponentName(tag);
DBG_LOG(DBG_ANALYZER, "Registering analyzer %s for port %" PRIu32 "/%d", name, port, proto); DBG_LOG(DBG_ANALYZER, "Registering analyzer %s for port %" PRIu32 "/%d", name, port, proto);
#endif #endif
@ -283,7 +264,7 @@ bool Manager::UnregisterAnalyzerForPort(Tag tag, TransportProto proto, uint32 po
tag_set* l = LookupPort(proto, port, true); tag_set* l = LookupPort(proto, port, true);
#ifdef DEBUG #ifdef DEBUG
const char* name = GetAnalyzerName(tag); const char* name = GetComponentName(tag);
DBG_LOG(DBG_ANALYZER, "Unregistering analyzer %s for port %" PRIu32 "/%d", name, port, proto); DBG_LOG(DBG_ANALYZER, "Unregistering analyzer %s for port %" PRIu32 "/%d", name, port, proto);
#endif #endif
@ -302,7 +283,7 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, Connection* conn)
return 0; return 0;
if ( ! c->Factory() ) if ( ! c->Factory() )
reporter->InternalError("analyzer %s cannot be instantiated dynamically", GetAnalyzerName(tag)); reporter->InternalError("analyzer %s cannot be instantiated dynamically", GetComponentName(tag));
Analyzer* a = c->Factory()(conn); Analyzer* a = c->Factory()(conn);
@ -316,59 +297,10 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, Connection* conn)
Analyzer* Manager::InstantiateAnalyzer(const char* name, Connection* conn) Analyzer* Manager::InstantiateAnalyzer(const char* name, Connection* conn)
{ {
Tag tag = GetAnalyzerTag(name); Tag tag = GetComponentTag(name);
return tag ? InstantiateAnalyzer(tag, conn) : 0; return tag ? InstantiateAnalyzer(tag, conn) : 0;
} }
const char* Manager::GetAnalyzerName(Tag tag)
{
static const char* error = "<error>";
if ( ! tag )
return error;
Component* c = Lookup(tag);
if ( ! c )
reporter->InternalError("request for name of unknown analyzer tag %s", tag.AsString().c_str());
return c->CanonicalName();
}
const char* Manager::GetAnalyzerName(Val* val)
{
return GetAnalyzerName(Tag(val->AsEnumVal()));
}
Tag Manager::GetAnalyzerTag(const char* name)
{
Component* c = Lookup(name);
return c ? c->Tag() : Tag();
}
EnumType* Manager::GetTagEnumType()
{
return tag_enum_type;
}
Component* Manager::Lookup(const char* name)
{
analyzer_map_by_name::const_iterator i = analyzers_by_name.find(to_upper(name));
return i != analyzers_by_name.end() ? i->second : 0;
}
Component* Manager::Lookup(const Tag& tag)
{
analyzer_map_by_tag::const_iterator i = analyzers_by_tag.find(tag);
return i != analyzers_by_tag.end() ? i->second : 0;
}
Component* Manager::Lookup(EnumVal* val)
{
analyzer_map_by_val::const_iterator i = analyzers_by_val.find(val->InternalInt());
return i != analyzers_by_val.end() ? i->second : 0;
}
Manager::tag_set* Manager::LookupPort(TransportProto proto, uint32 port, bool add_if_not_found) Manager::tag_set* Manager::LookupPort(TransportProto proto, uint32 port, bool add_if_not_found)
{ {
analyzer_map_by_port* m = 0; analyzer_map_by_port* m = 0;
@ -461,7 +393,7 @@ bool Manager::BuildInitialAnalyzerTree(Connection* conn)
root->AddChildAnalyzer(analyzer, false); root->AddChildAnalyzer(analyzer, false);
DBG_ANALYZER_ARGS(conn, "activated %s analyzer as scheduled", DBG_ANALYZER_ARGS(conn, "activated %s analyzer as scheduled",
analyzer_mgr->GetAnalyzerName(*i)); analyzer_mgr->GetComponentName(*i));
} }
} }
@ -487,7 +419,7 @@ bool Manager::BuildInitialAnalyzerTree(Connection* conn)
root->AddChildAnalyzer(analyzer, false); root->AddChildAnalyzer(analyzer, false);
DBG_ANALYZER_ARGS(conn, "activated %s analyzer due to port %d", DBG_ANALYZER_ARGS(conn, "activated %s analyzer due to port %d",
analyzer_mgr->GetAnalyzerName(*j), resp_port); analyzer_mgr->GetComponentName(*j), resp_port);
} }
} }
} }
@ -613,7 +545,7 @@ void Manager::ExpireScheduledAnalyzers()
conns.erase(i); conns.erase(i);
DBG_LOG(DBG_ANALYZER, "Expiring expected analyzer %s for connection %s", DBG_LOG(DBG_ANALYZER, "Expiring expected analyzer %s for connection %s",
analyzer_mgr->GetAnalyzerName(a->analyzer), analyzer_mgr->GetComponentName(a->analyzer),
fmt_conn_id(a->conn.orig, 0, a->conn.resp, a->conn.resp_p)); fmt_conn_id(a->conn.orig, 0, a->conn.resp, a->conn.resp_p));
delete a; delete a;
@ -655,7 +587,7 @@ void Manager::ScheduleAnalyzer(const IPAddr& orig, const IPAddr& resp,
TransportProto proto, const char* analyzer, TransportProto proto, const char* analyzer,
double timeout) double timeout)
{ {
Tag tag = GetAnalyzerTag(analyzer); Tag tag = GetComponentTag(analyzer);
if ( tag != Tag() ) if ( tag != Tag() )
ScheduleAnalyzer(orig, resp, resp_p, proto, tag, timeout); ScheduleAnalyzer(orig, resp, resp_p, proto, tag, timeout);
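The three per-manager maps and RegisterAnalyzerComponent() removed above are subsumed by the generic plugin::ComponentManager. The sketch below shows what its RegisterComponent() plausibly does, reconstructed from the removed code; the member names (components_by_*, module, tag_enum_type) are assumptions, since only the beginning of src/plugin/ComponentManager.h appears at the end of this diff:

    template <class T, class C>
    void ComponentManager<T, C>::RegisterComponent(C* component, const string& prefix)
        {
        const char* cname = component->CanonicalName();

        if ( Lookup(cname) )
            reporter->FatalError("%s %s defined more than once",
                                 module.c_str(), cname);

        // The same three indices each manager used to keep for itself.
        components_by_name.insert(std::make_pair(cname, component));
        components_by_tag.insert(std::make_pair(component->Tag(), component));
        components_by_val.insert(std::make_pair(
            component->Tag().AsEnumVal()->InternalInt(), component));

        // Install the script-layer enum, e.g. Analyzer::ANALYZER_HTTP or
        // Files::ANALYZER_MD5.
        string id = fmt("%s%s", prefix.c_str(), cname);
        tag_enum_type->AddName(module, id.c_str(),
                               component->Tag().AsEnumVal()->InternalInt(), true);
        }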

View file

@ -26,6 +26,7 @@
#include "Analyzer.h" #include "Analyzer.h"
#include "Component.h" #include "Component.h"
#include "Tag.h" #include "Tag.h"
#include "plugin/ComponentManager.h"
#include "../Dict.h" #include "../Dict.h"
#include "../net_util.h" #include "../net_util.h"
@ -49,7 +50,7 @@ namespace analyzer {
* classes. This allows external analyzer code to potentially use a * classes. This allows external analyzer code to potentially use a
* different C++ standard library. * different C++ standard library.
*/ */
class Manager { class Manager : public plugin::ComponentManager<Tag, Component> {
public: public:
/** /**
* Constructor. * Constructor.
@ -231,42 +232,6 @@ public:
*/ */
Analyzer* InstantiateAnalyzer(const char* name, Connection* c); Analyzer* InstantiateAnalyzer(const char* name, Connection* c);
/**
* Translates an analyzer tag into corresponding analyzer name.
*
* @param tag The analyzer tag.
*
* @return The name, or an empty string if the tag is invalid.
*/
const char* GetAnalyzerName(Tag tag);
/**
* Translates an script-level analyzer tag into corresponding
* analyzer name.
*
* @param val The analyzer tag as an script-level enum value of type
* \c Analyzer::Tag.
*
* @return The name, or an empty string if the tag is invalid.
*/
const char* GetAnalyzerName(Val* val);
/**
* Translates an analyzer name into the corresponding tag.
*
* @param name The name.
*
* @return The tag. If the name does not correspond to a valid
* analyzer, the returned tag will evaluate to false.
*/
Tag GetAnalyzerTag(const char* name);
/**
* Returns the enum type that corresponds to the script-level type \c
* Analyzer::Tag.
*/
EnumType* GetTagEnumType();
/** /**
* Given the first packet of a connection, builds its initial * Given the first packet of a connection, builds its initial
* analyzer tree. * analyzer tree.
@ -350,18 +315,8 @@ public:
private: private:
typedef set<Tag> tag_set; typedef set<Tag> tag_set;
typedef map<string, Component*> analyzer_map_by_name;
typedef map<Tag, Component*> analyzer_map_by_tag;
typedef map<int, Component*> analyzer_map_by_val;
typedef map<uint32, tag_set*> analyzer_map_by_port; typedef map<uint32, tag_set*> analyzer_map_by_port;
void RegisterAnalyzerComponent(Component* component); // Takes ownership.
Component* Lookup(const string& name);
Component* Lookup(const char* name);
Component* Lookup(const Tag& tag);
Component* Lookup(EnumVal* val);
tag_set* LookupPort(PortVal* val, bool add_if_not_found); tag_set* LookupPort(PortVal* val, bool add_if_not_found);
tag_set* LookupPort(TransportProto proto, uint32 port, bool add_if_not_found); tag_set* LookupPort(TransportProto proto, uint32 port, bool add_if_not_found);
@ -370,9 +325,6 @@ private:
analyzer_map_by_port analyzers_by_port_tcp; analyzer_map_by_port analyzers_by_port_tcp;
analyzer_map_by_port analyzers_by_port_udp; analyzer_map_by_port analyzers_by_port_udp;
analyzer_map_by_name analyzers_by_name;
analyzer_map_by_tag analyzers_by_tag;
analyzer_map_by_val analyzers_by_val;
Tag analyzer_backdoor; Tag analyzer_backdoor;
Tag analyzer_connsize; Tag analyzer_connsize;
@ -380,8 +332,6 @@ private:
Tag analyzer_stepping; Tag analyzer_stepping;
Tag analyzer_tcpstats; Tag analyzer_tcpstats;
EnumType* tag_enum_type;
//// Data structures to track analyzers scheduled for future connections. //// Data structures to track analyzers scheduled for future connections.
// The index for a scheduled connection. // The index for a scheduled connection.

View file

@ -3,90 +3,20 @@
#include "Tag.h" #include "Tag.h"
#include "Manager.h" #include "Manager.h"
#include "../NetVar.h" analyzer::Tag analyzer::Tag::Error;
using namespace analyzer; analyzer::Tag::Tag(type_t type, subtype_t subtype)
: ::Tag(analyzer_mgr->GetTagEnumType(), type, subtype)
Tag Tag::Error;
Tag::Tag(type_t arg_type, subtype_t arg_subtype)
{ {
assert(arg_type > 0);
type = arg_type;
subtype = arg_subtype;
int64_t i = (int64)(type) | ((int64)subtype << 31);
EnumType* etype = analyzer_mgr->GetTagEnumType();
Ref(etype);
val = new EnumVal(i, etype);
} }
Tag::Tag(EnumVal* arg_val) analyzer::Tag& analyzer::Tag::operator=(const analyzer::Tag& other)
{ {
assert(arg_val); ::Tag::operator=(other);
val = arg_val;
Ref(val);
int64 i = val->InternalInt();
type = i & 0xffffffff;
subtype = (i >> 31) & 0xffffffff;
}
Tag::Tag(const Tag& other)
{
type = other.type;
subtype = other.subtype;
val = other.val;
if ( val )
Ref(val);
}
Tag::Tag()
{
type = 0;
subtype = 0;
val = 0;
}
Tag::~Tag()
{
Unref(val);
val = 0;
}
Tag& Tag::operator=(const Tag& other)
{
if ( this != &other )
{
type = other.type;
subtype = other.subtype;
val = other.val;
if ( val )
Ref(val);
}
return *this; return *this;
} }
EnumVal* Tag::AsEnumVal() const EnumVal* analyzer::Tag::AsEnumVal() const
{ {
if ( ! val ) return ::Tag::AsEnumVal(analyzer_mgr->GetTagEnumType());
{
assert(analyzer_mgr);
assert(type == 0 && subtype == 0);
EnumType* etype = analyzer_mgr->GetTagEnumType();
Ref(etype);
val = new EnumVal(0, etype);
}
return val;
}
std::string Tag::AsString() const
{
return fmt("%" PRIu32 "/%" PRIu32, type, subtype);
} }
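analyzer::Tag now just forwards to a shared ::Tag base that receives the enum type from the owning manager. The base class itself is not shown in this diff; the following sketch assumes it keeps the packing scheme of the code removed above:

    // Assumed shape of the shared base; src/Tag.cc is not part of this diff.
    Tag::Tag(EnumType* etype, type_t arg_type, subtype_t arg_subtype)
        {
        assert(arg_type > 0);

        type = arg_type;
        subtype = arg_subtype;

        // Same packing as the old analyzer::Tag: main type in the low
        // 31 bits, subtype above it.
        int64_t i = (int64_t)(type) | ((int64_t)subtype << 31);

        Ref(etype);
        val = new EnumVal(i, etype);
        }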

View file

@ -5,90 +5,46 @@
#include "config.h" #include "config.h"
#include "util.h" #include "util.h"
#include "../Tag.h"
#include "plugin/TaggedComponent.h"
#include "plugin/ComponentManager.h"
class EnumVal; class EnumVal;
namespace file_analysis {
class Manager;
class Component;
}
namespace analyzer { namespace analyzer {
class Manager; class Manager;
class Component; class Component;
/** /**
* Class to identify an analyzer type. * Class to identify a protocol analyzer type.
* *
* Each analyzer type gets a tag consisting of a main type and subtype. The * The script-layer analogue is Analyzer::Tag.
* former is an identifier that's unique all analyzer classes. The latter is
* passed through to the analyzer instances for their use, yet not further
* interpreted by the analyzer infrastructure; it allows an analyzer to
* branch out into a set of sub-analyzers internally. Jointly, main type and
* subtype form an analyzer "tag". Each unique tag corresponds to a single
* "analyzer" from the user's perspective. At the script layer, these tags
* are mapped into enums of type \c Analyzer::Tag. Internally, the
* analyzer::Manager maintains the mapping of tag to analyzer (and it also
* assigns them their main types), and analyzer::Component creates new
* tags.
*
* The Tag class supports all operations necessary to act as an index in a
* \c std::map.
*/ */
class Tag { class Tag : public ::Tag {
public: public:
/**
* Type for the analyzer's main type.
*/
typedef uint32 type_t;
/**
* Type for the analyzer's subtype.
*/
typedef uint32 subtype_t;
/* /*
* Copy constructor. * Copy constructor.
*/ */
Tag(const Tag& other); Tag(const Tag& other) : ::Tag(other) {}
/** /**
* Default constructor. This initializes the tag with an error value * Default constructor. This initializes the tag with an error value
* that will make \c operator \c bool return false. * that will make \c operator \c bool return false.
*/ */
Tag(); Tag() : ::Tag() {}
/** /**
* Destructor. * Destructor.
*/ */
~Tag(); ~Tag() {}
/**
* Returns the tag's main type.
*/
type_t Type() const { return type; }
/**
* Returns the tag's subtype.
*/
subtype_t Subtype() const { return subtype; }
/**
* Returns the \c Analyzer::Tag enum that corresponds to this tag.
* The returned value is \a does not have its ref-count increased.
*/
EnumVal* AsEnumVal() const;
/**
* Returns the numerical values for main and subtype inside a string
* suitable for printing. This is primarily for debugging.
*/
std::string AsString() const;
/** /**
* Returns false if the tag represents an error value rather than a * Returns false if the tag represents an error value rather than a
* legal analyzer type. * legal analyzer type.
* TODO: make this conversion operator "explicit" (C++11) or use a
* "safe bool" idiom (not necessary if "explicit" is available),
* otherwise this may allow nonsense/undesired comparison operations.
*/ */
operator bool() const { return *this != Tag(); } operator bool() const { return *this != Tag(); }
@ -102,7 +58,7 @@ public:
*/ */
bool operator==(const Tag& other) const bool operator==(const Tag& other) const
{ {
return type == other.type && subtype == other.subtype; return ::Tag::operator==(other);
} }
/** /**
@ -110,7 +66,7 @@ public:
*/ */
bool operator!=(const Tag& other) const bool operator!=(const Tag& other) const
{ {
return type != other.type || subtype != other.subtype; return ::Tag::operator!=(other);
} }
/** /**
@ -118,23 +74,30 @@ public:
*/ */
bool operator<(const Tag& other) const bool operator<(const Tag& other) const
{ {
return type != other.type ? type < other.type : (subtype < other.subtype); return ::Tag::operator<(other);
} }
/**
* Returns the \c Analyzer::Tag enum that corresponds to this tag.
* The returned value does not have its ref-count increased.
*/
EnumVal* AsEnumVal() const;
static Tag Error; static Tag Error;
protected: protected:
friend class analyzer::Manager; friend class analyzer::Manager;
friend class analyzer::Component; friend class plugin::ComponentManager<Tag, Component>;
friend class file_analysis::Manager; friend class plugin::TaggedComponent<Tag>;
friend class file_analysis::Component;
/** /**
* Constructor. * Constructor.
* *
* @param type The main type. Note that the \a analyzer::Manager * @param type The main type. Note that the \a analyzer::Manager
* manages the value space internally, so no one else should assign * manages the value space internally, so no one else should assign
* any main tyoes. * any main types.
* *
* @param subtype The sub type, which is left to an analyzer for * @param subtype The sub type, which is left to an analyzer for
* interpretation. By default it's set to zero. * interpretation. By default it's set to zero.
@ -144,14 +107,9 @@ protected:
/** /**
* Constructor. * Constructor.
* *
* @param val An enuam value of script type \c Analyzer::Tag. * @param val An enum value of script type \c Analyzer::Tag.
*/ */
Tag(EnumVal* val); Tag(EnumVal* val) : ::Tag(val) {}
private:
type_t type; // Main type.
subtype_t subtype; // Subtype.
mutable EnumVal* val; // Analyzer::Tag value.
}; };
} }

View file

@ -41,11 +41,11 @@ function Analyzer::__schedule_analyzer%(orig: addr, resp: addr, resp_p: port,
function __name%(atype: Analyzer::Tag%) : string function __name%(atype: Analyzer::Tag%) : string
%{ %{
return new StringVal(analyzer_mgr->GetAnalyzerName(atype)); return new StringVal(analyzer_mgr->GetComponentName(atype));
%} %}
function __tag%(name: string%) : Analyzer::Tag function __tag%(name: string%) : Analyzer::Tag
%{ %{
analyzer::Tag t = analyzer_mgr->GetAnalyzerTag(name->CheckString()); analyzer::Tag t = analyzer_mgr->GetComponentTag(name->CheckString());
return t.AsEnumVal()->Ref(); return t.AsEnumVal()->Ref();
%} %}
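At the C++ level the renamed lookups work as before. A brief usage sketch; the analyzer name is illustrative and conn is assumed to be a Connection* in scope:

    analyzer::Tag t = analyzer_mgr->GetComponentTag("HTTP");

    if ( t )   // false if no analyzer with that canonical name is registered
        {
        DBG_LOG(DBG_ANALYZER, "resolved analyzer %s", analyzer_mgr->GetComponentName(t));

        // conn is assumed to be a Connection* in scope.
        analyzer::Analyzer* a = analyzer_mgr->InstantiateAnalyzer(t, conn);

        if ( ! a )
            reporter->Error("could not instantiate HTTP analyzer");
        }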

View file

@ -0,0 +1,11 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "Analyzer.h"
#include "Manager.h"
file_analysis::Analyzer::~Analyzer()
{
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %s",
file_mgr->GetComponentName(tag));
Unref(args);
}

View file

@ -5,14 +5,12 @@
#include "Val.h" #include "Val.h"
#include "NetVar.h" #include "NetVar.h"
#include "analyzer/Tag.h" #include "Tag.h"
#include "file_analysis/file_analysis.bif.h" #include "file_analysis/file_analysis.bif.h"
namespace file_analysis { namespace file_analysis {
typedef int FA_Tag;
class File; class File;
/** /**
@ -25,11 +23,7 @@ public:
* Destructor. Nothing special about it. Virtual since we definitely expect * Destructor. Nothing special about it. Virtual since we definitely expect
* to delete instances of derived classes via pointers to this class. * to delete instances of derived classes via pointers to this class.
*/ */
virtual ~Analyzer() virtual ~Analyzer();
{
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %d", tag);
Unref(args);
}
/** /**
* Subclasses may override this method to receive file data non-sequentially. * Subclasses may override this method to receive file data non-sequentially.
@ -76,7 +70,7 @@ public:
/** /**
* @return the analyzer type enum value. * @return the analyzer type enum value.
*/ */
FA_Tag Tag() const { return tag; } file_analysis::Tag Tag() const { return tag; }
/** /**
* @return the AnalyzerArgs associated with the analyzer. * @return the AnalyzerArgs associated with the analyzer.
@ -88,18 +82,6 @@ public:
*/ */
File* GetFile() const { return file; } File* GetFile() const { return file; }
/**
* Retrieves an analyzer tag field from full analyzer argument record.
* @param args an \c AnalyzerArgs (script-layer type) value.
* @return the analyzer tag equivalent of the 'tag' field from the
* \c AnalyzerArgs value \a args.
*/
static FA_Tag ArgsTag(const RecordVal* args)
{
using BifType::Record::Files::AnalyzerArgs;
return args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum();
}
protected: protected:
/** /**
@ -108,15 +90,15 @@ protected:
* tunable options, if any, related to a particular analyzer type. * tunable options, if any, related to a particular analyzer type.
* @param arg_file the file to which the analyzer is being attached. * @param arg_file the file to which the analyzer is being attached.
*/ */
Analyzer(RecordVal* arg_args, File* arg_file) Analyzer(file_analysis::Tag arg_tag, RecordVal* arg_args, File* arg_file)
: tag(file_analysis::Analyzer::ArgsTag(arg_args)), : tag(arg_tag),
args(arg_args->Ref()->AsRecordVal()), args(arg_args->Ref()->AsRecordVal()),
file(arg_file) file(arg_file)
{} {}
private: private:
FA_Tag tag; /**< The particular analyzer type of the analyzer instance. */ file_analysis::Tag tag; /**< The particular type of the analyzer instance. */
RecordVal* args; /**< \c AnalyzerArgs val gives tunable analyzer params. */ RecordVal* args; /**< \c AnalyzerArgs val gives tunable analyzer params. */
File* file; /**< The file to which the analyzer is attached. */ File* file; /**< The file to which the analyzer is attached. */
}; };
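Concrete file analyzers now hand their tag to this base constructor instead of extracting an integer from AnalyzerArgs. A sketch of the resulting pattern; the class and component name are hypothetical, but the shape matches the DataEvent, Extract and Hash changes further down in this diff:

    class MyAnalyzer : public file_analysis::Analyzer {
    public:
        MyAnalyzer(RecordVal* args, File* file)
            : file_analysis::Analyzer(file_mgr->GetComponentTag("MY_ANALYZER"),
                                      args, file)
            { }

        // Matches file_analysis::Component::factory_callback.
        static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
            { return new MyAnalyzer(args, file); }
    };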

View file

@ -15,6 +15,7 @@ static void analyzer_del_func(void* v)
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file) AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file)
{ {
TypeList* t = new TypeList(); TypeList* t = new TypeList();
t->Append(file_mgr->GetTagEnumType()->Ref());
t->Append(BifType::Record::Files::AnalyzerArgs->Ref()); t->Append(BifType::Record::Files::AnalyzerArgs->Ref());
analyzer_hash = new CompositeHash(t); analyzer_hash = new CompositeHash(t);
Unref(t); Unref(t);
@ -34,20 +35,20 @@ AnalyzerSet::~AnalyzerSet()
delete analyzer_hash; delete analyzer_hash;
} }
bool AnalyzerSet::Add(RecordVal* args) bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args)
{ {
HashKey* key = GetKey(args); HashKey* key = GetKey(tag, args);
if ( analyzer_map.Lookup(key) ) if ( analyzer_map.Lookup(key) )
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d skipped for file id" DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s skipped for file id"
" %s: already exists", file_analysis::Analyzer::ArgsTag(args), " %s: already exists", file_mgr->GetComponentName(tag),
file->GetID().c_str()); file->GetID().c_str());
delete key; delete key;
return true; return true;
} }
file_analysis::Analyzer* a = InstantiateAnalyzer(args); file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args);
if ( ! a ) if ( ! a )
{ {
@ -60,10 +61,10 @@ bool AnalyzerSet::Add(RecordVal* args)
return true; return true;
} }
bool AnalyzerSet::QueueAdd(RecordVal* args) bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args)
{ {
HashKey* key = GetKey(args); HashKey* key = GetKey(tag, args);
file_analysis::Analyzer* a = InstantiateAnalyzer(args); file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args);
if ( ! a ) if ( ! a )
{ {
@ -80,8 +81,9 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
{ {
if ( set->analyzer_map.Lookup(key) ) if ( set->analyzer_map.Lookup(key) )
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d skipped for file id" DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s skipped for file id"
" %s: already exists", a->Tag(), a->GetFile()->GetID().c_str()); " %s: already exists", file_mgr->GetComponentName(a->Tag()),
a->GetFile()->GetID().c_str());
Abort(); Abort();
return true; return true;
@ -91,12 +93,12 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
return true; return true;
} }
bool AnalyzerSet::Remove(const RecordVal* args) bool AnalyzerSet::Remove(file_analysis::Tag tag, RecordVal* args)
{ {
return Remove(file_analysis::Analyzer::ArgsTag(args), GetKey(args)); return Remove(tag, GetKey(tag, args));
} }
bool AnalyzerSet::Remove(FA_Tag tag, HashKey* key) bool AnalyzerSet::Remove(file_analysis::Tag tag, HashKey* key)
{ {
file_analysis::Analyzer* a = file_analysis::Analyzer* a =
(file_analysis::Analyzer*) analyzer_map.Remove(key); (file_analysis::Analyzer*) analyzer_map.Remove(key);
@ -105,22 +107,22 @@ bool AnalyzerSet::Remove(FA_Tag tag, HashKey* key)
if ( ! a ) if ( ! a )
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %d for file id %s", DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %s for file id %s",
tag, file->GetID().c_str()); file_mgr->GetComponentName(tag), file->GetID().c_str());
return false; return false;
} }
DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %d for file id %s", a->Tag(), DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %s for file id %s",
file_mgr->GetComponentName(tag),
file->GetID().c_str()); file->GetID().c_str());
delete a; delete a;
return true; return true;
} }
bool AnalyzerSet::QueueRemove(const RecordVal* args) bool AnalyzerSet::QueueRemove(file_analysis::Tag tag, RecordVal* args)
{ {
HashKey* key = GetKey(args); HashKey* key = GetKey(tag, args);
FA_Tag tag = file_analysis::Analyzer::ArgsTag(args);
mod_queue.push(new RemoveMod(tag, key)); mod_queue.push(new RemoveMod(tag, key));
@ -132,24 +134,28 @@ bool AnalyzerSet::RemoveMod::Perform(AnalyzerSet* set)
return set->Remove(tag, key); return set->Remove(tag, key);
} }
HashKey* AnalyzerSet::GetKey(const RecordVal* args) const HashKey* AnalyzerSet::GetKey(file_analysis::Tag t, RecordVal* args) const
{ {
HashKey* key = analyzer_hash->ComputeHash(args, 1); ListVal* lv = new ListVal(TYPE_ANY);
lv->Append(t.AsEnumVal()->Ref());
lv->Append(args->Ref());
HashKey* key = analyzer_hash->ComputeHash(lv, 1);
Unref(lv);
if ( ! key ) if ( ! key )
reporter->InternalError("AnalyzerArgs type mismatch"); reporter->InternalError("AnalyzerArgs type mismatch");
return key; return key;
} }
file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(Tag tag,
RecordVal* args) const
{ {
FA_Tag tag = file_analysis::Analyzer::ArgsTag(args);
file_analysis::Analyzer* a = file_mgr->InstantiateAnalyzer(tag, args, file); file_analysis::Analyzer* a = file_mgr->InstantiateAnalyzer(tag, args, file);
if ( ! a ) if ( ! a )
{ {
reporter->Error("Failed file analyzer %s instantiation for file id %s", reporter->Error("Failed file analyzer %s instantiation for file id %s",
file_mgr->GetAnalyzerName(tag), file->GetID().c_str()); file_mgr->GetComponentName(tag), file->GetID().c_str());
return 0; return 0;
} }
@ -158,8 +164,8 @@ file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const
void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key) void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key)
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d for file id %s", a->Tag(), DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s for file id %s",
file->GetID().c_str()); file_mgr->GetComponentName(a->Tag()), file->GetID().c_str());
analyzer_map.Insert(key, a); analyzer_map.Insert(key, a);
delete key; delete key;
} }

View file

@ -9,6 +9,7 @@
#include "Dict.h" #include "Dict.h"
#include "CompHash.h" #include "CompHash.h"
#include "Val.h" #include "Val.h"
#include "Tag.h"
namespace file_analysis { namespace file_analysis {
@ -38,31 +39,35 @@ public:
/** /**
* Attach an analyzer to #file immediately. * Attach an analyzer to #file immediately.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value which specifies an analyzer. * @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer was instantiated/attached, else false. * @return true if analyzer was instantiated/attached, else false.
*/ */
bool Add(RecordVal* args); bool Add(file_analysis::Tag tag, RecordVal* args);
/** /**
* Queue the attachment of an analyzer to #file. * Queue the attachment of an analyzer to #file.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value which specifies an analyzer. * @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer was able to be instantiated, else false. * @return true if analyzer was able to be instantiated, else false.
*/ */
bool QueueAdd(RecordVal* args); bool QueueAdd(file_analysis::Tag tag, RecordVal* args);
/** /**
* Remove an analyzer from #file immediately. * Remove an analyzer from #file immediately.
* @param tag the analyzer tag of the file analyzer to remove.
* @param args an \c AnalyzerArgs value which specifies an analyzer. * @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return false if analyzer didn't exist and so wasn't removed, else true. * @return false if analyzer didn't exist and so wasn't removed, else true.
*/ */
bool Remove(const RecordVal* args); bool Remove(file_analysis::Tag tag, RecordVal* args);
/** /**
* Queue the removal of an analyzer from #file. * Queue the removal of an analyzer from #file.
* @param tag the analyzer tag of the file analyzer to remove.
* @param args an \c AnalyzerArgs value which specifies an analyzer. * @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer exists at time of call, else false; * @return true if analyzer exists at time of call, else false;
*/ */
bool QueueRemove(const RecordVal* args); bool QueueRemove(file_analysis::Tag tag, RecordVal* args);
/** /**
* Perform all queued modifications to the current analyzer set. * Perform all queued modifications to the current analyzer set.
@ -91,17 +96,20 @@ protected:
/** /**
* Get a hash key which represents an analyzer instance. * Get a hash key which represents an analyzer instance.
* @param tag the file analyzer tag.
* @param args an \c AnalyzerArgs value which specifies an analyzer. * @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return the hash key calculated from \a args * @return the hash key calculated from \a args
*/ */
HashKey* GetKey(const RecordVal* args) const; HashKey* GetKey(file_analysis::Tag tag, RecordVal* args) const;
/** /**
* Create an instance of a file analyzer. * Create an instance of a file analyzer.
* @param tag the tag of a file analyzer.
* @param args an \c AnalyzerArgs value which specifies an analyzer. * @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return a new file analyzer instance. * @return a new file analyzer instance.
*/ */
file_analysis::Analyzer* InstantiateAnalyzer(RecordVal* args) const; file_analysis::Analyzer* InstantiateAnalyzer(file_analysis::Tag tag,
RecordVal* args) const;
/** /**
* Insert an analyzer instance in to the set. * Insert an analyzer instance in to the set.
@ -116,7 +124,7 @@ protected:
* just used for debugging messages. * just used for debugging messages.
* @param key the hash key which represents the analyzer's \c AnalyzerArgs. * @param key the hash key which represents the analyzer's \c AnalyzerArgs.
*/ */
bool Remove(FA_Tag tag, HashKey* key); bool Remove(file_analysis::Tag tag, HashKey* key);
private: private:
@ -175,14 +183,14 @@ private:
* @param arg_tag the tag of the file analyzer to remove. * @param arg_tag the tag of the file analyzer to remove.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs. * @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
*/ */
RemoveMod(FA_Tag arg_tag, HashKey* arg_key) RemoveMod(file_analysis::Tag arg_tag, HashKey* arg_key)
: Modification(), tag(arg_tag), key(arg_key) {} : Modification(), tag(arg_tag), key(arg_key) {}
virtual ~RemoveMod() {} virtual ~RemoveMod() {}
virtual bool Perform(AnalyzerSet* set); virtual bool Perform(AnalyzerSet* set);
virtual void Abort() { delete key; } virtual void Abort() { delete key; }
protected: protected:
FA_Tag tag; file_analysis::Tag tag;
HashKey* key; HashKey* key;
}; };

View file

@ -11,9 +11,10 @@ set(file_analysis_SRCS
Manager.cc Manager.cc
File.cc File.cc
FileTimer.cc FileTimer.cc
Analyzer.h Analyzer.cc
AnalyzerSet.cc AnalyzerSet.cc
Component.cc Component.cc
Tag.cc
) )
bif_target(file_analysis.bif) bif_target(file_analysis.bif)

View file

@ -8,26 +8,22 @@
using namespace file_analysis; using namespace file_analysis;
analyzer::Tag::type_t Component::type_counter = 0; Component::Component(const char* arg_name, factory_callback arg_factory)
: plugin::Component(plugin::component::FILE_ANALYZER),
Component::Component(const char* arg_name, factory_callback arg_factory, plugin::TaggedComponent<file_analysis::Tag>()
analyzer::Tag::subtype_t arg_subtype)
: plugin::Component(plugin::component::FILE_ANALYZER)
{ {
name = copy_string(arg_name); name = copy_string(arg_name);
canon_name = canonify_name(arg_name); canon_name = canonify_name(arg_name);
factory = arg_factory; factory = arg_factory;
tag = analyzer::Tag(++type_counter, arg_subtype);
} }
Component::Component(const Component& other) Component::Component(const Component& other)
: plugin::Component(Type()) : plugin::Component(Type()),
plugin::TaggedComponent<file_analysis::Tag>(other)
{ {
name = copy_string(other.name); name = copy_string(other.name);
canon_name = copy_string(other.canon_name); canon_name = copy_string(other.canon_name);
factory = other.factory; factory = other.factory;
tag = other.tag;
} }
Component::~Component() Component::~Component()
@ -36,11 +32,6 @@ Component::~Component()
delete [] canon_name; delete [] canon_name;
} }
analyzer::Tag Component::Tag() const
{
return tag;
}
void Component::Describe(ODesc* d) const void Component::Describe(ODesc* d) const
{ {
plugin::Component::Describe(d); plugin::Component::Describe(d);
@ -58,11 +49,12 @@ void Component::Describe(ODesc* d) const
Component& Component::operator=(const Component& other) Component& Component::operator=(const Component& other)
{ {
plugin::TaggedComponent<file_analysis::Tag>::operator=(other);
if ( &other != this ) if ( &other != this )
{ {
name = copy_string(other.name); name = copy_string(other.name);
factory = other.factory; factory = other.factory;
tag = other.tag;
} }
return *this; return *this;

View file

@ -3,8 +3,9 @@
#ifndef FILE_ANALYZER_PLUGIN_COMPONENT_H #ifndef FILE_ANALYZER_PLUGIN_COMPONENT_H
#define FILE_ANALYZER_PLUGIN_COMPONENT_H #define FILE_ANALYZER_PLUGIN_COMPONENT_H
#include "analyzer/Tag.h" #include "Tag.h"
#include "plugin/Component.h" #include "plugin/Component.h"
#include "plugin/TaggedComponent.h"
#include "Val.h" #include "Val.h"
@ -22,7 +23,8 @@ class Analyzer;
* A plugin can provide a specific file analyzer by registering this * A plugin can provide a specific file analyzer by registering this
* analyzer component, describing the analyzer. * analyzer component, describing the analyzer.
*/ */
class Component : public plugin::Component { class Component : public plugin::Component,
public plugin::TaggedComponent<file_analysis::Tag> {
public: public:
typedef Analyzer* (*factory_callback)(RecordVal* args, File* file); typedef Analyzer* (*factory_callback)(RecordVal* args, File* file);
@ -38,15 +40,8 @@ public:
* from file_analysis::Analyzer. This is typically a static \c * from file_analysis::Analyzer. This is typically a static \c
* Instantiate() method inside the class that just allocates and * Instantiate() method inside the class that just allocates and
* returns a new instance. * returns a new instance.
*
* @param subtype A subtype associated with this component that
* further distinguishes it. The subtype will be integrated into
* the analyzer::Tag that the manager associates with this analyzer,
* and analyzer instances can accordingly access it via analyzer::Tag().
* If not used, leave at zero.
*/ */
Component(const char* name, factory_callback factory, Component(const char* name, factory_callback factory);
analyzer::Tag::subtype_t subtype = 0);
/** /**
* Copy constructor. * Copy constructor.
@ -79,13 +74,6 @@ public:
*/ */
factory_callback Factory() const { return factory; } factory_callback Factory() const { return factory; }
/**
* Returns the analyzer's tag. Note that this is automatically
* generated for each new Components, and hence unique across all of
* them.
*/
analyzer::Tag Tag() const;
/** /**
* Generates a human-readable description of the component's main * Generates a human-readable description of the component's main
* parameters. This goes into the output of \c "bro -NN". * parameters. This goes into the output of \c "bro -NN".
@ -98,10 +86,6 @@ private:
const char* name; // The analyzer's name. const char* name; // The analyzer's name.
const char* canon_name; // The analyzer's canonical name. const char* canon_name; // The analyzer's canonical name.
factory_callback factory; // The analyzer's factory callback. factory_callback factory; // The analyzer's factory callback.
analyzer::Tag tag; // The automatically assigned analyzer tag.
// Global counter used to generate unique tags.
static analyzer::Tag::type_t type_counter;
}; };
} }
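With the subtype parameter gone, creating a file-analyzer component is just a canonical name plus a factory; the tag is assigned automatically by the TaggedComponent base. Illustrative only, reusing the hypothetical MyAnalyzer sketched earlier; how the component object reaches the plugin manager is plugin boilerplate not shown in this diff:

    file_analysis::Component* c =
        new file_analysis::Component("MY_ANALYZER", MyAnalyzer::Instantiate);

    // During Manager::InitPreScript() the manager then effectively runs
    //     RegisterComponent(c, "ANALYZER_");
    // which installs the script-layer enum value Files::ANALYZER_MY_ANALYZER.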

View file

@ -88,7 +88,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
if ( conn ) if ( conn )
{ {
// add source, connection, is_orig fields // add source, connection, is_orig fields
SetSource(analyzer_mgr->GetAnalyzerName(tag)); SetSource(analyzer_mgr->GetComponentName(tag));
val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL)); val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL));
UpdateConnectionFields(conn, is_orig); UpdateConnectionFields(conn, is_orig);
} }
@ -231,14 +231,14 @@ void File::ScheduleInactivityTimer() const
timer_mgr->Add(new FileTimer(network_time, id, GetTimeoutInterval())); timer_mgr->Add(new FileTimer(network_time, id, GetTimeoutInterval()));
} }
bool File::AddAnalyzer(RecordVal* args) bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args)
{ {
return done ? false : analyzers.QueueAdd(args); return done ? false : analyzers.QueueAdd(tag, args);
} }
bool File::RemoveAnalyzer(const RecordVal* args) bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)
{ {
return done ? false : analyzers.QueueRemove(args); return done ? false : analyzers.QueueRemove(tag, args);
} }
bool File::BufferBOF(const u_char* data, uint64 len) bool File::BufferBOF(const u_char* data, uint64 len)
@ -321,7 +321,7 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
while ( (a = analyzers.NextEntry(c)) ) while ( (a = analyzers.NextEntry(c)) )
{ {
if ( ! a->DeliverChunk(data, len, offset) ) if ( ! a->DeliverChunk(data, len, offset) )
analyzers.QueueRemove(a->Args()); analyzers.QueueRemove(a->Tag(), a->Args());
} }
analyzers.DrainModifications(); analyzers.DrainModifications();
@ -356,7 +356,7 @@ void File::DataIn(const u_char* data, uint64 len)
{ {
if ( ! a->DeliverStream(data, len) ) if ( ! a->DeliverStream(data, len) )
{ {
analyzers.QueueRemove(a->Args()); analyzers.QueueRemove(a->Tag(), a->Args());
continue; continue;
} }
@ -364,7 +364,7 @@ void File::DataIn(const u_char* data, uint64 len)
LookupFieldDefaultCount(missing_bytes_idx); LookupFieldDefaultCount(missing_bytes_idx);
if ( ! a->DeliverChunk(data, len, offset) ) if ( ! a->DeliverChunk(data, len, offset) )
analyzers.QueueRemove(a->Args()); analyzers.QueueRemove(a->Tag(), a->Args());
} }
analyzers.DrainModifications(); analyzers.DrainModifications();
@ -389,7 +389,7 @@ void File::EndOfFile()
while ( (a = analyzers.NextEntry(c)) ) while ( (a = analyzers.NextEntry(c)) )
{ {
if ( ! a->EndOfFile() ) if ( ! a->EndOfFile() )
analyzers.QueueRemove(a->Args()); analyzers.QueueRemove(a->Tag(), a->Args());
} }
FileEvent(file_state_remove); FileEvent(file_state_remove);
@ -411,7 +411,7 @@ void File::Gap(uint64 offset, uint64 len)
while ( (a = analyzers.NextEntry(c)) ) while ( (a = analyzers.NextEntry(c)) )
{ {
if ( ! a->Undelivered(offset, len) ) if ( ! a->Undelivered(offset, len) )
analyzers.QueueRemove(a->Args()); analyzers.QueueRemove(a->Tag(), a->Args());
} }
if ( FileEventAvailable(file_gap) ) if ( FileEventAvailable(file_gap) )

View file

@ -10,6 +10,7 @@
#include "Conn.h" #include "Conn.h"
#include "Val.h" #include "Val.h"
#include "Tag.h"
#include "AnalyzerSet.h" #include "AnalyzerSet.h"
#include "BroString.h" #include "BroString.h"
@ -94,17 +95,19 @@ public:
/** /**
* Queues attaching an analyzer. Only one analyzer per type can be attached * Queues attaching an analyzer. Only one analyzer per type can be attached
* at a time unless the arguments differ. * at a time unless the arguments differ.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value representing a file analyzer. * @param args an \c AnalyzerArgs value representing a file analyzer.
* @return false if analyzer can't be instantiated, else true. * @return false if analyzer can't be instantiated, else true.
*/ */
bool AddAnalyzer(RecordVal* args); bool AddAnalyzer(file_analysis::Tag tag, RecordVal* args);
/** /**
* Queues removal of an analyzer. * Queues removal of an analyzer.
* @param tag the analyzer tag of the file analyzer to remove.
* @param args an \c AnalyzerArgs value representing a file analyzer. * @param args an \c AnalyzerArgs value representing a file analyzer.
* @return true if analyzer was active at time of call, else false. * @return true if analyzer was active at time of call, else false.
*/ */
bool RemoveAnalyzer(const RecordVal* args); bool RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args);
/** /**
* Pass in non-sequential data and deliver to attached analyzers. * Pass in non-sequential data and deliver to attached analyzers.

View file

@ -14,7 +14,7 @@ FileTimer::FileTimer(double t, const string& id, double interval)
void FileTimer::Dispatch(double t, int is_expire) void FileTimer::Dispatch(double t, int is_expire)
{ {
File* file = file_mgr->Lookup(file_id); File* file = file_mgr->LookupFile(file_id);
if ( ! file ) if ( ! file )
return; return;

View file

@ -18,10 +18,9 @@ TableVal* Manager::disabled = 0;
string Manager::salt; string Manager::salt;
Manager::Manager() Manager::Manager()
: plugin::ComponentManager<file_analysis::Tag,
file_analysis::Component>("Files")
{ {
tag_enum_type = new EnumType("Files::Tag");
::ID* id = install_ID("Tag", "Files", true, true);
add_type(id, tag_enum_type, 0, 0);
} }
Manager::~Manager() Manager::~Manager()
@ -35,27 +34,7 @@ void Manager::InitPreScript()
for ( std::list<Component*>::const_iterator i = analyzers.begin(); for ( std::list<Component*>::const_iterator i = analyzers.begin();
i != analyzers.end(); ++i ) i != analyzers.end(); ++i )
RegisterAnalyzerComponent(*i); RegisterComponent(*i, "ANALYZER_");
}
void Manager::RegisterAnalyzerComponent(Component* component)
{
const char* cname = component->CanonicalName();
if ( tag_enum_type->Lookup("Files", cname) != -1 )
reporter->FatalError("File Analyzer %s defined more than once", cname);
DBG_LOG(DBG_FILE_ANALYSIS, "Registering analyzer %s (tag %s)",
component->Name(), component->Tag().AsString().c_str());
analyzers_by_name.insert(std::make_pair(cname, component));
analyzers_by_tag.insert(std::make_pair(component->Tag(), component));
analyzers_by_val.insert(std::make_pair(
component->Tag().AsEnumVal()->InternalInt(), component));
string id = fmt("ANALYZER_%s", cname);
tag_enum_type->AddName("Files", id.c_str(),
component->Tag().AsEnumVal()->InternalInt(), true);
} }
void Manager::InitPostScript() void Manager::InitPostScript()
@ -193,7 +172,7 @@ void Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
{ {
File* file = Lookup(file_id); File* file = LookupFile(file_id);
if ( ! file ) if ( ! file )
return false; return false;
@ -205,24 +184,26 @@ bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
return true; return true;
} }
bool Manager::AddAnalyzer(const string& file_id, RecordVal* args) const bool Manager::AddAnalyzer(const string& file_id, file_analysis::Tag tag,
RecordVal* args) const
{ {
File* file = Lookup(file_id); File* file = LookupFile(file_id);
if ( ! file ) if ( ! file )
return false; return false;
return file->AddAnalyzer(args); return file->AddAnalyzer(tag, args);
} }
bool Manager::RemoveAnalyzer(const string& file_id, const RecordVal* args) const bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag,
RecordVal* args) const
{ {
File* file = Lookup(file_id); File* file = LookupFile(file_id);
if ( ! file ) if ( ! file )
return false; return false;
return file->RemoveAnalyzer(args); return file->RemoveAnalyzer(tag, args);
} }
File* Manager::GetFile(const string& file_id, Connection* conn, File* Manager::GetFile(const string& file_id, Connection* conn,
@ -255,7 +236,7 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
return rval; return rval;
} }
File* Manager::Lookup(const string& file_id) const File* Manager::LookupFile(const string& file_id) const
{ {
IDMap::const_iterator it = id_map.find(file_id); IDMap::const_iterator it = id_map.find(file_id);
@ -267,7 +248,7 @@ File* Manager::Lookup(const string& file_id) const
void Manager::Timeout(const string& file_id, bool is_terminating) void Manager::Timeout(const string& file_id, bool is_terminating)
{ {
File* file = Lookup(file_id); File* file = LookupFile(file_id);
if ( ! file ) if ( ! file )
return; return;
@ -366,15 +347,13 @@ bool Manager::IsDisabled(analyzer::Tag tag)
return rval; return rval;
} }
Analyzer* Manager::InstantiateAnalyzer(int tag, RecordVal* args, File* f) const Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
{ {
analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag); Component* c = Lookup(tag);
if ( it == analyzers_by_val.end() ) if ( ! c )
reporter->InternalError("cannot instantiate unknown file analyzer: %d", reporter->InternalError("cannot instantiate unknown file analyzer: %s",
tag); tag.AsString().c_str());
Component* c = it->second;
if ( ! c->Factory() ) if ( ! c->Factory() )
reporter->InternalError("file analyzer %s cannot be instantiated " reporter->InternalError("file analyzer %s cannot be instantiated "
@ -382,14 +361,3 @@ Analyzer* Manager::InstantiateAnalyzer(int tag, RecordVal* args, File* f) const
return c->Factory()(args, f); return c->Factory()(args, f);
} }
const char* Manager::GetAnalyzerName(int tag) const
{
analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag);
if ( it == analyzers_by_val.end() )
reporter->InternalError("cannot get name of unknown file analyzer: %d",
tag);
return it->second->CanonicalName();
}
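Callers of the file-analysis manager now resolve a Files tag first and pass it alongside the AnalyzerArgs record. A hedged usage sketch; the analyzer name is hypothetical, and file_id (a std::string) and args (a RecordVal*) are assumed to be in scope:

    file_analysis::Tag md5 = file_mgr->GetComponentTag("MD5");

    if ( md5 && ! file_mgr->AddAnalyzer(file_id, md5, args) )
        reporter->Error("could not attach MD5 analyzer to %s", file_id.c_str());

    // RemoveAnalyzer(file_id, md5, args) takes the same (tag, args) pair.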

View file

@ -18,7 +18,8 @@
#include "File.h" #include "File.h"
#include "FileTimer.h" #include "FileTimer.h"
#include "Component.h" #include "Component.h"
#include "Tag.h"
#include "plugin/ComponentManager.h"
#include "analyzer/Tag.h" #include "analyzer/Tag.h"
#include "file_analysis/file_analysis.bif.h" #include "file_analysis/file_analysis.bif.h"
@ -28,7 +29,7 @@ namespace file_analysis {
/** /**
* Main entry point for interacting with file analysis. * Main entry point for interacting with file analysis.
*/ */
class Manager { class Manager : public plugin::ComponentManager<Tag, Component> {
public: public:
/** /**
@ -177,18 +178,22 @@ public:
* analyzers of a given type can be attached per file identifier at a time * analyzers of a given type can be attached per file identifier at a time
* as long as the arguments differ. * as long as the arguments differ.
* @param file_id the file identifier/hash. * @param file_id the file identifier/hash.
* @param tag the analyzer tag of the file analyzer to add.
* @param args a \c AnalyzerArgs value which describes a file analyzer. * @param args a \c AnalyzerArgs value which describes a file analyzer.
* @return false if the analyzer failed to be instantiated, else true. * @return false if the analyzer failed to be instantiated, else true.
*/ */
bool AddAnalyzer(const string& file_id, RecordVal* args) const; bool AddAnalyzer(const string& file_id, file_analysis::Tag tag,
RecordVal* args) const;
/** /**
* Queue removal of an analyzer for a given file identifier. * Queue removal of an analyzer for a given file identifier.
* @param file_id the file identifier/hash. * @param file_id the file identifier/hash.
* @param tag the analyzer tag of the file analyzer to remove.
* @param args a \c AnalyzerArgs value which describes a file analyzer. * @param args a \c AnalyzerArgs value which describes a file analyzer.
* @return true if the analyzer is active at the time of call, else false. * @return true if the analyzer is active at the time of call, else false.
*/ */
bool RemoveAnalyzer(const string& file_id, const RecordVal* args) const; bool RemoveAnalyzer(const string& file_id, file_analysis::Tag tag,
RecordVal* args) const;
/** /**
* Tells whether analysis for a file is active or ignored. * Tells whether analysis for a file is active or ignored.
@ -204,15 +209,7 @@ public:
* @param f The file the analyzer is to be associated with. * @param f The file the analyzer is to be associated with.
* @return The new analyzer instance or null if tag is invalid. * @return The new analyzer instance or null if tag is invalid.
*/ */
Analyzer* InstantiateAnalyzer(int tag, RecordVal* args, File* f) const; Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const;
/**
* Translates a script-level file analyzer tag in to corresponding file
* analyzer name.
* @param tag The enum val of a file analyzer.
* @return The human-readable name of the file analyzer.
*/
const char* GetAnalyzerName(int tag) const;
protected: protected:
friend class FileTimer; friend class FileTimer;
@ -247,7 +244,7 @@ protected:
* @return the File object mapped to \a file_id, or a null pointer if no * @return the File object mapped to \a file_id, or a null pointer if no
* mapping exists. * mapping exists.
*/ */
File* Lookup(const string& file_id) const; File* LookupFile(const string& file_id) const;
/** /**
* Evaluate timeout policy for a file and remove the File object mapped to * Evaluate timeout policy for a file and remove the File object mapped to
@ -287,20 +284,10 @@ protected:
static bool IsDisabled(analyzer::Tag tag); static bool IsDisabled(analyzer::Tag tag);
private: private:
typedef map<string, Component*> analyzer_map_by_name;
typedef map<analyzer::Tag, Component*> analyzer_map_by_tag;
typedef map<int, Component*> analyzer_map_by_val;
void RegisterAnalyzerComponent(Component* component);
IDMap id_map; /**< Map file ID to file_analysis::File records. */ IDMap id_map; /**< Map file ID to file_analysis::File records. */
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */ IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_file_id; /**< Hash of what get_file_handle event sets. */ string current_file_id; /**< Hash of what get_file_handle event sets. */
EnumType* tag_enum_type; /**< File analyzer tag type. */
analyzer_map_by_name analyzers_by_name;
analyzer_map_by_tag analyzers_by_tag;
analyzer_map_by_val analyzers_by_val;
static TableVal* disabled; /**< Table of disabled analyzers. */ static TableVal* disabled; /**< Table of disabled analyzers. */
static string salt; /**< A salt added to file handles before hashing. */ static string salt; /**< A salt added to file handles before hashing. */

24
src/file_analysis/Tag.cc Normal file
View file

@ -0,0 +1,24 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "Tag.h"
#include "Manager.h"
using namespace file_analysis;
file_analysis::Tag file_analysis::Tag::Error;
file_analysis::Tag::Tag(type_t type, subtype_t subtype)
: ::Tag(file_mgr->GetTagEnumType(), type, subtype)
{
}
file_analysis::Tag& file_analysis::Tag::operator=(const file_analysis::Tag& other)
{
::Tag::operator=(other);
return *this;
}
EnumVal* file_analysis::Tag::AsEnumVal() const
{
return ::Tag::AsEnumVal(file_mgr->GetTagEnumType());
}

116
src/file_analysis/Tag.h Normal file
View file

@ -0,0 +1,116 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYZER_TAG_H
#define FILE_ANALYZER_TAG_H
#include "config.h"
#include "util.h"
#include "../Tag.h"
#include "plugin/TaggedComponent.h"
#include "plugin/ComponentManager.h"
class EnumVal;
namespace file_analysis {
class Component;
/**
* Class to identify a file analyzer type.
*
* The script-layer analogue is Files::Tag.
*/
class Tag : public ::Tag {
public:
/*
* Copy constructor.
*/
Tag(const Tag& other) : ::Tag(other) {}
/**
* Default constructor. This initializes the tag with an error value
* that will make \c operator \c bool return false.
*/
Tag() : ::Tag() {}
/**
* Destructor.
*/
~Tag() {}
/**
* Returns false if the tag represents an error value rather than a
* legal analyzer type.
* TODO: make this conversion operator "explicit" (C++11) or use a
* "safe bool" idiom (not necessary if "explicit" is available),
* otherwise this may allow nonsense/undesired comparison operations.
*
*/
operator bool() const { return *this != Tag(); }
/**
* Assignment operator.
*/
Tag& operator=(const Tag& other);
/**
* Compares two tags for equality.
*/
bool operator==(const Tag& other) const
{
return ::Tag::operator==(other);
}
/**
* Compares two tags for inequality.
*/
bool operator!=(const Tag& other) const
{
return ::Tag::operator!=(other);
}
/**
* Compares two tags for less-than relationship.
*/
bool operator<(const Tag& other) const
{
return ::Tag::operator<(other);
}
/**
* Returns the \c Files::Tag enum that corresponds to this tag.
* The returned value does not have its ref-count increased.
*/
EnumVal* AsEnumVal() const;
static Tag Error;
protected:
friend class plugin::ComponentManager<Tag, Component>;
friend class plugin::TaggedComponent<Tag>;
/**
* Constructor.
*
* @param type The main type. Note that the \a file_analysis::Manager
* manages the value space internally, so no one else should assign
* main types.
*
* @param subtype The sub type, which is left to an analyzer for
* interpretation. By default it's set to zero.
*/
Tag(type_t type, subtype_t subtype = 0);
/**
* Constructor.
*
* @param val An enum value of script type \c Files::Tag.
*/
Tag(EnumVal* val) : ::Tag(val) {}
};
}
#endif
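A short hedged example of how the new tag type behaves; the analyzer names are illustrative:

    file_analysis::Tag md5 = file_mgr->GetComponentTag("MD5");            // valid tag
    file_analysis::Tag bad = file_mgr->GetComponentTag("NO_SUCH_NAME");   // error tag

    if ( ! bad )   // operator bool is false for the error/default value
        DBG_LOG(DBG_FILE_ANALYSIS, "no analyzer named NO_SUCH_NAME");

    if ( md5 != bad && md5 == file_mgr->GetComponentTag("MD5") )
        DBG_LOG(DBG_FILE_ANALYSIS, "tags compare like the old (type, subtype) pairs");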

View file

@ -4,5 +4,5 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}) ${CMAKE_CURRENT_BINARY_DIR})
bro_plugin_begin(Bro FileDataEvent) bro_plugin_begin(Bro FileDataEvent)
bro_plugin_cc(DataEvent.cc Plugin.cc) bro_plugin_cc(DataEvent.cc Plugin.cc ../../Analyzer.cc)
bro_plugin_end() bro_plugin_end()

View file

@ -6,12 +6,15 @@
#include "EventRegistry.h" #include "EventRegistry.h"
#include "Event.h" #include "Event.h"
#include "util.h" #include "util.h"
#include "file_analysis/Manager.h"
using namespace file_analysis; using namespace file_analysis;
DataEvent::DataEvent(RecordVal* args, File* file, DataEvent::DataEvent(RecordVal* args, File* file,
EventHandlerPtr ce, EventHandlerPtr se) EventHandlerPtr ce, EventHandlerPtr se)
: file_analysis::Analyzer(args, file), chunk_event(ce), stream_event(se) : file_analysis::Analyzer(file_mgr->GetComponentTag("DATA_EVENT"),
args, file),
chunk_event(ce), stream_event(se)
{ {
} }

View file

@ -4,5 +4,5 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}) ${CMAKE_CURRENT_BINARY_DIR})
bro_plugin_begin(Bro FileExtract) bro_plugin_begin(Bro FileExtract)
bro_plugin_cc(Extract.cc Plugin.cc) bro_plugin_cc(Extract.cc Plugin.cc ../../Analyzer.cc)
bro_plugin_end() bro_plugin_end()

View file

@ -4,11 +4,13 @@
#include "Extract.h" #include "Extract.h"
#include "util.h" #include "util.h"
#include "file_analysis/Manager.h"
using namespace file_analysis; using namespace file_analysis;
Extract::Extract(RecordVal* args, File* file, const string& arg_filename) Extract::Extract(RecordVal* args, File* file, const string& arg_filename)
: file_analysis::Analyzer(args, file), filename(arg_filename) : file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), args, file),
filename(arg_filename)
{ {
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);

View file

@ -4,6 +4,6 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}) ${CMAKE_CURRENT_BINARY_DIR})
bro_plugin_begin(Bro FileHash) bro_plugin_begin(Bro FileHash)
bro_plugin_cc(Hash.cc Plugin.cc) bro_plugin_cc(Hash.cc Plugin.cc ../../Analyzer.cc)
bro_plugin_bif(events.bif) bro_plugin_bif(events.bif)
bro_plugin_end() bro_plugin_end()

View file

@ -5,11 +5,12 @@
#include "Hash.h" #include "Hash.h"
#include "util.h" #include "util.h"
#include "Event.h" #include "Event.h"
#include "file_analysis/Manager.h"
using namespace file_analysis; using namespace file_analysis;
Hash::Hash(RecordVal* args, File* file, HashVal* hv, const char* arg_kind) Hash::Hash(RecordVal* args, File* file, HashVal* hv, const char* arg_kind)
: file_analysis::Analyzer(args, file), hash(hv), fed(false), kind(arg_kind) : file_analysis::Analyzer(file_mgr->GetComponentTag(to_upper(arg_kind).c_str()), args, file), hash(hv), fed(false), kind(arg_kind)
{ {
hash->Init(); hash->Init();
} }

View file

@ -16,21 +16,23 @@ function Files::__set_timeout_interval%(file_id: string, t: interval%): bool
%} %}
## :bro:see:`Files::add_analyzer`. ## :bro:see:`Files::add_analyzer`.
function Files::__add_analyzer%(file_id: string, args: any%): bool function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
%{ %{
using BifType::Record::Files::AnalyzerArgs; using BifType::Record::Files::AnalyzerArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
bool result = file_mgr->AddAnalyzer(file_id->CheckString(), rv); bool result = file_mgr->AddAnalyzer(file_id->CheckString(),
file_mgr->GetComponentTag(tag), rv);
Unref(rv); Unref(rv);
return new Val(result, TYPE_BOOL); return new Val(result, TYPE_BOOL);
%} %}
## :bro:see:`Files::remove_analyzer`. ## :bro:see:`Files::remove_analyzer`.
function Files::__remove_analyzer%(file_id: string, args: any%): bool function Files::__remove_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
%{ %{
using BifType::Record::Files::AnalyzerArgs; using BifType::Record::Files::AnalyzerArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(), rv); bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(),
file_mgr->GetComponentTag(tag) , rv);
Unref(rv); Unref(rv);
return new Val(result, TYPE_BOOL); return new Val(result, TYPE_BOOL);
%} %}
@ -45,7 +47,7 @@ function Files::__stop%(file_id: string%): bool
## :bro:see:`Files::analyzer_name`. ## :bro:see:`Files::analyzer_name`.
function Files::__analyzer_name%(tag: Files::Tag%) : string function Files::__analyzer_name%(tag: Files::Tag%) : string
%{ %{
return new StringVal(file_mgr->GetAnalyzerName(tag->InternalInt())); return new StringVal(file_mgr->GetComponentName(tag));
%} %}
module GLOBAL; module GLOBAL;

View file

@ -872,6 +872,7 @@ int main(int argc, char** argv)
if ( generate_documentation ) if ( generate_documentation )
{ {
CreateProtoAnalyzerDoc("proto-analyzers.rst"); CreateProtoAnalyzerDoc("proto-analyzers.rst");
CreateFileAnalyzerDoc("file-analyzers.rst");
std::list<BroDoc*>::iterator it; std::list<BroDoc*>::iterator it;

View file

@ -0,0 +1,248 @@
#ifndef PLUGIN_COMPONENT_MANAGER_H
#define PLUGIN_COMPONENT_MANAGER_H
#include <map>
#include <list>
#include <string>
#include "Type.h"
#include "ID.h"
#include "Var.h"
#include "Val.h"
#include "Reporter.h"
namespace plugin {
/**
* A class that manages tracking of plugin components (e.g. analyzers) and
* installs identifiers in the script layer to identify them by a unique tag
* (a script-layer enum value).
*
* @tparam T A ::Tag type or derivative.
* @tparam C A plugin::TaggedComponent type derivative.
*/
template <class T, class C>
class ComponentManager {
public:
/**
* Constructor creates a new enum type called a "Tag" to associate with
* a component.
*
* @param module The script-layer module in which to install the "Tag" ID
* representing an enum type.
*/
ComponentManager(const string& module);
/**
* @return The script-layer module in which the component's "Tag" ID lives.
*/
const char* GetModule() const;
/**
* @return A list of all registered components.
*/
list<C*> GetComponents() const;
/**
* @return The enum type associated with the script-layer "Tag".
*/
EnumType* GetTagEnumType() const;
/**
* Get a component name from its tag.
*
* @param tag A component's tag.
* @return The canonical component name.
*/
const char* GetComponentName(T tag) const;
/**
* Get a component name from its enum value.
*
* @param val A component's enum value.
* @return The canonical component name.
*/
const char* GetComponentName(Val* val) const;
/**
* Get a component tag from its name.
*
* @param name A component's canonical name.
* @return The component's tag, or a tag representing an error if
* no such component associated with the name exists.
*/
T GetComponentTag(const string& name) const;
/**
* Get a component tag from its enum value.
*
* @param v A component's enum value.
* @return The component's tag, or a tag representing an error if
* no such component associated with the value exists.
*/
T GetComponentTag(Val* v) const;
protected:
/**
* Add a component to the internal maps used to keep track of it and create
* a script-layer ID for the component's enum value.
*
* @param component A component to track.
* @param prefix The script-layer ID associated with the component's enum
* value will be a concatenation of this prefix and the component's
* canonical name.
*/
void RegisterComponent(C* component, const string& prefix = "");
/**
* @param name The canonical name of a component.
* @return The component associated with the name or a null pointer if no
* such component exists.
*/
C* Lookup(const string& name) const;
/**
* @param tag A component tag.
* @return The component associated with the tag or a null pointer if no
* such component exists.
*/
C* Lookup(const T& tag) const;
/**
* @param val A component's enum value.
* @return The component associated with the value or a null pointer if no
* such component exists.
*/
C* Lookup(EnumVal* val) const;
private:
string module; /**< Script layer module in which component tags live. */
EnumType* tag_enum_type; /**< Enum type of component tags. */
map<string, C*> components_by_name;
map<T, C*> components_by_tag;
map<int, C*> components_by_val;
};
template <class T, class C>
ComponentManager<T, C>::ComponentManager(const string& arg_module)
: module(arg_module)
{
tag_enum_type = new EnumType(module + "::Tag");
::ID* id = install_ID("Tag", module.c_str(), true, true);
add_type(id, tag_enum_type, 0, 0);
}
template <class T, class C>
const char* ComponentManager<T, C>::GetModule() const
{
return module.c_str();
}
template <class T, class C>
list<C*> ComponentManager<T, C>::GetComponents() const
{
list<C*> rval;
typename map<T, C*>::const_iterator i;
for ( i = components_by_tag.begin(); i != components_by_tag.end(); ++i )
rval.push_back(i->second);
return rval;
}
template <class T, class C>
EnumType* ComponentManager<T, C>::GetTagEnumType() const
{
return tag_enum_type;
}
template <class T, class C>
const char* ComponentManager<T, C>::GetComponentName(T tag) const
{
static const char* error = "<error>";
if ( ! tag )
return error;
C* c = Lookup(tag);
if ( ! c )
reporter->InternalError("request for name of unknown component tag %s",
tag.AsString().c_str());
return c->CanonicalName();
}
template <class T, class C>
const char* ComponentManager<T, C>::GetComponentName(Val* val) const
{
return GetComponentName(T(val->AsEnumVal()));
}
template <class T, class C>
T ComponentManager<T, C>::GetComponentTag(const string& name) const
{
C* c = Lookup(name);
return c ? c->Tag() : T();
}
template <class T, class C>
T ComponentManager<T, C>::GetComponentTag(Val* v) const
{
C* c = Lookup(v->AsEnumVal());
return c ? c->Tag() : T();
}
template <class T, class C>
C* ComponentManager<T, C>::Lookup(const string& name) const
{
typename map<string, C*>::const_iterator i =
components_by_name.find(to_upper(name));
return i != components_by_name.end() ? i->second : 0;
}
template <class T, class C>
C* ComponentManager<T, C>::Lookup(const T& tag) const
{
typename map<T, C*>::const_iterator i = components_by_tag.find(tag);
return i != components_by_tag.end() ? i->second : 0;
}
template <class T, class C>
C* ComponentManager<T, C>::Lookup(EnumVal* val) const
{
typename map<int, C*>::const_iterator i =
components_by_val.find(val->InternalInt());
return i != components_by_val.end() ? i->second : 0;
}
template <class T, class C>
void ComponentManager<T, C>::RegisterComponent(C* component,
const string& prefix)
{
const char* cname = component->CanonicalName();
if ( Lookup(cname) )
reporter->FatalError("Component '%s::%s' defined more than once",
module.c_str(), cname);
DBG_LOG(DBG_PLUGINS, "Registering component %s (tag %s)",
component->Name(), component->Tag().AsString().c_str());
components_by_name.insert(std::make_pair(cname, component));
components_by_tag.insert(std::make_pair(component->Tag(), component));
components_by_val.insert(std::make_pair(
component->Tag().AsEnumVal()->InternalInt(), component));
// Install an identifier for the enum value
string id = fmt("%s%s", prefix.c_str(), cname);
tag_enum_type->AddName(module, id.c_str(),
component->Tag().AsEnumVal()->InternalInt(), true);
}
} // namespace plugin
#endif
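The files.bif changes earlier in this diff show the typical consumer side of this class. A minimal sketch of that usage follows; the example() wrapper is hypothetical, and it assumes (as the friend declaration in file_analysis::Tag suggests) that file_analysis::Manager derives from plugin::ComponentManager<file_analysis::Tag, file_analysis::Component>.

#include "Val.h"
#include "file_analysis/Manager.h"

// Map a script-layer Files::Tag enum value to an internal tag and back to
// the component's canonical name, mirroring Files::__analyzer_name above.
const char* example(Val* script_tag)
	{
	file_analysis::Tag t = file_mgr->GetComponentTag(script_tag);
	return file_mgr->GetComponentName(t); // e.g. "MD5", or "<error>" for an unknown tag
	}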

View file

@ -0,0 +1,85 @@
#ifndef PLUGIN_TAGGED_COMPONENT_H
#define PLUGIN_TAGGED_COMPONENT_H
namespace plugin {
/**
* A class which has a tag of a given type associated with it.
*
* @tparam T A ::Tag type or derivative.
*/
template <class T>
class TaggedComponent {
public:
/**
* Constructor creates a unique tag value for this component.
*
* @param subtype A subtype associated with this component that
* further distinguishes it. The subtype will be integrated into
* the Tag that the manager associates with this component,
* and component instances can accordingly access it via Tag().
* If not used, leave at zero.
*/
TaggedComponent(typename T::subtype_t subtype = 0);
/**
* Copy constructor.
*
* @param other Another component from which to copy its tag value.
*/
TaggedComponent(const TaggedComponent& other);
/**
* Assignment operator.
*
* @param other A component to assign.
* @return The assigned object.
*/
TaggedComponent& operator=(const TaggedComponent& other);
/**
* @return The component's tag.
*/
T Tag() const;
private:
T tag; /**< The automatically assigned analyzer tag. */
static typename T::type_t type_counter; /**< Used to generate globally
unique tags. */
};
template <class T>
TaggedComponent<T>::TaggedComponent(typename T::subtype_t subtype)
{
tag = T(++type_counter, subtype);
}
template <class T>
TaggedComponent<T>::TaggedComponent(const TaggedComponent<T>& other)
{
tag = other.tag;
}
template <class T>
TaggedComponent<T>&
TaggedComponent<T>::operator =(const TaggedComponent<T>& other)
{
if ( &other != this )
tag = other.tag;
return *this;
}
template <class T>
T TaggedComponent<T>::Tag() const
{
return tag;
}
template <class T> typename T::type_t TaggedComponent<T>::type_counter(0);
} // namespace plugin
#endif
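To illustrate what TaggedComponent provides, here is a small sketch; ToyTag and ToyComponent are made-up stand-ins for a real ::Tag derivative and component class, and the include path is assumed.

#include <stdint.h>
#include "plugin/TaggedComponent.h"

// Hypothetical minimal tag type providing what TaggedComponent<T> expects:
// type_t/subtype_t typedefs and a (type, subtype) constructor.
struct ToyTag {
	typedef uint32_t type_t;
	typedef uint32_t subtype_t;
	ToyTag(type_t t = 0, subtype_t s = 0) : type(t), subtype(s) { }
	type_t type;
	subtype_t subtype;
};

// Inheriting is enough: each instance draws the next value from the shared
// per-type counter, so the first ToyComponent gets tag type 1, the second
// gets 2, and so on.
class ToyComponent : public plugin::TaggedComponent<ToyTag> {
};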

View file

@ -1,10 +1,12 @@
// See the file "COPYING" in the main distribution directory for copyright. // See the file "COPYING" in the main distribution directory for copyright.
#include "BitVector.h" #include <openssl/sha.h>
#include <cassert> #include <cassert>
#include <limits> #include <limits>
#include "BitVector.h"
#include "Serializer.h" #include "Serializer.h"
#include "digest.h"
using namespace probabilistic; using namespace probabilistic;
@ -490,6 +492,21 @@ BitVector::size_type BitVector::FindNext(size_type i) const
return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1); return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1);
} }
size_t BitVector::Hash() const
{
size_t hash = 0;
u_char buf[SHA256_DIGEST_LENGTH];
SHA256_CTX ctx;
sha256_init(&ctx);
for ( size_type i = 0; i < Blocks(); ++i )
sha256_update(&ctx, &bits[i], sizeof(bits[i]));
sha256_final(&ctx, buf);
return *reinterpret_cast<size_t*>(buf); // Use the first bytes as seed.
}
BitVector::size_type BitVector::lowest_bit(block_type block) BitVector::size_type BitVector::lowest_bit(block_type block)
{ {
block_type x = block - (block & (block - 1)); block_type x = block - (block & (block - 1));

View file

@ -276,6 +276,13 @@ public:
*/ */
size_type FindNext(size_type i) const; size_type FindNext(size_type i) const;
/** Computes a hash value of the internal representation.
* This is mainly for debugging/testing purposes.
*
* @return The hash.
*/
size_t Hash() const;
/** /**
* Serializes the bit vector. * Serializes the bit vector.
* *

View file

@ -9,6 +9,8 @@
#include "CounterVector.h" #include "CounterVector.h"
#include "Serializer.h" #include "Serializer.h"
#include "../util.h"
using namespace probabilistic; using namespace probabilistic;
BloomFilter::BloomFilter() BloomFilter::BloomFilter()
@ -40,28 +42,15 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
if ( ! SERIALIZE(static_cast<uint16>(hasher->K())) ) return hasher->Serialize(info);
return false;
return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size());
} }
bool BloomFilter::DoUnserialize(UnserialInfo* info) bool BloomFilter::DoUnserialize(UnserialInfo* info)
{ {
DO_UNSERIALIZE(SerialObj); DO_UNSERIALIZE(SerialObj);
uint16 k; hasher = Hasher::Unserialize(info);
if ( ! UNSERIALIZE(&k) ) return hasher != 0;
return false;
const char* name;
if ( ! UNSERIALIZE_STR(&name, 0) )
return false;
hasher = Hasher::Create(k, name);
delete [] name;
return true;
} }
size_t BasicBloomFilter::M(double fp, size_t capacity) size_t BasicBloomFilter::M(double fp, size_t capacity)
@ -120,6 +109,11 @@ BasicBloomFilter* BasicBloomFilter::Clone() const
return copy; return copy;
} }
std::string BasicBloomFilter::InternalState() const
{
return fmt("%" PRIu64, (uint64_t)bits->Hash());
}
BasicBloomFilter::BasicBloomFilter() BasicBloomFilter::BasicBloomFilter()
{ {
bits = 0; bits = 0;
@ -146,14 +140,18 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
return (bits != 0); return (bits != 0);
} }
void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h) void BasicBloomFilter::Add(const HashKey* key)
{ {
Hasher::digest_vector h = hasher->Hash(key);
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
bits->Set(h[i] % bits->Size()); bits->Set(h[i] % bits->Size());
} }
size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const size_t BasicBloomFilter::Count(const HashKey* key) const
{ {
Hasher::digest_vector h = hasher->Hash(key);
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
{ {
if ( ! (*bits)[h[i] % bits->Size()] ) if ( ! (*bits)[h[i] % bits->Size()] )
@ -219,6 +217,11 @@ CountingBloomFilter* CountingBloomFilter::Clone() const
return copy; return copy;
} }
string CountingBloomFilter::InternalState() const
{
return fmt("%" PRIu64, (uint64_t)cells->Hash());
}
IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
bool CountingBloomFilter::DoSerialize(SerialInfo* info) const bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
@ -235,14 +238,18 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
} }
// TODO: Use partitioning in add/count to allow for reusing CMS bounds. // TODO: Use partitioning in add/count to allow for reusing CMS bounds.
void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h) void CountingBloomFilter::Add(const HashKey* key)
{ {
Hasher::digest_vector h = hasher->Hash(key);
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
cells->Increment(h[i] % cells->Size()); cells->Increment(h[i] % cells->Size());
} }
size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const size_t CountingBloomFilter::Count(const HashKey* key) const
{ {
Hasher::digest_vector h = hasher->Hash(key);
CounterVector::size_type min = CounterVector::size_type min =
std::numeric_limits<CounterVector::size_type>::max(); std::numeric_limits<CounterVector::size_type>::max();

View file

@ -13,9 +13,6 @@ class CounterVector;
/** /**
* The abstract base class for Bloom filters. * The abstract base class for Bloom filters.
*
* At this point we won't let the user choose the hasher, but we might open
* up the interface in the future.
*/ */
class BloomFilter : public SerialObj { class BloomFilter : public SerialObj {
public: public:
@ -25,27 +22,20 @@ public:
virtual ~BloomFilter(); virtual ~BloomFilter();
/** /**
* Adds an element of type T to the Bloom filter. * Adds an element to the Bloom filter.
* @param x The element to add *
* @param key The key associated with the element to add.
*/ */
template <typename T> virtual void Add(const HashKey* key) = 0;
void Add(const T& x)
{
AddImpl((*hasher)(x));
}
/** /**
* Retrieves the associated count of a given value. * Retrieves the associated count of a given value.
* *
* @param x The value of type `T` to check. * @param key The key associated with the element to check.
* *
* @return The counter associated with *x*. * @return The counter associated with *key*.
*/ */
template <typename T> virtual size_t Count(const HashKey* key) const = 0;
size_t Count(const T& x) const
{
return CountImpl((*hasher)(x));
}
/** /**
* Checks whether the Bloom filter is empty. * Checks whether the Bloom filter is empty.
@ -75,6 +65,12 @@ public:
*/ */
virtual BloomFilter* Clone() const = 0; virtual BloomFilter* Clone() const = 0;
/**
* Returns a string with a representation of the Bloom filter's
* internal state. This is for debugging/testing purposes only.
*/
virtual string InternalState() const = 0;
/** /**
* Serializes the Bloom filter. * Serializes the Bloom filter.
* *
@ -109,25 +105,6 @@ protected:
*/ */
BloomFilter(const Hasher* hasher); BloomFilter(const Hasher* hasher);
/**
* Abstract method for implementinng the *Add* operation.
*
* @param hashes A set of *k* hashes for the item to add, computed by
* the internal hasher object.
*
*/
virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
/**
* Abstract method for implementing the *Count* operation.
*
* @param hashes A set of *k* hashes for the item to add, computed by
* the internal hasher object.
*
* @return Returns the counter associated with the hashed element.
*/
virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
const Hasher* hasher; const Hasher* hasher;
}; };
@ -180,6 +157,7 @@ public:
virtual void Clear(); virtual void Clear();
virtual bool Merge(const BloomFilter* other); virtual bool Merge(const BloomFilter* other);
virtual BasicBloomFilter* Clone() const; virtual BasicBloomFilter* Clone() const;
virtual string InternalState() const;
protected: protected:
DECLARE_SERIAL(BasicBloomFilter); DECLARE_SERIAL(BasicBloomFilter);
@ -190,8 +168,8 @@ protected:
BasicBloomFilter(); BasicBloomFilter();
// Overridden from BloomFilter. // Overridden from BloomFilter.
virtual void AddImpl(const Hasher::digest_vector& h); virtual void Add(const HashKey* key);
virtual size_t CountImpl(const Hasher::digest_vector& h) const; virtual size_t Count(const HashKey* key) const;
private: private:
BitVector* bits; BitVector* bits;
@ -219,6 +197,7 @@ public:
virtual void Clear(); virtual void Clear();
virtual bool Merge(const BloomFilter* other); virtual bool Merge(const BloomFilter* other);
virtual CountingBloomFilter* Clone() const; virtual CountingBloomFilter* Clone() const;
virtual string InternalState() const;
protected: protected:
DECLARE_SERIAL(CountingBloomFilter); DECLARE_SERIAL(CountingBloomFilter);
@ -229,8 +208,8 @@ protected:
CountingBloomFilter(); CountingBloomFilter();
// Overridden from BloomFilter. // Overridden from BloomFilter.
virtual void AddImpl(const Hasher::digest_vector& h); virtual void Add(const HashKey* key);
virtual size_t CountImpl(const Hasher::digest_vector& h) const; virtual size_t Count(const HashKey* key) const;
private: private:
CounterVector* cells; CounterVector* cells;
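A sketch of the new key-based Add()/Count() interface that replaces AddImpl()/CountImpl(). The example() function is hypothetical and not part of this commit; it also assumes HashKey's C-string constructor from Hash.h.

#include "Hash.h"
#include "probabilistic/BloomFilter.h"
#include "probabilistic/Hasher.h"

using namespace probabilistic;

size_t example()
	{
	size_t seed = Hasher::MakeSeed(0, 0);          // default / global_hash_seed-derived seed
	const Hasher* h = new DefaultHasher(4, seed);  // k = 4 hash functions
	BasicBloomFilter bf(h, 1024);                  // 1024-cell bit vector

	HashKey key("example.com");
	bf.Add(&key);              // the filter hashes the key itself now
	return bf.Count(&key);     // 1 if (probably) present, 0 otherwise
	}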

View file

@ -10,9 +10,11 @@ set(probabilistic_SRCS
BitVector.cc BitVector.cc
BloomFilter.cc BloomFilter.cc
CounterVector.cc CounterVector.cc
Hasher.cc) Hasher.cc
Topk.cc)
bif_target(bloom-filter.bif) bif_target(bloom-filter.bif)
bif_target(top-k.bif)
bro_add_subdir_library(probabilistic ${probabilistic_SRCS}) bro_add_subdir_library(probabilistic ${probabilistic_SRCS})
add_dependencies(bro_probabilistic generate_outputs) add_dependencies(bro_probabilistic generate_outputs)

View file

@ -153,6 +153,11 @@ CounterVector operator|(const CounterVector& x, const CounterVector& y)
} }
size_t CounterVector::Hash() const
{
return bits->Hash();
}
bool CounterVector::Serialize(SerialInfo* info) const bool CounterVector::Serialize(SerialInfo* info) const
{ {
return SerialObj::Serialize(info); return SerialObj::Serialize(info);

View file

@ -126,6 +126,13 @@ public:
*/ */
CounterVector& operator|=(const CounterVector& other); CounterVector& operator|=(const CounterVector& other);
/** Computes a hash value of the internal representation.
* This is mainly for debugging/testing purposes.
*
* @return The hash.
*/
size_t Hash() const;
/** /**
* Serializes the bit vector. * Serializes the bit vector.
* *

View file

@ -1,61 +1,119 @@
// See the file "COPYING" in the main distribution directory for copyright. // See the file "COPYING" in the main distribution directory for copyright.
#include <typeinfo> #include <typeinfo>
#include <openssl/md5.h>
#include "Hasher.h" #include "Hasher.h"
#include "NetVar.h"
#include "digest.h" #include "digest.h"
#include "Serializer.h"
using namespace probabilistic; using namespace probabilistic;
UHF::UHF(size_t seed, const std::string& extra) size_t Hasher::MakeSeed(const void* data, size_t size)
: h(compute_seed(seed, extra))
{
}
Hasher::digest UHF::hash(const void* x, size_t n) const
{
assert(n <= UHASH_KEY_SIZE);
return n == 0 ? 0 : h(x, n);
}
size_t UHF::compute_seed(size_t seed, const std::string& extra)
{ {
u_char buf[SHA256_DIGEST_LENGTH]; u_char buf[SHA256_DIGEST_LENGTH];
SHA256_CTX ctx; SHA256_CTX ctx;
sha256_init(&ctx); sha256_init(&ctx);
if ( extra.empty() ) if ( data )
sha256_update(&ctx, data, size);
else if ( global_hash_seed && global_hash_seed->Len() > 0 )
sha256_update(&ctx, global_hash_seed->Bytes(), global_hash_seed->Len());
else
{ {
unsigned int first_seed = initial_seed(); unsigned int first_seed = initial_seed();
sha256_update(&ctx, &first_seed, sizeof(first_seed)); sha256_update(&ctx, &first_seed, sizeof(first_seed));
} }
else
sha256_update(&ctx, extra.c_str(), extra.size());
sha256_update(&ctx, &seed, sizeof(seed));
sha256_final(&ctx, buf); sha256_final(&ctx, buf);
return *reinterpret_cast<size_t*>(buf); // Use the first bytes as seed.
// Take the first sizeof(size_t) bytes as seed.
return *reinterpret_cast<size_t*>(buf);
} }
Hasher* Hasher::Create(size_t k, const std::string& name) Hasher::digest_vector Hasher::Hash(const HashKey* key) const
{ {
return new DefaultHasher(k, name); return Hash(key->Key(), key->Size());
} }
Hasher::Hasher(size_t k, const std::string& arg_name) bool Hasher::Serialize(SerialInfo* info) const
: k(k)
{ {
name = arg_name; return SerialObj::Serialize(info);
} }
DefaultHasher::DefaultHasher(size_t k, const std::string& name) Hasher* Hasher::Unserialize(UnserialInfo* info)
: Hasher(k, name)
{ {
for ( size_t i = 0; i < k; ++i ) return reinterpret_cast<Hasher*>(SerialObj::Unserialize(info, SER_HASHER));
hash_functions.push_back(UHF(i, name)); }
bool Hasher::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_HASHER, SerialObj);
if ( ! SERIALIZE(static_cast<uint16>(k)) )
return false;
return SERIALIZE(static_cast<uint64>(seed));
}
bool Hasher::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(SerialObj);
uint16 serial_k;
if ( ! UNSERIALIZE(&serial_k) )
return false;
k = serial_k;
assert(k > 0);
uint64 serial_seed;
if ( ! UNSERIALIZE(&serial_seed) )
return false;
seed = serial_seed;
return true;
}
Hasher::Hasher(size_t arg_k, size_t arg_seed)
{
k = arg_k;
seed = arg_seed;
}
UHF::UHF(size_t arg_seed)
: h(arg_seed)
{
seed = arg_seed;
}
// This function is almost equivalent to HashKey::HashBytes except that it
// does not depend on global state and that we mix in the seed multiple
// times.
Hasher::digest UHF::hash(const void* x, size_t n) const
{
if ( n <= UHASH_KEY_SIZE )
return n == 0 ? 0 : h(x, n);
unsigned char d[16];
MD5(reinterpret_cast<const unsigned char*>(x), n, d);
const unsigned char* s = reinterpret_cast<const unsigned char*>(&seed);
for ( size_t i = 0; i < 16; ++i )
d[i] ^= s[i % sizeof(seed)];
MD5(d, 16, d);
return d[0];
}
DefaultHasher::DefaultHasher(size_t k, size_t seed)
: Hasher(k, seed)
{
for ( size_t i = 1; i <= k; ++i )
hash_functions.push_back(UHF(Seed() + bro_prng(i)));
} }
Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
@ -82,8 +140,29 @@ bool DefaultHasher::Equals(const Hasher* other) const
return hash_functions == o->hash_functions; return hash_functions == o->hash_functions;
} }
DoubleHasher::DoubleHasher(size_t k, const std::string& name) IMPLEMENT_SERIAL(DefaultHasher, SER_DEFAULTHASHER)
: Hasher(k, name), h1(1, name), h2(2, name)
bool DefaultHasher::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_DEFAULTHASHER, Hasher);
// Nothing to do here, the base class has all we need serialized already.
return true;
}
bool DefaultHasher::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(Hasher);
hash_functions.clear();
for ( size_t i = 0; i < K(); ++i )
hash_functions.push_back(UHF(Seed() + bro_prng(i)));
return true;
}
DoubleHasher::DoubleHasher(size_t k, size_t seed)
: Hasher(k, seed), h1(seed + bro_prng(1)), h2(seed + bro_prng(2))
{ {
} }
@ -112,3 +191,23 @@ bool DoubleHasher::Equals(const Hasher* other) const
const DoubleHasher* o = static_cast<const DoubleHasher*>(other); const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
return h1 == o->h1 && h2 == o->h2; return h1 == o->h1 && h2 == o->h2;
} }
IMPLEMENT_SERIAL(DoubleHasher, SER_DOUBLEHASHER)
bool DoubleHasher::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_DOUBLEHASHER, Hasher);
// Nothing to do here, the base class has all we need serialized already.
return true;
}
bool DoubleHasher::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(Hasher);
h1 = UHF(Seed() + bro_prng(1));
h2 = UHF(Seed() + bro_prng(2));
return true;
}

View file

@ -5,6 +5,7 @@
#include "Hash.h" #include "Hash.h"
#include "H3.h" #include "H3.h"
#include "SerialObj.h"
namespace probabilistic { namespace probabilistic {
@ -12,11 +13,25 @@ namespace probabilistic {
* Abstract base class for hashers. A hasher creates a family of hash * Abstract base class for hashers. A hasher creates a family of hash
* functions to hash an element *k* times. * functions to hash an element *k* times.
*/ */
class Hasher { class Hasher : public SerialObj {
public: public:
typedef hash_t digest; typedef hash_t digest;
typedef std::vector<digest> digest_vector; typedef std::vector<digest> digest_vector;
/**
* Creates a valid hasher seed from an arbitrary string.
*
* @param data A pointer to contiguous data that should be crunched into a
* seed. If 0, the function tries to find a global_hash_seed script variable
* to derive a seed from. If this variable does not exist, the function uses
* the initial seed generated at Bro startup.
*
* @param size The number of bytes of *data*.
*
* @return A seed suitable for hashers.
*/
static size_t MakeSeed(const void* data, size_t size);
/** /**
* Destructor. * Destructor.
*/ */
@ -35,6 +50,15 @@ public:
return Hash(&x, sizeof(T)); return Hash(&x, sizeof(T));
} }
/**
* Computes hash values for an element.
*
* @param key The key of the value to hash.
*
* @return Vector of *k* hash values.
*/
digest_vector Hash(const HashKey* key) const;
/** /**
* Computes the hashes for a set of bytes. * Computes the hashes for a set of bytes.
* *
@ -63,38 +87,30 @@ public:
size_t K() const { return k; } size_t K() const { return k; }
/** /**
* Returns the hasher's name. TODO: What's this? * Returns the seed used to construct the hasher.
*/ */
const std::string& Name() const { return name; } size_t Seed() const { return seed; }
/** bool Serialize(SerialInfo* info) const;
* Constructs the hasher used by the implementation. This hardcodes a static Hasher* Unserialize(UnserialInfo* info);
* specific hashing policy. It exists only because the HashingPolicy
* class hierachy is not yet serializable.
*
* @param k The number of hash functions to apply.
*
* @param name The hasher's name. Hashers with the same name should
* provide consistent results.
*
* @return Returns a new hasher instance.
*/
static Hasher* Create(size_t k, const std::string& name);
protected: protected:
DECLARE_ABSTRACT_SERIAL(Hasher);
Hasher() { }
/** /**
* Constructor. * Constructor.
* *
* @param k the number of hash functions. * @param arg_k the number of hash functions.
* *
* @param name A name for the hasher. Hashers with the same name * @param arg_seed The seed for the hasher.
* should provide consistent results.
*/ */
Hasher(size_t k, const std::string& name); Hasher(size_t arg_k, size_t arg_seed);
private: private:
const size_t k; size_t k;
std::string name; size_t seed;
}; };
/** /**
@ -107,13 +123,9 @@ public:
* Constructs an H3 hash function seeded with a given seed and an * Constructs an H3 hash function seeded with a given seed and an
* optional extra seed to replace the initial Bro seed. * optional extra seed to replace the initial Bro seed.
* *
* @param seed The seed to use for this instance. * @param arg_seed The seed to use for this instance.
*
* @param extra If not empty, this parameter replaces the initial
* seed to compute the seed for t to compute the seed NUL-terminated
* string as additional seed.
*/ */
UHF(size_t seed, const std::string& extra = ""); UHF(size_t arg_seed = 0);
template <typename T> template <typename T>
Hasher::digest operator()(const T& x) const Hasher::digest operator()(const T& x) const
@ -156,9 +168,10 @@ public:
} }
private: private:
static size_t compute_seed(size_t seed, const std::string& extra); static size_t compute_seed(size_t seed);
H3<Hasher::digest, UHASH_KEY_SIZE> h; H3<Hasher::digest, UHASH_KEY_SIZE> h;
size_t seed;
}; };
@ -173,16 +186,20 @@ public:
* *
* @param k The number of hash functions to use. * @param k The number of hash functions to use.
* *
* @param name The name of the hasher. * @param seed The seed for the hasher.
*/ */
DefaultHasher(size_t k, const std::string& name); DefaultHasher(size_t k, size_t seed);
// Overridden from Hasher. // Overridden from Hasher.
virtual digest_vector Hash(const void* x, size_t n) const /* final */; virtual digest_vector Hash(const void* x, size_t n) const /* final */;
virtual DefaultHasher* Clone() const /* final */; virtual DefaultHasher* Clone() const /* final */;
virtual bool Equals(const Hasher* other) const /* final */; virtual bool Equals(const Hasher* other) const /* final */;
DECLARE_SERIAL(DefaultHasher);
private: private:
DefaultHasher() { }
std::vector<UHF> hash_functions; std::vector<UHF> hash_functions;
}; };
@ -197,16 +214,20 @@ public:
* *
* @param k The number of hash functions to use. * @param k The number of hash functions to use.
* *
* @param name The name of the hasher. * @param seed The seed for the hasher.
*/ */
DoubleHasher(size_t k, const std::string& name); DoubleHasher(size_t k, size_t seed);
// Overridden from Hasher. // Overridden from Hasher.
virtual digest_vector Hash(const void* x, size_t n) const /* final */; virtual digest_vector Hash(const void* x, size_t n) const /* final */;
virtual DoubleHasher* Clone() const /* final */; virtual DoubleHasher* Clone() const /* final */;
virtual bool Equals(const Hasher* other) const /* final */; virtual bool Equals(const Hasher* other) const /* final */;
DECLARE_SERIAL(DoubleHasher);
private: private:
DoubleHasher() { }
UHF h1; UHF h1;
UHF h2; UHF h2;
}; };
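A sketch of how the new seed handling fits together; example() is hypothetical and not part of this commit. Two hashers built from the same name-derived seed yield identical digests, which is what makes equally named Bloom filters mergeable.

#include <string>
#include "probabilistic/Hasher.h"

using namespace probabilistic;

Hasher::digest_vector example()
	{
	std::string name = "my_filter";
	size_t seed = Hasher::MakeSeed(name.data(), name.size());

	DefaultHasher h(4, seed);              // k = 4 hash functions
	const char data[] = "some element";

	// Four digests; any DefaultHasher constructed from the same seed
	// returns the same four for this input.
	return h.Hash(data, sizeof(data) - 1);
	}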

499
src/probabilistic/Topk.cc Normal file
View file

@ -0,0 +1,499 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "probabilistic/Topk.h"
#include "CompHash.h"
#include "Reporter.h"
#include "Serializer.h"
#include "NetVar.h"
namespace probabilistic {
IMPLEMENT_SERIAL(TopkVal, SER_TOPK_VAL);
static void topk_element_hash_delete_func(void* val)
{
Element* e = (Element*) val;
delete e;
}
Element::~Element()
{
Unref(value);
}
void TopkVal::Typify(BroType* t)
{
assert(!hash && !type);
type = t->Ref();
TypeList* tl = new TypeList(t);
tl->Append(t->Ref());
hash = new CompositeHash(tl);
Unref(tl);
}
HashKey* TopkVal::GetHash(Val* v) const
{
HashKey* key = hash->ComputeHash(v, 1);
assert(key);
return key;
}
TopkVal::TopkVal(uint64 arg_size) : OpaqueVal(topk_type)
{
elementDict = new PDict(Element);
elementDict->SetDeleteFunc(topk_element_hash_delete_func);
size = arg_size;
type = 0;
numElements = 0;
pruned = false;
hash = 0;
}
TopkVal::TopkVal() : OpaqueVal(topk_type)
{
elementDict = new PDict(Element);
elementDict->SetDeleteFunc(topk_element_hash_delete_func);
size = 0;
type = 0;
numElements = 0;
hash = 0;
}
TopkVal::~TopkVal()
{
elementDict->Clear();
delete elementDict;
// now all elements are already gone - delete the buckets
std::list<Bucket*>::iterator bi = buckets.begin();
while ( bi != buckets.end() )
{
delete *bi;
bi++;
}
Unref(type);
delete hash;
}
void TopkVal::Merge(const TopkVal* value, bool doPrune)
{
if ( type == 0 )
{
assert(numElements == 0);
Typify(value->type);
}
else
{
if ( ! same_type(type, value->type) )
{
reporter->Error("Cannot merge top-k elements of differing types.");
return;
}
}
std::list<Bucket*>::const_iterator it = value->buckets.begin();
while ( it != value->buckets.end() )
{
Bucket* b = *it;
uint64_t currcount = b->count;
std::list<Element*>::const_iterator eit = b->elements.begin();
while ( eit != b->elements.end() )
{
Element* e = *eit;
// lookup if we already know this one...
HashKey* key = GetHash(e->value);
Element* olde = (Element*) elementDict->Lookup(key);
if ( olde == 0 )
{
olde = new Element();
olde->epsilon = 0;
olde->value = e->value->Ref();
// insert at bucket position 0
if ( buckets.size() > 0 )
{
assert (buckets.front()-> count > 0 );
}
Bucket* newbucket = new Bucket();
newbucket->count = 0;
newbucket->bucketPos = buckets.insert(buckets.begin(), newbucket);
olde->parent = newbucket;
newbucket->elements.insert(newbucket->elements.end(), olde);
elementDict->Insert(key, olde);
numElements++;
}
// now that we are sure that the old element is present - increment epsilon
olde->epsilon += e->epsilon;
// and increment position...
IncrementCounter(olde, currcount);
delete key;
eit++;
}
it++;
}
// now we have added everything. And our top-k table could be too big.
// prune everything...
assert(size > 0);
if ( ! doPrune )
return;
while ( numElements > size )
{
pruned = true;
assert(buckets.size() > 0 );
Bucket* b = buckets.front();
assert(b->elements.size() > 0);
Element* e = b->elements.front();
HashKey* key = GetHash(e->value);
elementDict->RemoveEntry(key);
delete e;
b->elements.pop_front();
if ( b->elements.size() == 0 )
{
delete b;
buckets.pop_front();
}
numElements--;
}
}
bool TopkVal::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_TOPK_VAL, OpaqueVal);
bool v = true;
v &= SERIALIZE(size);
v &= SERIALIZE(numElements);
v &= SERIALIZE(pruned);
bool type_present = (type != 0);
v &= SERIALIZE(type_present);
if ( type_present )
v &= type->Serialize(info);
else
assert(numElements == 0);
uint64_t i = 0;
std::list<Bucket*>::const_iterator it = buckets.begin();
while ( it != buckets.end() )
{
Bucket* b = *it;
uint32_t elements_count = b->elements.size();
v &= SERIALIZE(elements_count);
v &= SERIALIZE(b->count);
std::list<Element*>::const_iterator eit = b->elements.begin();
while ( eit != b->elements.end() )
{
Element* element = *eit;
v &= SERIALIZE(element->epsilon);
v &= element->value->Serialize(info);
eit++;
i++;
}
it++;
}
assert(i == numElements);
return v;
}
bool TopkVal::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(OpaqueVal);
bool v = true;
v &= UNSERIALIZE(&size);
v &= UNSERIALIZE(&numElements);
v &= UNSERIALIZE(&pruned);
bool type_present = false;
v &= UNSERIALIZE(&type_present);
if ( type_present )
{
BroType* deserialized_type = BroType::Unserialize(info);
Typify(deserialized_type);
Unref(deserialized_type);
assert(type);
}
else
assert(numElements == 0);
uint64_t i = 0;
while ( i < numElements )
{
Bucket* b = new Bucket();
uint32_t elements_count;
v &= UNSERIALIZE(&elements_count);
v &= UNSERIALIZE(&b->count);
b->bucketPos = buckets.insert(buckets.end(), b);
for ( uint64_t j = 0; j < elements_count; j++ )
{
Element* e = new Element();
v &= UNSERIALIZE(&e->epsilon);
e->value = Val::Unserialize(info, type);
e->parent = b;
b->elements.insert(b->elements.end(), e);
HashKey* key = GetHash(e->value);
assert (elementDict->Lookup(key) == 0);
elementDict->Insert(key, e);
delete key;
i++;
}
}
assert(i == numElements);
return v;
}
VectorVal* TopkVal::GetTopK(int k) const // returns vector
{
if ( numElements == 0 )
{
reporter->Error("Cannot return topk of empty");
return 0;
}
TypeList* vector_index = new TypeList(type);
vector_index->Append(type->Ref());
VectorType* v = new VectorType(vector_index);
VectorVal* t = new VectorVal(v);
// This does not verify that the result really is the correct top-k!
// in any case - just to make this future-proof (and I am lazy) - this can return more than k.
int read = 0;
std::list<Bucket*>::const_iterator it = buckets.end();
it--;
while (read < k )
{
//printf("Bucket %llu\n", (*it)->count);
std::list<Element*>::iterator eit = (*it)->elements.begin();
while ( eit != (*it)->elements.end() )
{
//printf("Size: %ld\n", (*it)->elements.size());
t->Assign(read, (*eit)->value->Ref());
read++;
eit++;
}
if ( it == buckets.begin() )
break;
it--;
}
Unref(v);
return t;
}
uint64_t TopkVal::GetCount(Val* value) const
{
HashKey* key = GetHash(value);
Element* e = (Element*) elementDict->Lookup(key);
if ( e == 0 )
{
reporter->Error("GetCount for element that is not in top-k");
return 0;
}
delete key;
return e->parent->count;
}
uint64_t TopkVal::GetEpsilon(Val* value) const
{
HashKey* key = GetHash(value);
Element* e = (Element*) elementDict->Lookup(key);
if ( e == 0 )
{
reporter->Error("GetEpsilon for element that is not in top-k");
return 0;
}
delete key;
return e->epsilon;
}
uint64_t TopkVal::GetSum() const
{
uint64_t sum = 0;
std::list<Bucket*>::const_iterator it = buckets.begin();
while ( it != buckets.end() )
{
sum += (*it)->elements.size() * (*it)->count;
it++;
}
if ( pruned )
reporter->Warning("TopkVal::GetSum() was used on a pruned data structure. Result values do not represent total element count");
return sum;
}
void TopkVal::Encountered(Val* encountered)
{
// ok, let's see if we already know this one.
if ( numElements == 0 )
Typify(encountered->Type());
else
if ( ! same_type(type, encountered->Type()) )
{
reporter->Error("Trying to add element to topk with differing type from other elements");
return;
}
// Step 1 - get the hash.
HashKey* key = GetHash(encountered);
Element* e = (Element*) elementDict->Lookup(key);
if ( e == 0 )
{
e = new Element();
e->epsilon = 0;
e->value = encountered->Ref(); // or no ref?
// well, we do not know this one yet...
if ( numElements < size )
{
// brilliant. just add it at position 1
if ( buckets.size() == 0 || (*buckets.begin())->count > 1 )
{
Bucket* b = new Bucket();
b->count = 1;
std::list<Bucket*>::iterator pos = buckets.insert(buckets.begin(), b);
b->bucketPos = pos;
b->elements.insert(b->elements.end(), e);
e->parent = b;
}
else
{
Bucket* b = *buckets.begin();
assert(b->count == 1);
b->elements.insert(b->elements.end(), e);
e->parent = b;
}
elementDict->Insert(key, e);
numElements++;
delete key;
return; // done. it is at pos 1.
}
else
{
// replace element with min-value
Bucket* b = *buckets.begin(); // bucket with smallest elements
// evict oldest element with least hits.
assert(b->elements.size() > 0);
HashKey* deleteKey = GetHash((*(b->elements.begin()))->value);
b->elements.erase(b->elements.begin());
Element* deleteElement = (Element*) elementDict->RemoveEntry(deleteKey);
assert(deleteElement); // there has to have been a minimal element...
delete deleteElement;
delete deleteKey;
// and add the new one to the end
e->epsilon = b->count;
b->elements.insert(b->elements.end(), e);
elementDict->Insert(key, e);
e->parent = b;
// fallthrough, increment operation has to run!
}
}
// ok, we now have an element in e
delete key;
IncrementCounter(e); // well, this certainly was anticlimactic.
}
// increment by count
void TopkVal::IncrementCounter(Element* e, unsigned int count)
{
Bucket* currBucket = e->parent;
uint64 currcount = currBucket->count;
// well, let's test if there is a bucket for currcount++
std::list<Bucket*>::iterator bucketIter = currBucket->bucketPos;
Bucket* nextBucket = 0;
bucketIter++;
while ( bucketIter != buckets.end() && (*bucketIter)->count < currcount+count )
bucketIter++;
if ( bucketIter != buckets.end() && (*bucketIter)->count == currcount+count )
nextBucket = *bucketIter;
if ( nextBucket == 0 )
{
// the bucket for the value that we want does not exist.
// create it...
Bucket* b = new Bucket();
b->count = currcount+count;
std::list<Bucket*>::iterator nextBucketPos = buckets.insert(bucketIter, b);
b->bucketPos = nextBucketPos; // and give it the iterator we know now.
nextBucket = b;
}
// ok, now we have the new bucket in nextBucket. Shift the element over...
currBucket->elements.remove(e);
nextBucket->elements.insert(nextBucket->elements.end(), e);
e->parent = nextBucket;
// if currBucket is empty, we have to delete it now
if ( currBucket->elements.size() == 0 )
{
buckets.remove(currBucket);
delete currBucket;
currBucket = 0;
}
}
};

170
src/probabilistic/Topk.h Normal file
View file

@ -0,0 +1,170 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef topk_h
#define topk_h
#include <list>
#include "Val.h"
#include "CompHash.h"
#include "OpaqueVal.h"
// This class implements the top-k algorithm. Or - to be more precise - an
// interpretation of it.
namespace probabilistic {
struct Element;
struct Bucket {
uint64 count;
std::list<Element*> elements;
// Iterators only get invalidated for removed elements. This one
// points to us - so it is invalid when we are no longer there. Cute,
// isn't it?
std::list<Bucket*>::iterator bucketPos;
};
struct Element {
uint64 epsilon;
Val* value;
Bucket* parent;
~Element();
};
declare(PDict, Element);
class TopkVal : public OpaqueVal {
public:
/**
* Construct a TopkVal.
*
* @param size specifies how many total elements are tracked
*
* @return A newly initialized TopkVal
*/
TopkVal(uint64 size);
/**
* Destructor.
*/
~TopkVal();
/**
* Call this when a new value is encountered. Note that on the first
* call, the Bro type of the values that are counted is set. All
* following calls to Encountered have to pass values of the same type.
*
* @param value The encountered element
*/
void Encountered(Val* value);
/**
* Get the first *k* elements of the result vector. At the moment,
* this does not check if it is in the right order or if we can prove
* that these are the correct top-k. Use count and epsilon for this.
*
* @param k Number of top-elements to return
*
* @returns The top-k encountered elements
*/
VectorVal* GetTopK(int k) const;
/**
* Get the current count tracked in the top-k data structure for a
* certain val. Returns 0 if the val is unknown (and logs the error
* to reporter).
*
* @param value Bro value to get counts for
*
* @returns internal count for val, 0 if unknown
*/
uint64_t GetCount(Val* value) const;
/**
* Get the current epsilon tracked in the top-k data structure for a
* certain val.
*
* @param value Bro value to get epsilons for
*
* @returns the epsilon. Returns 0 if the val is unknown (and logs
* the error to reporter)
*/
uint64_t GetEpsilon(Val* value) const;
/**
* Get the size set in the constructor
*
* @returns size of the top-k structure
*/
uint64_t GetSize() const { return size; }
/**
* Get the sum of all counts of all tracked elements. This is equal
* to the number of total observations up to this moment, if no
* elements were pruned from the data structure.
*
* @returns sum of all counts
*/
uint64_t GetSum() const;
/**
* Merge another top-k data structure into this one. doPrune
* specifies if the total count of elements is limited to size after
* merging. Please note that pruning will invalidate the results of
* GetSum().
*
* @param value TopkVal to merge into this TopkVal
*
* @param doPrune prune resulting TopkVal to size after merging
*/
void Merge(const TopkVal* value, bool doPrune=false);
protected:
/**
* Construct an empty TopkVal. Only used for deserialization
*/
TopkVal();
private:
/**
* Increment the counter for a specific element
*
* @param e element to increment counter for
*
* @param count increment counter by this much
*/
void IncrementCounter(Element* e, unsigned int count = 1);
/**
* Get the HashKey for a specific value
*
* @param v value to generate key for
*
* @returns HashKey for value
*/
HashKey* GetHash(Val* v) const; // this probably should go somewhere else.
/**
* Set the type that this TopK instance tracks
*
* @param t type that is tracked
*/
void Typify(BroType* t);
BroType* type;
CompositeHash* hash;
std::list<Bucket*> buckets;
PDict(Element)* elementDict;
uint64 size; // how many elements are we tracking?
uint64 numElements; // how many elements do we have at the moment
bool pruned; // was this data structure pruned?
DECLARE_SERIAL(TopkVal);
};
};
#endif
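A short sketch of driving TopkVal directly from C++; example() is hypothetical, and scripts would normally go through the top-k BIFs instead.

#include "Val.h"
#include "probabilistic/Topk.h"

void example()
	{
	probabilistic::TopkVal* tk = new probabilistic::TopkVal(2);  // track 2 elements

	Val* five = new Val((uint64) 5, TYPE_COUNT);
	tk->Encountered(five);   // first call fixes the tracked type to count
	tk->Encountered(five);   // counted again

	uint64_t count = tk->GetCount(five);   // 2
	uint64_t sum = tk->GetSum();           // 2 as well, nothing pruned yet
	VectorVal* top = tk->GetTopK(1);       // vector holding the value 5

	Unref(top);
	Unref(five);
	Unref(tk);
	}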

View file

@ -20,23 +20,20 @@ module GLOBAL;
## Creates a basic Bloom filter. ## Creates a basic Bloom filter.
## ##
## .. note:: A Bloom filter can have a name associated with it. In the future,
## Bloom filters with the same name will be compatible across indepedent Bro
## instances, i.e., it will be possible to merge them. Currently, however, that is
## not yet supported.
##
## fp: The desired false-positive rate. ## fp: The desired false-positive rate.
## ##
## capacity: the maximum number of elements that guarantees a false-positive ## capacity: the maximum number of elements that guarantees a false-positive
## rate of *fp*. ## rate of *fp*.
## ##
## name: A name that uniquely identifies and seeds the Bloom filter. If empty, ## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
## the filter will remain tied to the current Bro process. ## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
## a local seed tied to the current Bro process. Only filters with the same seed
## can be merged with :bro:id:`bloomfilter_merge` .
## ##
## Returns: A Bloom filter handle. ## Returns: A Bloom filter handle.
## ##
## .. bro:see:: bloomfilter_counting_init bloomfilter_add bloomfilter_lookup ## .. bro:see:: bloomfilter_basic_init2 bloomfilter_counting_init bloomfilter_add
## bloomfilter_clear bloomfilter_merge ## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
function bloomfilter_basic_init%(fp: double, capacity: count, function bloomfilter_basic_init%(fp: double, capacity: count,
name: string &default=""%): opaque of bloomfilter name: string &default=""%): opaque of bloomfilter
%{ %{
@ -48,18 +45,53 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
size_t cells = BasicBloomFilter::M(fp, capacity); size_t cells = BasicBloomFilter::M(fp, capacity);
size_t optimal_k = BasicBloomFilter::K(cells, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity);
const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
name->Len());
const Hasher* h = new DefaultHasher(optimal_k, seed);
return new BloomFilterVal(new BasicBloomFilter(h, cells));
%}
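For reference, the M() and K() helpers used above size the filter from fp and capacity; they presumably follow the standard Bloom filter formulas, sketched here with made-up function names (bloom_m/bloom_k are not the functions in BasicBloomFilter).

#include <cmath>
#include <cstddef>

// Cells needed for n elements at false-positive rate fp: m = ceil(-n*ln(fp) / (ln 2)^2)
size_t bloom_m(double fp, size_t n)
	{
	return (size_t) std::ceil(-(double(n) * std::log(fp)) / (std::log(2.0) * std::log(2.0)));
	}

// Optimal number of hash functions for m cells and n elements: k = round(m/n * ln 2)
size_t bloom_k(size_t m, size_t n)
	{
	return (size_t) (double(m) / double(n) * std::log(2.0) + 0.5);
	}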
## Creates a basic Bloom filter. This function serves as a low-level
## alternative to bloomfilter_basic_init where the user has full control over
## the number of hash functions and cells in the underlying bit vector.
##
## k: The number of hash functions to use.
##
## cells: The number of cells of the underlying bit vector.
##
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
## a local seed tied to the current Bro process. Only filters with the same seed
## can be merged with :bro:id:`bloomfilter_merge` .
##
## Returns: A Bloom filter handle.
##
## .. bro:see:: bloomfilter_basic_init bloomfilter_counting_init bloomfilter_add
## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
function bloomfilter_basic_init2%(k: count, cells: count,
name: string &default=""%): opaque of bloomfilter
%{
if ( k == 0 )
{
reporter->Error("number of hash functions must be non-negative");
return 0;
}
if ( cells == 0 )
{
reporter->Error("number of cells must be non-negative");
return 0;
}
size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
name->Len());
const Hasher* h = new DefaultHasher(k, seed);
return new BloomFilterVal(new BasicBloomFilter(h, cells)); return new BloomFilterVal(new BasicBloomFilter(h, cells));
%} %}
## Creates a counting Bloom filter. ## Creates a counting Bloom filter.
## ##
## .. note:: A Bloom filter can have a name associated with it. In the future,
## Bloom filters with the same name will be compatible across indepedent Bro
## instances, i.e., it will be possible to merge them. Currently, however, that is
## not yet supported.
##
## k: The number of hash functions to use. ## k: The number of hash functions to use.
## ##
## cells: The number of cells of the underlying counter vector. As there's no ## cells: The number of cells of the underlying counter vector. As there's no
@ -71,12 +103,14 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
## becomes a cell of size *w* bits. ## becomes a cell of size *w* bits.
## ##
## name: A name that uniquely identifies and seeds the Bloom filter. If empty, ## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
## the filter will remain tied to the current Bro process. ## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
## a local seed tied to the current Bro process. Only filters with the same seed
## can be merged with :bro:id:`bloomfilter_merge` .
## ##
## Returns: A Bloom filter handle. ## Returns: A Bloom filter handle.
## ##
## .. bro:see:: bloomfilter_basic_init bloomfilter_add bloomfilter_lookup ## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2 bloomfilter_add
## bloomfilter_clear bloomfilter_merge ## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
function bloomfilter_counting_init%(k: count, cells: count, max: count, function bloomfilter_counting_init%(k: count, cells: count, max: count,
name: string &default=""%): opaque of bloomfilter name: string &default=""%): opaque of bloomfilter
%{ %{
@ -86,7 +120,10 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
return 0; return 0;
} }
const Hasher* h = Hasher::Create(k, name->CheckString()); size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
name->Len());
const Hasher* h = new DefaultHasher(k, seed);
uint16 width = 1; uint16 width = 1;
while ( max >>= 1 ) while ( max >>= 1 )
@ -101,8 +138,9 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
## ##
## x: The element to add. ## x: The element to add.
## ##
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init loomfilter_lookup ## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
## bloomfilter_clear bloomfilter_merge ## bloomfilter_counting_init bloomfilter_lookup bloomfilter_clear
## bloomfilter_merge
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
%{ %{
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf); BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
@ -127,8 +165,9 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
## ##
## Returns: the counter associated with *x* in *bf*. ## Returns: the counter associated with *x* in *bf*.
## ##
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init ## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
## bloomfilter_add bloomfilter_clear bloomfilter_merge ## bloomfilter_counting_init bloomfilter_add bloomfilter_clear
## bloomfilter_merge
function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
%{ %{
const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf); const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf);
@ -154,8 +193,9 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
## ##
## bf: The Bloom filter handle. ## bf: The Bloom filter handle.
## ##
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init ## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
## bloomfilter_add bloomfilter_lookup bloomfilter_merge ## bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
## bloomfilter_merge
function bloomfilter_clear%(bf: opaque of bloomfilter%): any function bloomfilter_clear%(bf: opaque of bloomfilter%): any
%{ %{
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf); BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
@ -178,15 +218,18 @@ function bloomfilter_clear%(bf: opaque of bloomfilter%): any
## ##
## Returns: The union of *bf1* and *bf2*. ## Returns: The union of *bf1* and *bf2*.
## ##
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init ## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
## bloomfilter_add bloomfilter_lookup bloomfilter_clear ## bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
## bloomfilter_clear
function bloomfilter_merge%(bf1: opaque of bloomfilter, function bloomfilter_merge%(bf1: opaque of bloomfilter,
bf2: opaque of bloomfilter%): opaque of bloomfilter bf2: opaque of bloomfilter%): opaque of bloomfilter
%{ %{
const BloomFilterVal* bfv1 = static_cast<const BloomFilterVal*>(bf1); const BloomFilterVal* bfv1 = static_cast<const BloomFilterVal*>(bf1);
const BloomFilterVal* bfv2 = static_cast<const BloomFilterVal*>(bf2); const BloomFilterVal* bfv2 = static_cast<const BloomFilterVal*>(bf2);
if ( ! same_type(bfv1->Type(), bfv2->Type()) ) if ( bfv1->Type() && // any one 0 is ok here
bfv2->Type() &&
! same_type(bfv1->Type(), bfv2->Type()) )
{ {
reporter->Error("incompatible Bloom filter types"); reporter->Error("incompatible Bloom filter types");
return 0; return 0;
@ -194,3 +237,13 @@ function bloomfilter_merge%(bf1: opaque of bloomfilter,
return BloomFilterVal::Merge(bfv1, bfv2); return BloomFilterVal::Merge(bfv1, bfv2);
%} %}
## Returns a string with a representation of a Bloom filter's internal
## state. This is for debugging/testing purposes only.
##
## bf: The Bloom filter handle.
function bloomfilter_internal_state%(bf: opaque of bloomfilter%): string
%{
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
return new StringVal(bfv->InternalState());
%}

184
src/probabilistic/top-k.bif Normal file
View file

@ -0,0 +1,184 @@
# ===========================================================================
#
# Top-K Functions
#
# ===========================================================================
%%{
#include "probabilistic/Topk.h"
%%}
## Creates a top-k data structure which tracks *size* elements.
##
## size: number of elements to track
##
## Returns: Opaque pointer to the data structure.
##
## .. bro:see:: topk_add topk_get_top topk_count topk_epsilon
## topk_size topk_sum topk_merge topk_merge_prune
function topk_init%(size: count%): opaque of topk
%{
probabilistic::TopkVal* v = new probabilistic::TopkVal(size);
return v;
%}
## Add a new observed object to the data structure.
##
## .. note:: The first added object sets the type of data tracked by
## the top-k data structure. All following values have to be of the same
## type.
##
## handle: the TopK handle
##
## value: observed value
##
## .. bro:see:: topk_init topk_get_top topk_count topk_epsilon
## topk_size topk_sum topk_merge topk_merge_prune
function topk_add%(handle: opaque of topk, value: any%): any
%{
assert(handle);
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
h->Encountered(value);
return 0;
%}
## Get the first *k* elements of the top-k data structure.
##
## handle: the TopK handle
##
## k: number of elements to return
##
## Returns: vector of the first k elements
##
## .. bro:see:: topk_init topk_add topk_count topk_epsilon
## topk_size topk_sum topk_merge topk_merge_prune
function topk_get_top%(handle: opaque of topk, k: count%): any
%{
assert(handle);
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
return h->GetTopK(k);
%}
## Get an overestimated count of how often value has been encountered.
##
## .. note:: value has to be part of the currently tracked elements, otherwise
## 0 will be returned and an error message will be logged via the reporter.
##
## handle: the TopK handle
##
## value: Value to look up count for.
##
## Returns: Overestimated number for how often the element has been encountered
##
## .. bro:see:: topk_init topk_add topk_get_top topk_epsilon
## topk_size topk_sum topk_merge topk_merge_prune
function topk_count%(handle: opaque of topk, value: any%): count
%{
assert(handle);
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
return new Val(h->GetCount(value), TYPE_COUNT);
%}
## Get the maximal overestimation for the count of a value.
##
## .. note:: The same restrictions as for :bro:id:`topk_count` apply.
##
## handle: the TopK handle
##
## value: Value to look up epsilon for.
##
## Returns: Number which represents the maximal overestimation for the count of this element.
##
## .. bro:see:: topk_init topk_add topk_get_top topk_count
## topk_size topk_sum topk_merge topk_merge_prune
function topk_epsilon%(handle: opaque of topk, value: any%): count
%{
assert(handle);
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
return new Val(h->GetEpsilon(value), TYPE_COUNT);
%}
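A hedged sketch combining the two lookups above; since "b" never displaced another element here, its epsilon should be 0:

	event bro_init()
		{
		local t = topk_init(2);
		topk_add(t, "a");
		topk_add(t, "b");
		topk_add(t, "b");
		print topk_count(t, "b");   # 2 (counts may be overestimates in general)
		print topk_epsilon(t, "b"); # 0, since "b" was never displaced
		}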
## Get the number of elements this data structure is supposed to track (given on init).
##
## .. note:: The actual number of elements in the data structure can be lower
##    or higher than this, due to non-pruned merges.
##
## handle: the TopK handle
##
## Returns: size given during initialization
##
## .. bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon
## topk_sum topk_merge topk_merge_prune
function topk_size%(handle: opaque of topk%): count
%{
assert(handle);
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
return new Val(h->GetSize(), TYPE_COUNT);
%}
## Get the sum of all counts of all elements in the data structure.
##
## .. note:: This is equal to the number of all inserted objects only if the data
##    structure has never been pruned. Do not use it after calling
##    :bro:id:`topk_merge_prune`; doing so triggers a warning and the result does
##    not represent the total element count.
##
## handle: the TopK handle
##
## Returns: sum of all counts
##
## .. bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon
## topk_size topk_merge topk_merge_prune
function topk_sum%(handle: opaque of topk%): count
%{
assert(handle);
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
return new Val(h->GetSum(), TYPE_COUNT);
%}
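A minimal sketch of the two introspection functions above (values are made up):

	event bro_init()
		{
		local t = topk_init(5);
		topk_add(t, "x");
		topk_add(t, "y");
		topk_add(t, "y");
		print topk_size(t); # 5, the size passed to topk_init()
		print topk_sum(t);  # 3, since the structure was never pruned
		}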
## Merge the second top-k data structure into the first.
##
## .. note:: This does not remove any elements; the resulting data structure
##    can be bigger than the maximum size given on initialization.
##
## handle1: the TopK handle into which the second TopK structure is merged
##
## handle2: the TopK handle which is merged into the first TopK structure
##
## .. bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon
##    topk_size topk_sum topk_merge_prune
function topk_merge%(handle1: opaque of topk, handle2: opaque of topk%): any
%{
assert(handle1);
assert(handle2);
probabilistic::TopkVal* h1 = (probabilistic::TopkVal*) handle1;
probabilistic::TopkVal* h2 = (probabilistic::TopkVal*) handle2;
h1->Merge(h2);
return 0;
%}
## Merge the second top-k data structure into the first and prune the resulting
## data structure back to the size given on initialization.
##
## .. note:: Use with care and only when aware of the restrictions this
##    entails. Do not call :bro:id:`topk_size` or :bro:id:`topk_add` afterwards;
##    the results will probably not be what you expect.
##
## handle1: the TopK handle into which the second TopK structure is merged
##
## handle2: the TopK handle which is merged into the first TopK structure
##
## .. bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon
## topk_size topk_sum topk_merge
function topk_merge_prune%(handle1: opaque of topk, handle2: opaque of topk%): any
%{
assert(handle1);
assert(handle2);
probabilistic::TopkVal* h1 = (probabilistic::TopkVal*) handle1;
probabilistic::TopkVal* h2 = (probabilistic::TopkVal*) handle2;
h1->Merge(h2, true);
return 0;
%}
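To contrast the two merge variants, a hedged sketch with hypothetical values, mirroring the regression test added below:

	event bro_init()
		{
		local t1 = topk_init(2);
		local t2 = topk_init(2);
		topk_add(t1, "a");
		topk_add(t1, "b");
		topk_add(t2, "c");
		topk_add(t2, "c");
		topk_add(t2, "d");

		# Non-pruning merge: t1 may now track more than the 2 elements
		# it was initialized with.
		topk_merge(t1, t2);
		print topk_get_top(t1, 5);

		# Pruning merge into a fresh structure: the result is cut back to
		# 2 elements; avoid topk_sum() on it afterwards, as that only
		# yields a warning and an unreliable value.
		local t3 = topk_init(2);
		topk_merge_prune(t3, t1);
		print topk_get_top(t3, 5);
		}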


@ -0,0 +1,8 @@
bf1, global_seed, 11979365913534242684
bf2, global_seed, 12550100962110750449
bf3, my_seed, 12550100962110750449
bf4, my_seed, 945716460325754659
bf1, global_seed, 12550100962110750449
bf2, global_seed, 945716460325754659
bf3, my_seed, 12550100962110750449
bf4, my_seed, 945716460325754659


@ -12,6 +12,9 @@ error: false-positive rate must take value between 0 and 1
1
1
1
0, no fp
1
1
1
1
1


@ -0,0 +1,11 @@
error: GetCount for element that is not in top-k
error: GetEpsilon for element that is not in top-k
error: GetCount for element that is not in top-k
error: GetEpsilon for element that is not in top-k
error: GetCount for element that is not in top-k
error: GetEpsilon for element that is not in top-k
error: GetCount for element that is not in top-k
error: GetEpsilon for element that is not in top-k
warning: TopkVal::GetSum() was used on a pruned data structure. Result values do not represent total element count
error: GetCount for element that is not in top-k
error: GetEpsilon for element that is not in top-k


@ -0,0 +1,81 @@
[b, c]
4
0
0
2
0
2
1
[d, c]
5
0
0
2
1
3
2
[d, e]
6
3
2
3
2
[f, e]
7
4
3
3
2
[f, e]
8
4
3
4
2
[g, e]
9
0
0
4
2
5
4
[c, e, d]
19
6
0
5
0
4
0
[c, e]
6
0
5
0
0
0
[c, e]
22
12
0
10
0
0
0
[c, e]
19
6
0
5
0
4
0
[c, e, d]
38
12
0
10
0
8
0


@ -161,6 +161,7 @@ scripts/base/init-default.bro
scripts/base/protocols/dns/main.bro
scripts/base/protocols/ftp/__load__.bro
scripts/base/protocols/ftp/utils-commands.bro
scripts/base/protocols/ftp/info.bro
scripts/base/protocols/ftp/main.bro
scripts/base/protocols/ftp/utils.bro
scripts/base/protocols/ftp/files.bro


@ -0,0 +1,21 @@
1
2
6
4
5
1
[c, e, d]
1
2
6
4
5
1
[c, e, d]
2
4
12
8
10
2
[c, e, d]


@ -0,0 +1,9 @@
Top entries for key counter
Num: 995, count: 100, epsilon: 0
Num: 1, count: 99, epsilon: 0
Num: 2, count: 98, epsilon: 0
Num: 3, count: 97, epsilon: 0
Num: 4, count: 96, epsilon: 0
Top entries for key two
Num: 2, count: 4, epsilon: 0
Num: 1, count: 3, epsilon: 0


@ -0,0 +1,8 @@
Top entries for key counter
Num: 1, count: 99, epsilon: 0
Num: 2, count: 98, epsilon: 0
Num: 3, count: 97, epsilon: 0
Num: 4, count: 96, epsilon: 0
Num: 5, count: 95, epsilon: 0
Top entries for key two
Num: 1, count: 2, epsilon: 0


@ -0,0 +1,40 @@
# @TEST-EXEC: bro -b %INPUT global_hash_seed="foo" >>output
# @TEST-EXEC: bro -b %INPUT global_hash_seed="my_seed" >>output
# @TEST-EXEC: btest-diff output
type Foo: record
{
a: count;
b: string;
};
function test_bloom_filter()
{
local bf1 = bloomfilter_basic_init(0.9, 10);
bloomfilter_add(bf1, "foo");
bloomfilter_add(bf1, "bar");
local bf2 = bloomfilter_basic_init(0.9, 10);
bloomfilter_add(bf2, Foo($a=1, $b="xx"));
bloomfilter_add(bf2, Foo($a=2, $b="yy"));
local bf3 = bloomfilter_basic_init(0.9, 10, "my_seed");
bloomfilter_add(bf3, "foo");
bloomfilter_add(bf3, "bar");
local bf4 = bloomfilter_basic_init(0.9, 10, "my_seed");
bloomfilter_add(bf4, Foo($a=1, $b="xx"));
bloomfilter_add(bf4, Foo($a=2, $b="yy"));
print "bf1, global_seed", bloomfilter_internal_state(bf1);
print "bf2, global_seed", bloomfilter_internal_state(bf2);
print "bf3, my_seed", bloomfilter_internal_state(bf3);
print "bf4, my_seed", bloomfilter_internal_state(bf4);
}
event bro_init()
{
test_bloom_filter();
}


@ -15,14 +15,21 @@ function test_basic_bloom_filter()
bloomfilter_add(bf_cnt, 0.5); # Type mismatch
bloomfilter_add(bf_cnt, "foo"); # Type mismatch
# Alternative constructor.
local bf_dbl = bloomfilter_basic_init2(4, 10);
bloomfilter_add(bf_dbl, 4.2);
bloomfilter_add(bf_dbl, 3.14);
print bloomfilter_lookup(bf_dbl, 4.2);
print bloomfilter_lookup(bf_dbl, 3.14);
# Basic usage with strings.
local bf_str = bloomfilter_basic_init(0.9, 10);
bloomfilter_add(bf_str, "foo");
bloomfilter_add(bf_str, "bar");
print bloomfilter_lookup(bf_str, "foo");
print bloomfilter_lookup(bf_str, "bar");
- print bloomfilter_lookup(bf_str, "b4z"); # FP
+ print bloomfilter_lookup(bf_str, "b4zzz"), "no fp"; # FP
- print bloomfilter_lookup(bf_str, "quux"); # FP
+ print bloomfilter_lookup(bf_str, "quuux"); # FP
bloomfilter_add(bf_str, 0.5); # Type mismatch
bloomfilter_add(bf_str, 100); # Type mismatch
@ -45,6 +52,11 @@ function test_basic_bloom_filter()
print bloomfilter_lookup(bf_merged, 84);
print bloomfilter_lookup(bf_merged, 100);
print bloomfilter_lookup(bf_merged, 168);
#empty filter tests
local bf_empty = bloomfilter_basic_init(0.1, 1000);
local bf_empty_merged = bloomfilter_merge(bf_merged, bf_empty);
print bloomfilter_lookup(bf_empty_merged, 42);
}
function test_counting_bloom_filter()

testing/btest/bifs/topk.bro (new file, 154 lines)

@ -0,0 +1,154 @@
# @TEST-EXEC: bro -b %INPUT > out
# @TEST-EXEC: btest-diff out
# @TEST-EXEC: btest-diff .stderr
event bro_init()
{
local k1 = topk_init(2);
# first - peculiarity check...
topk_add(k1, "a");
topk_add(k1, "b");
topk_add(k1, "b");
topk_add(k1, "c");
local s = topk_get_top(k1, 5);
print s;
print topk_sum(k1);
print topk_count(k1, "a");
print topk_epsilon(k1, "a");
print topk_count(k1, "b");
print topk_epsilon(k1, "b");
print topk_count(k1, "c");
print topk_epsilon(k1, "c");
topk_add(k1, "d");
s = topk_get_top(k1, 5);
print s;
print topk_sum(k1);
print topk_count(k1, "b");
print topk_epsilon(k1, "b");
print topk_count(k1, "c");
print topk_epsilon(k1, "c");
print topk_count(k1, "d");
print topk_epsilon(k1, "d");
topk_add(k1, "e");
s = topk_get_top(k1, 5);
print s;
print topk_sum(k1);
print topk_count(k1, "d");
print topk_epsilon(k1, "d");
print topk_count(k1, "e");
print topk_epsilon(k1, "e");
topk_add(k1, "f");
s = topk_get_top(k1, 5);
print s;
print topk_sum(k1);
print topk_count(k1, "f");
print topk_epsilon(k1, "f");
print topk_count(k1, "e");
print topk_epsilon(k1, "e");
topk_add(k1, "e");
s = topk_get_top(k1, 5);
print s;
print topk_sum(k1);
print topk_count(k1, "f");
print topk_epsilon(k1, "f");
print topk_count(k1, "e");
print topk_epsilon(k1, "e");
topk_add(k1, "g");
s = topk_get_top(k1, 5);
print s;
print topk_sum(k1);
print topk_count(k1, "f");
print topk_epsilon(k1, "f");
print topk_count(k1, "e");
print topk_epsilon(k1, "e");
print topk_count(k1, "g");
print topk_epsilon(k1, "g");
k1 = topk_init(100);
topk_add(k1, "a");
topk_add(k1, "b");
topk_add(k1, "b");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "f");
s = topk_get_top(k1, 3);
print s;
print topk_sum(k1);
print topk_count(k1, "c");
print topk_epsilon(k1, "c");
print topk_count(k1, "e");
print topk_epsilon(k1, "d");
print topk_count(k1, "d");
print topk_epsilon(k1, "d");
local k3 = topk_init(2);
topk_merge_prune(k3, k1);
s = topk_get_top(k3, 3);
print s;
print topk_count(k3, "c");
print topk_epsilon(k3, "c");
print topk_count(k3, "e");
print topk_epsilon(k3, "e");
print topk_count(k3, "d");
print topk_epsilon(k3, "d");
topk_merge_prune(k3, k1);
s = topk_get_top(k3, 3);
print s;
print topk_sum(k3); # this gives a warning and a wrong result.
print topk_count(k3, "c");
print topk_epsilon(k3, "c");
print topk_count(k3, "e");
print topk_epsilon(k3, "e");
print topk_count(k3, "d");
print topk_epsilon(k3, "d");
k3 = topk_init(2);
topk_merge(k3, k1);
print s;
print topk_sum(k3);
print topk_count(k3, "c");
print topk_epsilon(k3, "c");
print topk_count(k3, "e");
print topk_epsilon(k3, "e");
print topk_count(k3, "d");
print topk_epsilon(k3, "d");
topk_merge(k3, k1);
s = topk_get_top(k3, 3);
print s;
print topk_sum(k3);
print topk_count(k3, "c");
print topk_epsilon(k3, "c");
print topk_count(k3, "e");
print topk_epsilon(k3, "e");
print topk_count(k3, "d");
print topk_epsilon(k3, "d");
}


@ -0,0 +1,74 @@
# @TEST-EXEC: bro -b %INPUT runnumber=1 >out
# @TEST-EXEC: bro -b %INPUT runnumber=2 >>out
# @TEST-EXEC: bro -b %INPUT runnumber=3 >>out
# @TEST-EXEC: btest-diff out
global runnumber: count &redef; # differentiate runs
global k1: opaque of topk &persistent;
global k2: opaque of topk &persistent;
event bro_init()
{
k2 = topk_init(20);
if ( runnumber == 1 )
{
k1 = topk_init(100);
topk_add(k1, "a");
topk_add(k1, "b");
topk_add(k1, "b");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "f");
}
local s = topk_get_top(k1, 3);
print topk_count(k1, "a");
print topk_count(k1, "b");
print topk_count(k1, "c");
print topk_count(k1, "d");
print topk_count(k1, "e");
print topk_count(k1, "f");
if ( runnumber == 2 )
{
topk_add(k1, "a");
topk_add(k1, "b");
topk_add(k1, "b");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "f");
}
print s;
}


@ -0,0 +1,110 @@
# @TEST-SERIALIZE: comm
#
# @TEST-EXEC: btest-bg-run manager-1 BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT
# @TEST-EXEC: sleep 1
# @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT
# @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-2 bro %INPUT
# @TEST-EXEC: btest-bg-wait 15
# @TEST-EXEC: btest-diff manager-1/.stdout
#
@TEST-START-FILE cluster-layout.bro
redef Cluster::nodes = {
["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")],
["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1", $interface="eth0"],
["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1", $interface="eth1"],
};
@TEST-END-FILE
redef Log::default_rotation_interval = 0secs;
event bro_init() &priority=5
{
local r1: SumStats::Reducer = [$stream="test.metric",
$apply=set(SumStats::TOPK)];
SumStats::create([$epoch=5secs,
$reducers=set(r1),
$epoch_finished(data: SumStats::ResultTable) =
{
for ( key in data )
{
local r = data[key]["test.metric"];
local s: vector of SumStats::Observation;
s = topk_get_top(r$topk, 5);
print fmt("Top entries for key %s", key$str);
for ( element in s )
{
print fmt("Num: %d, count: %d, epsilon: %d", s[element]$num, topk_count(r$topk, s[element]), topk_epsilon(r$topk, s[element]));
}
terminate();
}
}
]);
}
event remote_connection_closed(p: event_peer)
{
terminate();
}
global ready_for_data: event();
redef Cluster::manager2worker_events += /^ready_for_data$/;
event ready_for_data()
{
const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};
if ( Cluster::node == "worker-1" )
{
local a: count;
a = 0;
for ( i in loop_v )
{
a = a + 1;
for ( j in loop_v )
{
if ( i < j )
SumStats::observe("test.metric", [$str="counter"], [$num=a]);
}
}
SumStats::observe("test.metric", [$str="two"], [$num=1]);
SumStats::observe("test.metric", [$str="two"], [$num=1]);
}
if ( Cluster::node == "worker-2" )
{
SumStats::observe("test.metric", [$str="two"], [$num=2]);
SumStats::observe("test.metric", [$str="two"], [$num=2]);
SumStats::observe("test.metric", [$str="two"], [$num=2]);
SumStats::observe("test.metric", [$str="two"], [$num=2]);
SumStats::observe("test.metric", [$str="two"], [$num=1]);
for ( i in loop_v )
{
SumStats::observe("test.metric", [$str="counter"], [$num=995]);
}
}
}
@if ( Cluster::local_node_type() == Cluster::MANAGER )
global peer_count = 0;
event remote_connection_handshake_done(p: event_peer) &priority=-5
{
++peer_count;
if ( peer_count == 2 )
event ready_for_data();
}
@endif


@ -0,0 +1,48 @@
# @TEST-EXEC: bro %INPUT
# @TEST-EXEC: btest-diff .stdout
event bro_init() &priority=5
{
local r1: SumStats::Reducer = [$stream="test.metric",
$apply=set(SumStats::TOPK)];
SumStats::create([$epoch=3secs,
$reducers=set(r1),
$epoch_finished(data: SumStats::ResultTable) =
{
for ( key in data )
{
local r = data[key]["test.metric"];
local s: vector of SumStats::Observation;
s = topk_get_top(r$topk, 5);
print fmt("Top entries for key %s", key$str);
for ( element in s )
{
print fmt("Num: %d, count: %d, epsilon: %d", s[element]$num, topk_count(r$topk, s[element]), topk_epsilon(r$topk, s[element]));
}
}
}
]);
const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};
local a: count;
a = 0;
for ( i in loop_v )
{
a = a + 1;
for ( j in loop_v )
{
if ( i < j )
SumStats::observe("test.metric", [$str="counter"], [$num=a]);
}
}
SumStats::observe("test.metric", [$str="two"], [$num=1]);
SumStats::observe("test.metric", [$str="two"], [$num=1]);
}