mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Merge remote-tracking branch 'origin/master' into topic/seth/sumstats-updates
This commit is contained in:
commit
d6edbd27b1
96 changed files with 3085 additions and 839 deletions
50
CHANGES
50
CHANGES
|
@ -1,4 +1,54 @@
|
|||
|
||||
2.1-1007 | 2013-08-01 15:41:54 -0700
|
||||
|
||||
* More function documentation. (Bernhard Amann)
|
||||
|
||||
2.1-1004 | 2013-08-01 14:37:43 -0700
|
||||
|
||||
* Adding a probabilistic data structure for computing "top k"
|
||||
elements. (Bernhard Amann)
|
||||
|
||||
The corresponding functions are:
|
||||
|
||||
topk_init(size: count): opaque of topk
|
||||
topk_add(handle: opaque of topk, value: any)
|
||||
topk_get_top(handle: opaque of topk, k: count)
|
||||
topk_count(handle: opaque of topk, value: any): count
|
||||
topk_epsilon(handle: opaque of topk, value: any): count
|
||||
topk_size(handle: opaque of topk): count
|
||||
topk_sum(handle: opaque of topk): count
|
||||
topk_merge(handle1: opaque of topk, handle2: opaque of topk)
|
||||
topk_merge_prune(handle1: opaque of topk, handle2: opaque of topk)
|
||||
|
||||
2.1-971 | 2013-08-01 13:28:32 -0700
|
||||
|
||||
* Fix some build errors. (Jon Siwek)
|
||||
|
||||
* Internal refactoring of how plugin components are tagged/managed.
|
||||
(Jon Siwek)
|
||||
|
||||
* Fix various documentation, mostly related to file analysis. (Jon
|
||||
Siwek)
|
||||
|
||||
* Changing the Bloom filter hashing so that it's independent of
|
||||
CompositeHash. (Robin Sommer)
|
||||
|
||||
2.1-951 | 2013-08-01 11:19:23 -0400
|
||||
|
||||
* Small fix to deal with a bug in the SSL log delay mechanism.
|
||||
|
||||
2.1-948 | 2013-07-31 20:08:28 -0700
|
||||
|
||||
* Fix segfault caused by merging an empty bloom-filter with a
|
||||
bloom-filter already containing values. (Bernhard Amann)
|
||||
|
||||
2.1-945 | 2013-07-30 10:05:10 -0700
|
||||
|
||||
* Make hashers serializable. (Matthias Vallentin)
|
||||
|
||||
* Add docs and use default value for hasher names. (Matthias
|
||||
Vallentin)
|
||||
|
||||
2.1-939 | 2013-07-29 15:42:38 -0700
|
||||
|
||||
* Added Exec, Dir, and ActiveHTTP modules. (Seth Hall)
|
||||
|
|
16
NEWS
16
NEWS
|
@ -113,6 +113,7 @@ New Functionality
|
|||
the frequency of elements. The corresponding functions are:
|
||||
|
||||
bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter
|
||||
bloomfilter_basic_init2(k: count, cells: count, name: string &default=""): opaque of bloomfilter
|
||||
bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter
|
||||
bloomfilter_add(bf: opaque of bloomfilter, x: any)
|
||||
bloomfilter_lookup(bf: opaque of bloomfilter, x: any): count
|
||||
|
@ -121,6 +122,21 @@ New Functionality
|
|||
|
||||
See <INSERT LINK> for full documentation.
|
||||
|
||||
- Bro now provides a probabilistic data structure for computing
|
||||
"top k" elements. The corresponding functions are:
|
||||
|
||||
topk_init(size: count): opaque of topk
|
||||
topk_add(handle: opaque of topk, value: any)
|
||||
topk_get_top(handle: opaque of topk, k: count)
|
||||
topk_count(handle: opaque of topk, value: any): count
|
||||
topk_epsilon(handle: opaque of topk, value: any): count
|
||||
topk_size(handle: opaque of topk): count
|
||||
topk_sum(handle: opaque of topk): count
|
||||
topk_merge(handle1: opaque of topk, handle2: opaque of topk)
|
||||
topk_merge_prune(handle1: opaque of topk, handle2: opaque of topk)
|
||||
|
||||
See <INSERT LINK> for full documentation.
|
||||
|
||||
- base/utils/exec.bro provides a module to start external processes
|
||||
asynchronously and retrieve their output on termination.
|
||||
base/utils/dir.bro uses it to monitor a directory for changes, and
|
||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
|||
2.1-939
|
||||
2.1-1007
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 91d258cc8b2f74cd02fc93dfe61f73ec9f0dd489
|
||||
Subproject commit d9963983c0b4d426b24836f8d154d014d5aecbba
|
|
@ -1 +1 @@
|
|||
Subproject commit ce366206e3407e534a786ad572c342e9f9fef26b
|
||||
Subproject commit 69606f8f3cc84d694ca1da14868a5fecd4abbc96
|
|
@ -82,9 +82,9 @@ attached, they start receiving the contents of the file as Bro extracts
|
|||
it from an ongoing network connection. What they do with the file
|
||||
contents is up to the particular file analyzer implementation, but
|
||||
they'll typically either report further information about the file via
|
||||
events (e.g. :bro:see:`FileAnalysis::ANALYZER_MD5` will report the
|
||||
events (e.g. :bro:see:`Files::ANALYZER_MD5` will report the
|
||||
file's MD5 checksum via :bro:see:`file_hash` once calculated) or they'll
|
||||
have some side effect (e.g. :bro:see:`FileAnalysis::ANALYZER_EXTRACT`
|
||||
have some side effect (e.g. :bro:see:`Files::ANALYZER_EXTRACT`
|
||||
will write the contents of the file out to the local file system).
|
||||
|
||||
In the future there may be file analyzers that automatically attach to
|
||||
|
@ -98,7 +98,7 @@ explicit attachment decision:
|
|||
{
|
||||
print "new file", f$id;
|
||||
if ( f?$mime_type && f$mime_type == "text/plain" )
|
||||
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
|
||||
Files::add_analyzer(f, Files::ANALYZER_MD5);
|
||||
}
|
||||
|
||||
event file_hash(f: fa_file, kind: string, hash: string)
|
||||
|
@ -113,26 +113,27 @@ output::
|
|||
file_hash, Cx92a0ym5R8, md5, 397168fd09991a0e712254df7bc639ac
|
||||
|
||||
Some file analyzers might have tunable parameters that need to be
|
||||
specified in the call to :bro:see:`FileAnalysis::add_analyzer`:
|
||||
specified in the call to :bro:see:`Files::add_analyzer`:
|
||||
|
||||
.. code:: bro
|
||||
|
||||
event file_new(f: fa_file)
|
||||
{
|
||||
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
|
||||
$extract_filename="./myfile"]);
|
||||
Files::add_analyzer(f, Files::ANALYZER_EXTRACT,
|
||||
[$extract_filename="myfile"]);
|
||||
}
|
||||
|
||||
In this case, the file extraction analyzer doesn't generate any further
|
||||
events, but does have the side effect of writing out the file contents
|
||||
to the local file system at the specified location of ``./myfile``. Of
|
||||
course, for a network with more than a single file being transferred,
|
||||
it's probably preferable to specify a different extraction path for each
|
||||
file, unlike this example.
|
||||
events, but does have the effect of writing out the file contents to the
|
||||
local file system at the location resulting from the concatenation of
|
||||
the path specified by :bro:see:`FileExtract::prefix` and the string,
|
||||
``myfile``. Of course, for a network with more than a single file being
|
||||
transferred, it's probably preferable to specify a different extraction
|
||||
path for each file, unlike this example.
|
||||
|
||||
Regardless of which file analyzers end up acting on a file, general
|
||||
information about the file (e.g. size, time of last data transferred,
|
||||
MIME type, etc.) are logged in ``file_analysis.log``.
|
||||
MIME type, etc.) are logged in ``files.log``.
|
||||
|
||||
Input Framework Integration
|
||||
===========================
|
||||
|
@ -150,7 +151,7 @@ a network interface it's monitoring. It only requires a call to
|
|||
event file_new(f: fa_file)
|
||||
{
|
||||
print "new file", f$id;
|
||||
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
|
||||
Files::add_analyzer(f, Files::ANALYZER_MD5);
|
||||
}
|
||||
|
||||
event file_state_remove(f: fa_file)
|
||||
|
|
|
@ -47,6 +47,7 @@ Script Reference
|
|||
scripts/index
|
||||
scripts/builtins
|
||||
scripts/proto-analyzers
|
||||
scripts/file-analyzers
|
||||
|
||||
Other Bro Components
|
||||
--------------------
|
||||
|
|
|
@ -124,28 +124,34 @@ endmacro(REST_TARGET)
|
|||
# Schedule Bro scripts for which to generate documentation.
|
||||
include(DocSourcesList.cmake)
|
||||
|
||||
# This reST target is independent of a particular Bro script...
|
||||
add_custom_command(OUTPUT proto-analyzers.rst
|
||||
# delete any leftover state from previous bro runs
|
||||
COMMAND "${CMAKE_COMMAND}"
|
||||
ARGS -E remove_directory .state
|
||||
# generate the reST documentation using bro
|
||||
COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro
|
||||
ARGS -b -Z base/init-bare.bro || (rm -rf .state *.log *.rst && exit 1)
|
||||
# move generated doc into a new directory tree that
|
||||
# defines the final structure of documents
|
||||
COMMAND "${CMAKE_COMMAND}"
|
||||
ARGS -E make_directory ${dstDir}
|
||||
COMMAND "${CMAKE_COMMAND}"
|
||||
ARGS -E copy proto-analyzers.rst ${dstDir}
|
||||
# clean up the build directory
|
||||
COMMAND rm
|
||||
ARGS -rf .state *.log *.rst
|
||||
DEPENDS bro
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
COMMENT "[Bro] Generating reST docs for proto-analyzers.rst"
|
||||
)
|
||||
list(APPEND ALL_REST_OUTPUTS proto-analyzers.rst)
|
||||
# Macro for generating reST docs that are independent of any particular Bro
|
||||
# script.
|
||||
macro(INDEPENDENT_REST_TARGET reST_file)
|
||||
add_custom_command(OUTPUT ${reST_file}
|
||||
# delete any leftover state from previous bro runs
|
||||
COMMAND "${CMAKE_COMMAND}"
|
||||
ARGS -E remove_directory .state
|
||||
# generate the reST documentation using bro
|
||||
COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro
|
||||
ARGS -b -Z base/init-bare.bro || (rm -rf .state *.log *.rst && exit 1)
|
||||
# move generated doc into a new directory tree that
|
||||
# defines the final structure of documents
|
||||
COMMAND "${CMAKE_COMMAND}"
|
||||
ARGS -E make_directory ${dstDir}
|
||||
COMMAND "${CMAKE_COMMAND}"
|
||||
ARGS -E copy ${reST_file} ${dstDir}
|
||||
# clean up the build directory
|
||||
COMMAND rm
|
||||
ARGS -rf .state *.log *.rst
|
||||
DEPENDS bro
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
COMMENT "[Bro] Generating reST docs for ${reST_file}"
|
||||
)
|
||||
list(APPEND ALL_REST_OUTPUTS ${reST_file})
|
||||
endmacro(INDEPENDENT_REST_TARGET)
|
||||
|
||||
independent_rest_target(proto-analyzers.rst)
|
||||
independent_rest_target(file-analyzers.rst)
|
||||
|
||||
# create temporary list of all docs to include in the master policy/index file
|
||||
file(WRITE ${MASTER_POLICY_INDEX} "${MASTER_POLICY_INDEX_TEXT}")
|
||||
|
|
|
@ -73,6 +73,7 @@ rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_UDP.events.bif.bro)
|
|||
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_ZIP.events.bif.bro)
|
||||
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/reporter.bif.bro)
|
||||
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/strings.bif.bro)
|
||||
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/top-k.bif.bro)
|
||||
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/types.bif.bro)
|
||||
rest_target(${psd} base/files/extract/main.bro)
|
||||
rest_target(${psd} base/files/hash/main.bro)
|
||||
|
@ -129,6 +130,7 @@ rest_target(${psd} base/frameworks/sumstats/plugins/min.bro)
|
|||
rest_target(${psd} base/frameworks/sumstats/plugins/sample.bro)
|
||||
rest_target(${psd} base/frameworks/sumstats/plugins/std-dev.bro)
|
||||
rest_target(${psd} base/frameworks/sumstats/plugins/sum.bro)
|
||||
rest_target(${psd} base/frameworks/sumstats/plugins/topk.bro)
|
||||
rest_target(${psd} base/frameworks/sumstats/plugins/unique.bro)
|
||||
rest_target(${psd} base/frameworks/sumstats/plugins/variance.bro)
|
||||
rest_target(${psd} base/frameworks/tunnels/main.bro)
|
||||
|
@ -141,6 +143,7 @@ rest_target(${psd} base/protocols/dns/consts.bro)
|
|||
rest_target(${psd} base/protocols/dns/main.bro)
|
||||
rest_target(${psd} base/protocols/ftp/files.bro)
|
||||
rest_target(${psd} base/protocols/ftp/gridftp.bro)
|
||||
rest_target(${psd} base/protocols/ftp/info.bro)
|
||||
rest_target(${psd} base/protocols/ftp/main.bro)
|
||||
rest_target(${psd} base/protocols/ftp/utils-commands.bro)
|
||||
rest_target(${psd} base/protocols/ftp/utils.bro)
|
||||
|
|
|
@ -204,7 +204,7 @@ export {
|
|||
##
|
||||
## tag: Tag for the protocol analyzer having a callback being registered.
|
||||
##
|
||||
## reg: A :bro:see:`ProtoRegistration` record.
|
||||
## reg: A :bro:see:`Files::ProtoRegistration` record.
|
||||
##
|
||||
## Returns: true if the protocol being registered was not previously registered.
|
||||
global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool;
|
||||
|
@ -228,11 +228,6 @@ redef record fa_file += {
|
|||
info: Info &optional;
|
||||
};
|
||||
|
||||
redef record AnalyzerArgs += {
|
||||
# This is used internally for the core file analyzer api.
|
||||
tag: Files::Tag &optional;
|
||||
};
|
||||
|
||||
# Store the callbacks for protocol analyzers that have files.
|
||||
global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table();
|
||||
|
||||
|
@ -275,14 +270,12 @@ function set_timeout_interval(f: fa_file, t: interval): bool
|
|||
|
||||
function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
|
||||
{
|
||||
# This is to construct the correct args for the core API.
|
||||
args$tag = tag;
|
||||
add f$info$analyzers[Files::analyzer_name(tag)];
|
||||
|
||||
if ( tag in analyzer_add_callbacks )
|
||||
analyzer_add_callbacks[tag](f, args);
|
||||
|
||||
if ( ! __add_analyzer(f$id, args) )
|
||||
if ( ! __add_analyzer(f$id, tag, args) )
|
||||
{
|
||||
Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id));
|
||||
return F;
|
||||
|
@ -297,8 +290,7 @@ function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: f
|
|||
|
||||
function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
|
||||
{
|
||||
args$tag = tag;
|
||||
return __remove_analyzer(f$id, args);
|
||||
return __remove_analyzer(f$id, tag, args);
|
||||
}
|
||||
|
||||
function stop(f: fa_file): bool
|
||||
|
|
|
@ -109,7 +109,7 @@ export {
|
|||
|
||||
## Enables the old filtering approach of "only watch common ports for
|
||||
## analyzed protocols".
|
||||
##
|
||||
##
|
||||
## Unless you know what you are doing, leave this set to F.
|
||||
const enable_auto_protocol_capture_filters = F &redef;
|
||||
|
||||
|
|
|
@ -5,5 +5,6 @@
|
|||
@load ./sample
|
||||
@load ./std-dev
|
||||
@load ./sum
|
||||
@load ./topk
|
||||
@load ./unique
|
||||
@load ./variance
|
||||
@load ./variance
|
||||
|
|
50
scripts/base/frameworks/sumstats/plugins/topk.bro
Normal file
50
scripts/base/frameworks/sumstats/plugins/topk.bro
Normal file
|
@ -0,0 +1,50 @@
|
|||
@load base/frameworks/sumstats
|
||||
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
redef record Reducer += {
|
||||
## number of elements to keep in the top-k list
|
||||
topk_size: count &default=500;
|
||||
};
|
||||
|
||||
redef enum Calculation += {
|
||||
TOPK
|
||||
};
|
||||
|
||||
redef record ResultVal += {
|
||||
topk: opaque of topk &optional;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
hook init_resultval_hook(r: Reducer, rv: ResultVal)
|
||||
{
|
||||
if ( TOPK in r$apply && ! rv?$topk )
|
||||
rv$topk = topk_init(r$topk_size);
|
||||
}
|
||||
|
||||
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
|
||||
{
|
||||
if ( TOPK in r$apply )
|
||||
topk_add(rv$topk, obs);
|
||||
}
|
||||
|
||||
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
|
||||
{
|
||||
if ( rv1?$topk )
|
||||
{
|
||||
result$topk = topk_init(topk_size(rv1$topk));
|
||||
|
||||
topk_merge(result$topk, rv1$topk);
|
||||
|
||||
if ( rv2?$topk )
|
||||
topk_merge(result$topk, rv2$topk);
|
||||
}
|
||||
|
||||
else if ( rv2?$topk )
|
||||
{
|
||||
result$topk = topk_init(topk_size(rv2$topk));
|
||||
topk_merge(result$topk, rv2$topk);
|
||||
}
|
||||
}
|
|
@ -531,22 +531,19 @@ type record_field_table: table[string] of record_field;
|
|||
# dependent on the names remaining as they are now.
|
||||
|
||||
## Set of BPF capture filters to use for capturing, indexed by a user-definable
|
||||
## ID (which must be unique). If Bro is *not* configured to examine
|
||||
## :bro:id:`PacketFilter::all_packets`, all packets matching at least
|
||||
## one of the filters in this table (and all in :bro:id:`restrict_filters`)
|
||||
## will be analyzed.
|
||||
## ID (which must be unique). If Bro is *not* configured with
|
||||
## :bro:id:`PacketFilter::enable_auto_protocol_capture_filters`,
|
||||
## all packets matching at least one of the filters in this table (and all in
|
||||
## :bro:id:`restrict_filters`) will be analyzed.
|
||||
##
|
||||
## .. bro:see:: PacketFilter PacketFilter::all_packets
|
||||
## .. bro:see:: PacketFilter PacketFilter::enable_auto_protocol_capture_filters
|
||||
## PacketFilter::unrestricted_filter restrict_filters
|
||||
global capture_filters: table[string] of string &redef;
|
||||
|
||||
## Set of BPF filters to restrict capturing, indexed by a user-definable ID (which
|
||||
## must be unique). If Bro is *not* configured to examine
|
||||
## :bro:id:`PacketFilter::all_packets`, only packets matching *all* of the
|
||||
## filters in this table (and any in :bro:id:`capture_filters`) will be
|
||||
## analyzed.
|
||||
## must be unique).
|
||||
##
|
||||
## .. bro:see:: PacketFilter PacketFilter::all_packets
|
||||
## .. bro:see:: PacketFilter PacketFilter::enable_auto_protocol_capture_filters
|
||||
## PacketFilter::unrestricted_filter capture_filters
|
||||
global restrict_filters: table[string] of string &redef;
|
||||
|
||||
|
@ -3042,6 +3039,11 @@ module GLOBAL;
|
|||
## Number of bytes per packet to capture from live interfaces.
|
||||
const snaplen = 8192 &redef;
|
||||
|
||||
## Seed for hashes computed internally for probabilistic data structures. Using
|
||||
## the same value here will make the hashes compatible between independent Bro
|
||||
## instances. If left unset, Bro will use a temporary local seed.
|
||||
const global_hash_seed: string = "" &redef;
|
||||
|
||||
# Load BiFs defined by plugins.
|
||||
@load base/bif/plugins
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
@load ./utils-commands
|
||||
@load ./info
|
||||
@load ./main
|
||||
@load ./utils
|
||||
@load ./files
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
@load ./info
|
||||
@load ./main
|
||||
@load ./utils
|
||||
@load base/utils/conn-ids
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
##! sizes are not logged, but at the benefit of saving CPU cycles that
|
||||
##! otherwise go to analyzing the large (and likely benign) connections.
|
||||
|
||||
@load ./info
|
||||
@load ./main
|
||||
@load base/protocols/conn
|
||||
@load base/protocols/ssl
|
||||
|
|
72
scripts/base/protocols/ftp/info.bro
Normal file
72
scripts/base/protocols/ftp/info.bro
Normal file
|
@ -0,0 +1,72 @@
|
|||
##! Defines data structures for tracking and logging FTP sessions.
|
||||
|
||||
module FTP;
|
||||
|
||||
@load ./utils-commands
|
||||
|
||||
export {
|
||||
|
||||
## This setting changes if passwords used in FTP sessions are
|
||||
## captured or not.
|
||||
const default_capture_password = F &redef;
|
||||
|
||||
## The expected endpoints of an FTP data channel.
|
||||
type ExpectedDataChannel: record {
|
||||
## Whether PASV mode is toggled for control channel.
|
||||
passive: bool &log;
|
||||
## The host that will be initiating the data connection.
|
||||
orig_h: addr &log;
|
||||
## The host that will be accepting the data connection.
|
||||
resp_h: addr &log;
|
||||
## The port at which the acceptor is listening for the data connection.
|
||||
resp_p: port &log;
|
||||
};
|
||||
|
||||
type Info: record {
|
||||
## Time when the command was sent.
|
||||
ts: time &log;
|
||||
## Unique ID for the connection.
|
||||
uid: string &log;
|
||||
## The connection's 4-tuple of endpoint addresses/ports.
|
||||
id: conn_id &log;
|
||||
## User name for the current FTP session.
|
||||
user: string &log &default="<unknown>";
|
||||
## Password for the current FTP session if captured.
|
||||
password: string &log &optional;
|
||||
## Command given by the client.
|
||||
command: string &log &optional;
|
||||
## Argument for the command if one is given.
|
||||
arg: string &log &optional;
|
||||
|
||||
## Libmagic "sniffed" file type if the command indicates a file transfer.
|
||||
mime_type: string &log &optional;
|
||||
## Size of the file if the command indicates a file transfer.
|
||||
file_size: count &log &optional;
|
||||
|
||||
## Reply code from the server in response to the command.
|
||||
reply_code: count &log &optional;
|
||||
## Reply message from the server in response to the command.
|
||||
reply_msg: string &log &optional;
|
||||
|
||||
## Expected FTP data channel.
|
||||
data_channel: ExpectedDataChannel &log &optional;
|
||||
|
||||
## Current working directory that this session is in. By making
|
||||
## the default value '.', we can indicate that unless something
|
||||
## more concrete is discovered that the existing but unknown
|
||||
## directory is ok to use.
|
||||
cwd: string &default=".";
|
||||
|
||||
## Command that is currently waiting for a response.
|
||||
cmdarg: CmdArg &optional;
|
||||
## Queue for commands that have been sent but not yet responded to
|
||||
## are tracked here.
|
||||
pending_commands: PendingCmds;
|
||||
|
||||
## Indicates if the session is in active or passive mode.
|
||||
passive: bool &default=F;
|
||||
|
||||
## Determines if the password will be captured for this request.
|
||||
capture_password: bool &default=default_capture_password;
|
||||
};
|
||||
}
|
|
@ -3,6 +3,8 @@
|
|||
##! will take on the full path that the client is at along with the requested
|
||||
##! file name.
|
||||
|
||||
@load ./info
|
||||
@load ./utils
|
||||
@load ./utils-commands
|
||||
@load base/utils/paths
|
||||
@load base/utils/numbers
|
||||
|
@ -20,72 +22,9 @@ export {
|
|||
"EPSV"
|
||||
} &redef;
|
||||
|
||||
## This setting changes if passwords used in FTP sessions are captured or not.
|
||||
const default_capture_password = F &redef;
|
||||
|
||||
## User IDs that can be considered "anonymous".
|
||||
const guest_ids = { "anonymous", "ftp", "ftpuser", "guest" } &redef;
|
||||
|
||||
## The expected endpoints of an FTP data channel.
|
||||
type ExpectedDataChannel: record {
|
||||
## Whether PASV mode is toggled for control channel.
|
||||
passive: bool &log;
|
||||
## The host that will be initiating the data connection.
|
||||
orig_h: addr &log;
|
||||
## The host that will be accepting the data connection.
|
||||
resp_h: addr &log;
|
||||
## The port at which the acceptor is listening for the data connection.
|
||||
resp_p: port &log;
|
||||
};
|
||||
|
||||
type Info: record {
|
||||
## Time when the command was sent.
|
||||
ts: time &log;
|
||||
## Unique ID for the connection.
|
||||
uid: string &log;
|
||||
## The connection's 4-tuple of endpoint addresses/ports.
|
||||
id: conn_id &log;
|
||||
## User name for the current FTP session.
|
||||
user: string &log &default="<unknown>";
|
||||
## Password for the current FTP session if captured.
|
||||
password: string &log &optional;
|
||||
## Command given by the client.
|
||||
command: string &log &optional;
|
||||
## Argument for the command if one is given.
|
||||
arg: string &log &optional;
|
||||
|
||||
## Libmagic "sniffed" file type if the command indicates a file transfer.
|
||||
mime_type: string &log &optional;
|
||||
## Size of the file if the command indicates a file transfer.
|
||||
file_size: count &log &optional;
|
||||
|
||||
## Reply code from the server in response to the command.
|
||||
reply_code: count &log &optional;
|
||||
## Reply message from the server in response to the command.
|
||||
reply_msg: string &log &optional;
|
||||
|
||||
## Expected FTP data channel.
|
||||
data_channel: ExpectedDataChannel &log &optional;
|
||||
|
||||
## Current working directory that this session is in. By making
|
||||
## the default value '.', we can indicate that unless something
|
||||
## more concrete is discovered that the existing but unknown
|
||||
## directory is ok to use.
|
||||
cwd: string &default=".";
|
||||
|
||||
## Command that is currently waiting for a response.
|
||||
cmdarg: CmdArg &optional;
|
||||
## Queue for commands that have been sent but not yet responded to
|
||||
## are tracked here.
|
||||
pending_commands: PendingCmds;
|
||||
|
||||
## Indicates if the session is in active or passive mode.
|
||||
passive: bool &default=F;
|
||||
|
||||
## Determines if the password will be captured for this request.
|
||||
capture_password: bool &default=default_capture_password;
|
||||
};
|
||||
|
||||
## This record is to hold a parsed FTP reply code. For example, for the
|
||||
## 201 status code, the digits would be parsed as: x->2, y->0, z->1.
|
||||
type ReplyCode: record {
|
||||
|
@ -102,8 +41,6 @@ export {
|
|||
global log_ftp: event(rec: Info);
|
||||
}
|
||||
|
||||
@load ./utils
|
||||
|
||||
# Add the state tracking information variable to the connection record
|
||||
redef record connection += {
|
||||
ftp: Info &optional;
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
##! Utilities specific for FTP processing.
|
||||
|
||||
@load ./main
|
||||
@load ./info
|
||||
@load base/utils/addrs
|
||||
@load base/utils/paths
|
||||
|
||||
module FTP;
|
||||
|
||||
|
|
|
@ -67,11 +67,8 @@ export {
|
|||
## (especially with large file transfers).
|
||||
const disable_analyzer_after_detection = T &redef;
|
||||
|
||||
## The maximum amount of time a script can delay records from being logged.
|
||||
const max_log_delay = 15secs &redef;
|
||||
|
||||
## Delays an SSL record for a specific token: the record will not be logged
|
||||
## as long as the token exists or until :bro:id:`SSL::max_log_delay` elapses.
|
||||
## as long as the token exists or until 15 seconds elapse.
|
||||
global delay_log: function(info: Info, token: string);
|
||||
|
||||
## Undelays an SSL record for a previously inserted token, allowing the
|
||||
|
@ -90,7 +87,7 @@ redef record connection += {
|
|||
redef record Info += {
|
||||
# Adding a string "token" to this set will cause the SSL script
|
||||
# to delay logging the record until either the token has been removed or
|
||||
# the record has been delayed for :bro:id:`SSL::max_log_delay`.
|
||||
# the record has been delayed for 15 seconds.
|
||||
delay_tokens: set[string] &optional;
|
||||
};
|
||||
|
||||
|
@ -138,7 +135,7 @@ function log_record(info: Info)
|
|||
{
|
||||
log_record(info);
|
||||
}
|
||||
timeout SSL::max_log_delay
|
||||
timeout 15secs
|
||||
{
|
||||
Reporter::info(fmt("SSL delay tokens not released in time (%s tokens remaining)",
|
||||
|info$delay_tokens|));
|
||||
|
|
|
@ -34,8 +34,8 @@ export {
|
|||
global current_shunted_host_pairs: function(): set[conn_id];
|
||||
|
||||
redef enum Notice::Type += {
|
||||
## Indicative that :bro:id:`max_bpf_shunts` connections are already
|
||||
## being shunted with BPF filters and no more are allowed.
|
||||
## Indicative that :bro:id:`PacketFilter::max_bpf_shunts` connections
|
||||
## are already being shunted with BPF filters and no more are allowed.
|
||||
No_More_Conn_Shunts_Available,
|
||||
|
||||
## Limitations in BPF make shunting some connections with BPF impossible.
|
||||
|
|
|
@ -12,12 +12,12 @@ export {
|
|||
## Apply BPF filters to each worker in a way that causes them to
|
||||
## automatically flow balance traffic between them.
|
||||
AUTO_BPF,
|
||||
## Load balance traffic across the workers by making each one apply
|
||||
## a restrict filter to only listen to a single MAC address. This
|
||||
## is a somewhat common deployment option for sites doing network
|
||||
## based load balancing with MAC address rewriting and passing the
|
||||
## traffic to a single interface. Multiple MAC addresses will show
|
||||
## up on the same interface and need to be filtered to a single address.
|
||||
# Load balance traffic across the workers by making each one apply
|
||||
# a restrict filter to only listen to a single MAC address. This
|
||||
# is a somewhat common deployment option for sites doing network
|
||||
# based load balancing with MAC address rewriting and passing the
|
||||
# traffic to a single interface. Multiple MAC addresses will show
|
||||
# up on the same interface and need to be filtered to a single address.
|
||||
#MAC_ADDR_BPF,
|
||||
};
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
## Capture TCP fragments, but not UDP (or ICMP), since those are a lot more
|
||||
## common due to high-volume, fragmenting protocols such as NFS :-(.
|
||||
# Capture TCP fragments, but not UDP (or ICMP), since those are a lot more
|
||||
# common due to high-volume, fragmenting protocols such as NFS :-(.
|
||||
|
||||
## This normally isn't used because of the default open packet filter
|
||||
## but we set it anyway in case the user is using a packet filter.
|
||||
## Note: This was removed because the default model now is to have a wide
|
||||
## open packet filter.
|
||||
# This normally isn't used because of the default open packet filter
|
||||
# but we set it anyway in case the user is using a packet filter.
|
||||
# Note: This was removed because the default model now is to have a wide
|
||||
# open packet filter.
|
||||
#redef capture_filters += { ["frag"] = "(ip[6:2] & 0x3fff != 0) and tcp" };
|
||||
|
||||
## Shorten the fragment timeout from never expiring to expiring fragments after
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "plugin/Manager.h"
|
||||
#include "analyzer/Manager.h"
|
||||
#include "analyzer/Component.h"
|
||||
#include "file_analysis/Manager.h"
|
||||
|
||||
BroDoc::BroDoc(const std::string& rel, const std::string& abs)
|
||||
{
|
||||
|
@ -479,6 +480,17 @@ static void WriteAnalyzerComponent(FILE* f, const analyzer::Component* c)
|
|||
fprintf(f, ":bro:enum:`Analyzer::%s`\n\n", tag.c_str());
|
||||
}
|
||||
|
||||
static void WriteAnalyzerComponent(FILE* f, const file_analysis::Component* c)
|
||||
{
|
||||
EnumType* atag = file_mgr->GetTagEnumType();
|
||||
string tag = fmt("ANALYZER_%s", c->CanonicalName());
|
||||
|
||||
if ( atag->Lookup("Files", tag.c_str()) < 0 )
|
||||
reporter->InternalError("missing analyzer tag for %s", tag.c_str());
|
||||
|
||||
fprintf(f, ":bro:enum:`Files::%s`\n\n", tag.c_str());
|
||||
}
|
||||
|
||||
static void WritePluginComponents(FILE* f, const plugin::Plugin* p)
|
||||
{
|
||||
plugin::Plugin::component_list components = p->Components();
|
||||
|
@ -494,6 +506,10 @@ static void WritePluginComponents(FILE* f, const plugin::Plugin* p)
|
|||
WriteAnalyzerComponent(f,
|
||||
dynamic_cast<const analyzer::Component*>(*it));
|
||||
break;
|
||||
case plugin::component::FILE_ANALYZER:
|
||||
WriteAnalyzerComponent(f,
|
||||
dynamic_cast<const file_analysis::Component*>(*it));
|
||||
break;
|
||||
case plugin::component::READER:
|
||||
reporter->InternalError("docs for READER component unimplemented");
|
||||
case plugin::component::WRITER:
|
||||
|
@ -537,30 +553,35 @@ static void WritePluginBifItems(FILE* f, const plugin::Plugin* p,
|
|||
}
|
||||
}
|
||||
|
||||
static void WriteAnalyzerTagDefn(FILE* f, EnumType* e)
|
||||
static void WriteAnalyzerTagDefn(FILE* f, EnumType* e, const string& module)
|
||||
{
|
||||
string tag_id= module + "::Tag";
|
||||
e = new CommentedEnumType(e);
|
||||
e->SetTypeID(copy_string("Analyzer::Tag"));
|
||||
e->SetTypeID(copy_string(tag_id.c_str()));
|
||||
|
||||
ID* dummy_id = new ID(copy_string("Analyzer::Tag"), SCOPE_GLOBAL, true);
|
||||
ID* dummy_id = new ID(copy_string(tag_id.c_str()), SCOPE_GLOBAL, true);
|
||||
dummy_id->SetType(e);
|
||||
dummy_id->MakeType();
|
||||
|
||||
list<string>* r = new list<string>();
|
||||
r->push_back("Unique identifiers for protocol analyzers.");
|
||||
r->push_back("Unique identifiers for analyzers.");
|
||||
|
||||
BroDocObj bdo(dummy_id, r, true);
|
||||
|
||||
bdo.WriteReST(f);
|
||||
}
|
||||
|
||||
static bool IsAnalyzerPlugin(const plugin::Plugin* p)
|
||||
static bool ComponentsMatch(const plugin::Plugin* p, plugin::component::Type t,
|
||||
bool match_empty = false)
|
||||
{
|
||||
plugin::Plugin::component_list components = p->Components();
|
||||
plugin::Plugin::component_list::const_iterator it;
|
||||
|
||||
if ( components.empty() )
|
||||
return match_empty;
|
||||
|
||||
for ( it = components.begin(); it != components.end(); ++it )
|
||||
if ( (*it)->Type() != plugin::component::ANALYZER )
|
||||
if ( (*it)->Type() != t )
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
@ -573,14 +594,44 @@ void CreateProtoAnalyzerDoc(const char* filename)
|
|||
fprintf(f, "Protocol Analyzer Reference\n");
|
||||
fprintf(f, "===========================\n\n");
|
||||
|
||||
WriteAnalyzerTagDefn(f, analyzer_mgr->GetTagEnumType());
|
||||
WriteAnalyzerTagDefn(f, analyzer_mgr->GetTagEnumType(), "Analyzer");
|
||||
|
||||
plugin::Manager::plugin_list plugins = plugin_mgr->Plugins();
|
||||
plugin::Manager::plugin_list::const_iterator it;
|
||||
|
||||
for ( it = plugins.begin(); it != plugins.end(); ++it )
|
||||
{
|
||||
if ( ! IsAnalyzerPlugin(*it) )
|
||||
if ( ! ComponentsMatch(*it, plugin::component::ANALYZER, true) )
|
||||
continue;
|
||||
|
||||
WritePluginSectionHeading(f, *it);
|
||||
WritePluginComponents(f, *it);
|
||||
WritePluginBifItems(f, *it, plugin::BifItem::CONSTANT,
|
||||
"Options/Constants");
|
||||
WritePluginBifItems(f, *it, plugin::BifItem::GLOBAL, "Globals");
|
||||
WritePluginBifItems(f, *it, plugin::BifItem::TYPE, "Types");
|
||||
WritePluginBifItems(f, *it, plugin::BifItem::EVENT, "Events");
|
||||
WritePluginBifItems(f, *it, plugin::BifItem::FUNCTION, "Functions");
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
void CreateFileAnalyzerDoc(const char* filename)
|
||||
{
|
||||
FILE* f = fopen(filename, "w");
|
||||
|
||||
fprintf(f, "File Analyzer Reference\n");
|
||||
fprintf(f, "=======================\n\n");
|
||||
|
||||
WriteAnalyzerTagDefn(f, file_mgr->GetTagEnumType(), "Files");
|
||||
|
||||
plugin::Manager::plugin_list plugins = plugin_mgr->Plugins();
|
||||
plugin::Manager::plugin_list::const_iterator it;
|
||||
|
||||
for ( it = plugins.begin(); it != plugins.end(); ++it )
|
||||
{
|
||||
if ( ! ComponentsMatch(*it, plugin::component::FILE_ANALYZER) )
|
||||
continue;
|
||||
|
||||
WritePluginSectionHeading(f, *it);
|
||||
|
|
|
@ -413,4 +413,10 @@ private:
|
|||
*/
|
||||
void CreateProtoAnalyzerDoc(const char* filename);
|
||||
|
||||
/**
|
||||
* Writes out plugin index documentation for all file analyzer plugins.
|
||||
* @param filename the name of the file to write.
|
||||
*/
|
||||
void CreateFileAnalyzerDoc(const char* filename);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -319,6 +319,7 @@ set(bro_SRCS
|
|||
StateAccess.cc
|
||||
Stats.cc
|
||||
Stmt.cc
|
||||
Tag.cc
|
||||
Timer.cc
|
||||
Traverse.cc
|
||||
Trigger.cc
|
||||
|
@ -362,6 +363,8 @@ set(bro_SRCS
|
|||
3rdparty/sqlite3.c
|
||||
|
||||
plugin/Component.cc
|
||||
plugin/ComponentManager.h
|
||||
plugin/TaggedComponent.h
|
||||
plugin/Manager.cc
|
||||
plugin/Plugin.cc
|
||||
plugin/Macros.h
|
||||
|
|
|
@ -16,7 +16,8 @@ DebugLogger::Stream DebugLogger::streams[NUM_DBGS] = {
|
|||
{ "notifiers", 0, false }, { "main-loop", 0, false },
|
||||
{ "dpd", 0, false }, { "tm", 0, false },
|
||||
{ "logging", 0, false }, {"input", 0, false },
|
||||
{ "threading", 0, false }, { "file_analysis", 0, false }
|
||||
{ "threading", 0, false }, { "file_analysis", 0, false },
|
||||
{ "plugins", 0, false}
|
||||
};
|
||||
|
||||
DebugLogger::DebugLogger(const char* filename)
|
||||
|
|
|
@ -27,6 +27,7 @@ enum DebugStream {
|
|||
DBG_INPUT, // Input streams
|
||||
DBG_THREADING, // Threading system
|
||||
DBG_FILE_ANALYSIS, // File analysis
|
||||
DBG_PLUGINS,
|
||||
|
||||
NUM_DBGS // Has to be last
|
||||
};
|
||||
|
|
|
@ -238,10 +238,13 @@ TableType* record_field_table;
|
|||
|
||||
StringVal* cmd_line_bpf_filter;
|
||||
|
||||
StringVal* global_hash_seed;
|
||||
|
||||
OpaqueType* md5_type;
|
||||
OpaqueType* sha1_type;
|
||||
OpaqueType* sha256_type;
|
||||
OpaqueType* entropy_type;
|
||||
OpaqueType* topk_type;
|
||||
OpaqueType* bloomfilter_type;
|
||||
|
||||
#include "const.bif.netvar_def"
|
||||
|
@ -304,10 +307,13 @@ void init_general_global_var()
|
|||
cmd_line_bpf_filter =
|
||||
internal_val("cmd_line_bpf_filter")->AsStringVal();
|
||||
|
||||
global_hash_seed = opt_internal_string("global_hash_seed");
|
||||
|
||||
md5_type = new OpaqueType("md5");
|
||||
sha1_type = new OpaqueType("sha1");
|
||||
sha256_type = new OpaqueType("sha256");
|
||||
entropy_type = new OpaqueType("entropy");
|
||||
topk_type = new OpaqueType("topk");
|
||||
bloomfilter_type = new OpaqueType("bloomfilter");
|
||||
}
|
||||
|
||||
|
|
|
@ -242,11 +242,14 @@ extern TableType* record_field_table;
|
|||
|
||||
extern StringVal* cmd_line_bpf_filter;
|
||||
|
||||
extern StringVal* global_hash_seed;
|
||||
|
||||
class OpaqueType;
|
||||
extern OpaqueType* md5_type;
|
||||
extern OpaqueType* sha1_type;
|
||||
extern OpaqueType* sha256_type;
|
||||
extern OpaqueType* entropy_type;
|
||||
extern OpaqueType* topk_type;
|
||||
extern OpaqueType* bloomfilter_type;
|
||||
|
||||
// Initializes globals that don't pertain to network/event analysis.
|
||||
|
|
|
@ -566,14 +566,14 @@ BroType* BloomFilterVal::Type() const
|
|||
void BloomFilterVal::Add(const Val* val)
|
||||
{
|
||||
HashKey* key = hash->ComputeHash(val, 1);
|
||||
bloom_filter->Add(key->Hash());
|
||||
bloom_filter->Add(key);
|
||||
delete key;
|
||||
}
|
||||
|
||||
size_t BloomFilterVal::Count(const Val* val) const
|
||||
{
|
||||
HashKey* key = hash->ComputeHash(val, 1);
|
||||
size_t cnt = bloom_filter->Count(key->Hash());
|
||||
size_t cnt = bloom_filter->Count(key);
|
||||
delete key;
|
||||
return cnt;
|
||||
}
|
||||
|
@ -588,10 +588,17 @@ bool BloomFilterVal::Empty() const
|
|||
return bloom_filter->Empty();
|
||||
}
|
||||
|
||||
string BloomFilterVal::InternalState() const
|
||||
{
|
||||
return bloom_filter->InternalState();
|
||||
}
|
||||
|
||||
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
|
||||
const BloomFilterVal* y)
|
||||
{
|
||||
if ( ! same_type(x->Type(), y->Type()) )
|
||||
if ( x->Type() && // any one 0 is ok here
|
||||
y->Type() &&
|
||||
! same_type(x->Type(), y->Type()) )
|
||||
{
|
||||
reporter->Error("cannot merge Bloom filters with different types");
|
||||
return 0;
|
||||
|
@ -613,7 +620,7 @@ BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
|
|||
|
||||
BloomFilterVal* merged = new BloomFilterVal(copy);
|
||||
|
||||
if ( ! merged->Typify(x->Type()) )
|
||||
if ( x->Type() && ! merged->Typify(x->Type()) )
|
||||
{
|
||||
reporter->Error("failed to set type on merged Bloom filter");
|
||||
return 0;
|
||||
|
|
|
@ -127,6 +127,7 @@ public:
|
|||
size_t Count(const Val* val) const;
|
||||
void Clear();
|
||||
bool Empty() const;
|
||||
string InternalState() const;
|
||||
|
||||
static BloomFilterVal* Merge(const BloomFilterVal* x,
|
||||
const BloomFilterVal* y);
|
||||
|
|
|
@ -40,7 +40,7 @@ RuleActionAnalyzer::RuleActionAnalyzer(const char* arg_analyzer)
|
|||
string str(arg_analyzer);
|
||||
string::size_type pos = str.find(':');
|
||||
string arg = str.substr(0, pos);
|
||||
analyzer = analyzer_mgr->GetAnalyzerTag(arg.c_str());
|
||||
analyzer = analyzer_mgr->GetComponentTag(arg.c_str());
|
||||
|
||||
if ( ! analyzer )
|
||||
reporter->Warning("unknown analyzer '%s' specified in rule", arg.c_str());
|
||||
|
@ -48,7 +48,7 @@ RuleActionAnalyzer::RuleActionAnalyzer(const char* arg_analyzer)
|
|||
if ( pos != string::npos )
|
||||
{
|
||||
arg = str.substr(pos + 1);
|
||||
child_analyzer = analyzer_mgr->GetAnalyzerTag(arg.c_str());
|
||||
child_analyzer = analyzer_mgr->GetComponentTag(arg.c_str());
|
||||
|
||||
if ( ! child_analyzer )
|
||||
reporter->Warning("unknown analyzer '%s' specified in rule", arg.c_str());
|
||||
|
@ -60,11 +60,11 @@ RuleActionAnalyzer::RuleActionAnalyzer(const char* arg_analyzer)
|
|||
void RuleActionAnalyzer::PrintDebug()
|
||||
{
|
||||
if ( ! child_analyzer )
|
||||
fprintf(stderr, "|%s|\n", analyzer_mgr->GetAnalyzerName(analyzer));
|
||||
fprintf(stderr, "|%s|\n", analyzer_mgr->GetComponentName(analyzer));
|
||||
else
|
||||
fprintf(stderr, "|%s:%s|\n",
|
||||
analyzer_mgr->GetAnalyzerName(analyzer),
|
||||
analyzer_mgr->GetAnalyzerName(child_analyzer));
|
||||
analyzer_mgr->GetComponentName(analyzer),
|
||||
analyzer_mgr->GetComponentName(child_analyzer));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -52,6 +52,7 @@ SERIAL_IS(RE_MATCHER, 0x1400)
|
|||
SERIAL_IS(BITVECTOR, 0x1500)
|
||||
SERIAL_IS(COUNTERVECTOR, 0x1600)
|
||||
SERIAL_IS(BLOOMFILTER, 0x1700)
|
||||
SERIAL_IS(HASHER, 0x1800)
|
||||
|
||||
// These are the externally visible types.
|
||||
const SerialType SER_NONE = 0;
|
||||
|
@ -107,7 +108,8 @@ SERIAL_VAL(MD5_VAL, 16)
|
|||
SERIAL_VAL(SHA1_VAL, 17)
|
||||
SERIAL_VAL(SHA256_VAL, 18)
|
||||
SERIAL_VAL(ENTROPY_VAL, 19)
|
||||
SERIAL_VAL(BLOOMFILTER_VAL, 20)
|
||||
SERIAL_VAL(TOPK_VAL, 20)
|
||||
SERIAL_VAL(BLOOMFILTER_VAL, 21)
|
||||
|
||||
#define SERIAL_EXPR(name, val) SERIAL_CONST(name, val, EXPR)
|
||||
SERIAL_EXPR(EXPR, 1)
|
||||
|
@ -206,6 +208,11 @@ SERIAL_BLOOMFILTER(BLOOMFILTER, 1)
|
|||
SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2)
|
||||
SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3)
|
||||
|
||||
#define SERIAL_HASHER(name, val) SERIAL_CONST(name, val, HASHER)
|
||||
SERIAL_HASHER(HASHER, 1)
|
||||
SERIAL_HASHER(DEFAULTHASHER, 2)
|
||||
SERIAL_HASHER(DOUBLEHASHER, 3)
|
||||
|
||||
SERIAL_CONST2(ID)
|
||||
SERIAL_CONST2(STATE_ACCESS)
|
||||
SERIAL_CONST2(CASE)
|
||||
|
|
82
src/Tag.cc
Normal file
82
src/Tag.cc
Normal file
|
@ -0,0 +1,82 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#include "Tag.h"
|
||||
#include "Val.h"
|
||||
|
||||
// Builds a tag from a main type and a subtype, and creates the script-layer
// enum value of type etype that represents the pair.
Tag::Tag(EnumType* etype, type_t arg_type, subtype_t arg_subtype)
	{
	assert(arg_type > 0);

	type = arg_type;
	subtype = arg_subtype;

	// Pack both parts into a single integer: the main type sits in the
	// low bits, the subtype is shifted up by 31 bits.
	int64 shifted_subtype = (int64)subtype << 31;
	int64_t packed = (int64)(type) | shifted_subtype;

	// Bump etype's ref-count before passing it to the new EnumVal
	// (presumably the EnumVal keeps that reference -- confirm in Val.h).
	Ref(etype);
	val = new EnumVal(packed, etype);
	}
|
||||
|
||||
// Reconstructs a tag from a script-layer enum value by splitting the value's
// integer representation back into main type and subtype. The tag takes its
// own reference on arg_val.
Tag::Tag(EnumVal* arg_val)
	{
	assert(arg_val);

	val = arg_val;
	Ref(val);

	// The packing constructor stores the subtype starting at bit 31, so the
	// main type occupies only the low 31 bits. Masking with 0x7fffffff keeps
	// bit 31 (the subtype's lowest bit) from leaking into the main type,
	// which the previous 0xffffffff mask allowed for every odd subtype.
	int64 i = val->InternalInt();
	type = i & 0x7fffffff;
	subtype = (i >> 31) & 0xffffffff;
	}
|
||||
|
||||
// Copy constructor: duplicates main type and subtype, and shares other's
// enum value by taking an additional reference on it when one is set.
Tag::Tag(const Tag& other)
	: type(other.type), subtype(other.subtype), val(other.val)
	{
	if ( ! val )
		return;

	Ref(val);
	}
|
||||
|
||||
// Default constructor: zero main type and subtype, and no enum value yet.
// AsEnumVal() creates the value on demand for such a tag.
Tag::Tag()
	: type(0), subtype(0), val(0)
	{
	}
|
||||
|
||||
// Destructor: gives up this tag's reference on its enum value (if any) and
// clears the pointer.
Tag::~Tag()
	{
	EnumVal* held = val;
	val = 0;
	Unref(held);
	}
|
||||
|
||||
// Assignment operator: copies main type and subtype and shares other's enum
// value. The reference this tag previously held is released; without that,
// every assignment onto a tag that already owned an EnumVal leaked one
// reference.
Tag& Tag::operator=(const Tag& other)
	{
	if ( this != &other )
		{
		// Acquire the new reference before dropping the old one, so that
		// two distinct tags sharing the same EnumVal can never push its
		// ref-count to zero in between.
		if ( other.val )
			Ref(other.val);

		// val may be null here (default-constructed tag); the destructor
		// already relies on Unref() being callable in that state.
		Unref(val);

		type = other.type;
		subtype = other.subtype;
		val = other.val;
		}

	return *this;
	}
|
||||
|
||||
// Returns the script-layer enum value for this tag without adding a
// reference for the caller. A tag that has no value yet (which is only
// expected for the all-zero tag) gets one created on first use and cached
// in the mutable member.
EnumVal* Tag::AsEnumVal(EnumType* etype) const
	{
	if ( val )
		return val;

	assert(type == 0 && subtype == 0);

	Ref(etype);
	val = new EnumVal(0, etype);
	return val;
	}
|
||||
|
||||
std::string Tag::AsString() const
|
||||
{
|
||||
return fmt("%" PRIu32 "/%" PRIu32, type, subtype);
|
||||
}
|
138
src/Tag.h
Normal file
138
src/Tag.h
Normal file
|
@ -0,0 +1,138 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#ifndef TAG_H
|
||||
#define TAG_H
|
||||
|
||||
#include "config.h"
|
||||
#include "util.h"
|
||||
#include "Type.h"
|
||||
|
||||
class EnumVal;
|
||||
|
||||
/**
|
||||
* Class to identify an analyzer type.
|
||||
*
|
||||
* Each analyzer type gets a tag consisting of a main type and subtype. The
|
||||
* former is an identifier that's unique across all analyzer classes. The latter is
|
||||
* passed through to the analyzer instances for their use, yet not further
|
||||
* interpreted by the analyzer infrastructure; it allows an analyzer to
|
||||
* branch out into a set of sub-analyzers internally. Jointly, main type and
|
||||
* subtype form an analyzer "tag". Each unique tag corresponds to a single
|
||||
* "analyzer" from the user's perspective. At the script layer, these tags
|
||||
* are mapped into enums of type \c Analyzer::Tag or Files::Tag. Internally,
|
||||
* the analyzer::Manager and file_analysis::Manager maintain the mapping of tag
|
||||
* to analyzer (and they also assign them their main types), and
|
||||
* analyzer::Component and file_analysis::Component create new tags.
|
||||
*
|
||||
* The Tag class supports all operations necessary to act as an index in a
|
||||
* \c std::map.
|
||||
*/
|
||||
class Tag {
|
||||
public:
|
||||
/**
|
||||
* Type for the analyzer's main type.
|
||||
*/
|
||||
typedef uint32 type_t;
|
||||
|
||||
/**
|
||||
* Type for the analyzer's subtype.
|
||||
*/
|
||||
typedef uint32 subtype_t;
|
||||
|
||||
/**
|
||||
* Returns the tag's main type.
|
||||
*/
|
||||
type_t Type() const { return type; }
|
||||
|
||||
/**
|
||||
* Returns the tag's subtype.
|
||||
*/
|
||||
subtype_t Subtype() const { return subtype; }
|
||||
|
||||
/**
|
||||
* Returns the numerical values for main and subtype inside a string
|
||||
* suitable for printing. This is primarily for debugging.
|
||||
*/
|
||||
std::string AsString() const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
Tag(const Tag& other);
|
||||
|
||||
/**
|
||||
* Default constructor. This initializes the tag with an error value
|
||||
* that will make \c operator \c bool return false.
|
||||
*/
|
||||
Tag();
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
~Tag();
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*/
|
||||
Tag& operator=(const Tag& other);
|
||||
|
||||
/**
|
||||
* Compares two tags for equality.
|
||||
*/
|
||||
bool operator==(const Tag& other) const
|
||||
{
|
||||
return type == other.type && subtype == other.subtype;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two tags for inequality.
|
||||
*/
|
||||
bool operator!=(const Tag& other) const
|
||||
{
|
||||
return type != other.type || subtype != other.subtype;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two tags for less-than relationship.
|
||||
*/
|
||||
bool operator<(const Tag& other) const
|
||||
{
|
||||
return type != other.type ? type < other.type : (subtype < other.subtype);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the script-layer enum that corresponds to this tag.
|
||||
* The returned value does not have its ref-count increased.
|
||||
*
|
||||
* @param etype the script-layer enum type associated with the tag.
|
||||
*/
|
||||
EnumVal* AsEnumVal(EnumType* etype) const;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param etype the script-layer enum type associated with the tag.
|
||||
*
|
||||
* @param type The main type. Note that the manager class manages the
|
||||
* value space internally, so no one else should assign main types.
|
||||
*
|
||||
* @param subtype The sub type, which is left to an analyzer for
|
||||
* interpretation. By default it's set to zero.
|
||||
*/
|
||||
Tag(EnumType* etype, type_t type, subtype_t subtype = 0);
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param val An enum value of script type \c Analyzer::Tag or \c Files::Tag.
|
||||
*/
|
||||
Tag(EnumVal* val);
|
||||
|
||||
private:
|
||||
type_t type; // Main type.
|
||||
subtype_t subtype; // Subtype.
|
||||
mutable EnumVal* val; // Script-layer value.
|
||||
};
|
||||
|
||||
#endif
|
|
@ -70,12 +70,12 @@ void AnalyzerTimer::Init(Analyzer* arg_analyzer, analyzer_timer_func arg_timer,
|
|||
Ref(analyzer->Conn());
|
||||
}
|
||||
|
||||
analyzer::ID Analyzer::id_counter = 0;;
|
||||
analyzer::ID Analyzer::id_counter = 0;
|
||||
|
||||
const char* Analyzer::GetAnalyzerName() const
|
||||
{
|
||||
assert(tag);
|
||||
return analyzer_mgr->GetAnalyzerName(tag);
|
||||
return analyzer_mgr->GetComponentName(tag);
|
||||
}
|
||||
|
||||
void Analyzer::SetAnalyzerTag(const Tag& arg_tag)
|
||||
|
@ -87,7 +87,7 @@ void Analyzer::SetAnalyzerTag(const Tag& arg_tag)
|
|||
bool Analyzer::IsAnalyzer(const char* name)
|
||||
{
|
||||
assert(tag);
|
||||
return strcmp(analyzer_mgr->GetAnalyzerName(tag), name) == 0;
|
||||
return strcmp(analyzer_mgr->GetComponentName(tag), name) == 0;
|
||||
}
|
||||
|
||||
// Used in debugging output.
|
||||
|
@ -98,7 +98,7 @@ static string fmt_analyzer(Analyzer* a)
|
|||
|
||||
Analyzer::Analyzer(const char* name, Connection* conn)
|
||||
{
|
||||
Tag tag = analyzer_mgr->GetAnalyzerTag(name);
|
||||
Tag tag = analyzer_mgr->GetComponentTag(name);
|
||||
|
||||
if ( ! tag )
|
||||
reporter->InternalError("unknown analyzer name %s; mismatch with tag analyzer::Component?", name);
|
||||
|
@ -494,7 +494,7 @@ Analyzer* Analyzer::FindChild(Tag arg_tag)
|
|||
|
||||
Analyzer* Analyzer::FindChild(const char* name)
|
||||
{
|
||||
Tag tag = analyzer_mgr->GetAnalyzerTag(name);
|
||||
Tag tag = analyzer_mgr->GetComponentTag(name);
|
||||
return tag ? FindChild(tag) : 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -8,29 +8,26 @@
|
|||
|
||||
using namespace analyzer;
|
||||
|
||||
Tag::type_t Component::type_counter = 0;
|
||||
|
||||
Component::Component(const char* arg_name, factory_callback arg_factory, Tag::subtype_t arg_subtype, bool arg_enabled, bool arg_partial)
|
||||
: plugin::Component(plugin::component::ANALYZER)
|
||||
: plugin::Component(plugin::component::ANALYZER),
|
||||
plugin::TaggedComponent<analyzer::Tag>(arg_subtype)
|
||||
{
|
||||
name = copy_string(arg_name);
|
||||
canon_name = canonify_name(arg_name);
|
||||
factory = arg_factory;
|
||||
enabled = arg_enabled;
|
||||
partial = arg_partial;
|
||||
|
||||
tag = analyzer::Tag(++type_counter, arg_subtype);
|
||||
}
|
||||
|
||||
Component::Component(const Component& other)
|
||||
: plugin::Component(Type())
|
||||
: plugin::Component(Type()),
|
||||
plugin::TaggedComponent<analyzer::Tag>(other)
|
||||
{
|
||||
name = copy_string(other.name);
|
||||
canon_name = copy_string(other.canon_name);
|
||||
factory = other.factory;
|
||||
enabled = other.enabled;
|
||||
partial = other.partial;
|
||||
tag = other.tag;
|
||||
}
|
||||
|
||||
Component::~Component()
|
||||
|
@ -39,11 +36,6 @@ Component::~Component()
|
|||
delete [] canon_name;
|
||||
}
|
||||
|
||||
analyzer::Tag Component::Tag() const
|
||||
{
|
||||
return tag;
|
||||
}
|
||||
|
||||
void Component::Describe(ODesc* d) const
|
||||
{
|
||||
plugin::Component::Describe(d);
|
||||
|
@ -63,13 +55,14 @@ void Component::Describe(ODesc* d) const
|
|||
|
||||
Component& Component::operator=(const Component& other)
|
||||
{
|
||||
plugin::TaggedComponent<analyzer::Tag>::operator=(other);
|
||||
|
||||
if ( &other != this )
|
||||
{
|
||||
name = copy_string(other.name);
|
||||
factory = other.factory;
|
||||
enabled = other.enabled;
|
||||
partial = other.partial;
|
||||
tag = other.tag;
|
||||
}
|
||||
|
||||
return *this;
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "Tag.h"
|
||||
#include "plugin/Component.h"
|
||||
#include "plugin/TaggedComponent.h"
|
||||
|
||||
#include "../config.h"
|
||||
#include "../util.h"
|
||||
|
@ -21,7 +22,8 @@ class Analyzer;
|
|||
* A plugin can provide a specific protocol analyzer by registering this
|
||||
* analyzer component, describing the analyzer.
|
||||
*/
|
||||
class Component : public plugin::Component {
|
||||
class Component : public plugin::Component,
|
||||
public plugin::TaggedComponent<analyzer::Tag> {
|
||||
public:
|
||||
typedef Analyzer* (*factory_callback)(Connection* conn);
|
||||
|
||||
|
@ -100,13 +102,6 @@ public:
|
|||
*/
|
||||
bool Enabled() const { return enabled; }
|
||||
|
||||
/**
|
||||
* Returns the analyzer's tag. Note that this is automatically
|
||||
* generated for each new Components, and hence unique across all of
|
||||
* them.
|
||||
*/
|
||||
analyzer::Tag Tag() const;
|
||||
|
||||
/**
|
||||
* Enables or disables this analyzer.
|
||||
*
|
||||
|
@ -128,11 +123,7 @@ private:
|
|||
const char* canon_name; // The analyzer's canonical name.
|
||||
factory_callback factory; // The analyzer's factory callback.
|
||||
bool partial; // True if the analyzer supports partial connections.
|
||||
analyzer::Tag tag; // The automatically assigned analyzer tag.
|
||||
bool enabled; // True if the analyzer is enabled.
|
||||
|
||||
// Global counter used to generate unique tags.
|
||||
static analyzer::Tag::type_t type_counter;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -60,10 +60,8 @@ bool Manager::ConnIndex::operator<(const ConnIndex& other) const
|
|||
}
|
||||
|
||||
Manager::Manager()
|
||||
: plugin::ComponentManager<analyzer::Tag, analyzer::Component>("Analyzer")
|
||||
{
|
||||
tag_enum_type = new EnumType("Analyzer::Tag");
|
||||
::ID* id = install_ID("Tag", "Analyzer", true, true);
|
||||
add_type(id, tag_enum_type, 0, 0);
|
||||
}
|
||||
|
||||
Manager::~Manager()
|
||||
|
@ -91,14 +89,14 @@ void Manager::InitPreScript()
|
|||
std::list<Component*> analyzers = plugin_mgr->Components<Component>();
|
||||
|
||||
for ( std::list<Component*>::const_iterator i = analyzers.begin(); i != analyzers.end(); i++ )
|
||||
RegisterAnalyzerComponent(*i);
|
||||
RegisterComponent(*i, "ANALYZER_");
|
||||
|
||||
// Cache these tags.
|
||||
analyzer_backdoor = GetAnalyzerTag("BACKDOOR");
|
||||
analyzer_connsize = GetAnalyzerTag("CONNSIZE");
|
||||
analyzer_interconn = GetAnalyzerTag("INTERCONN");
|
||||
analyzer_stepping = GetAnalyzerTag("STEPPINGSTONE");
|
||||
analyzer_tcpstats = GetAnalyzerTag("TCPSTATS");
|
||||
analyzer_backdoor = GetComponentTag("BACKDOOR");
|
||||
analyzer_connsize = GetComponentTag("CONNSIZE");
|
||||
analyzer_interconn = GetComponentTag("INTERCONN");
|
||||
analyzer_stepping = GetComponentTag("STEPPINGSTONE");
|
||||
analyzer_tcpstats = GetComponentTag("TCPSTATS");
|
||||
}
|
||||
|
||||
void Manager::InitPostScript()
|
||||
|
@ -109,8 +107,9 @@ void Manager::DumpDebug()
|
|||
{
|
||||
#ifdef DEBUG
|
||||
DBG_LOG(DBG_ANALYZER, "Available analyzers after bro_init():");
|
||||
for ( analyzer_map_by_name::const_iterator i = analyzers_by_name.begin(); i != analyzers_by_name.end(); i++ )
|
||||
DBG_LOG(DBG_ANALYZER, " %s (%s)", i->second->Name(), IsEnabled(i->second->Tag()) ? "enabled" : "disabled");
|
||||
list<Component*> all_analyzers = GetComponents();
|
||||
for ( list<Component*>::const_iterator i = all_analyzers.begin(); i != all_analyzers.end(); ++i )
|
||||
DBG_LOG(DBG_ANALYZER, " %s (%s)", (*i)->Name(), IsEnabled((*i)->Tag()) ? "enabled" : "disabled");
|
||||
|
||||
DBG_LOG(DBG_ANALYZER, "");
|
||||
DBG_LOG(DBG_ANALYZER, "Analyzers by port:");
|
||||
|
@ -120,7 +119,7 @@ void Manager::DumpDebug()
|
|||
string s;
|
||||
|
||||
for ( tag_set::const_iterator j = i->second->begin(); j != i->second->end(); j++ )
|
||||
s += string(GetAnalyzerName(*j)) + " ";
|
||||
s += string(GetComponentName(*j)) + " ";
|
||||
|
||||
DBG_LOG(DBG_ANALYZER, " %d/tcp: %s", i->first, s.c_str());
|
||||
}
|
||||
|
@ -130,7 +129,7 @@ void Manager::DumpDebug()
|
|||
string s;
|
||||
|
||||
for ( tag_set::const_iterator j = i->second->begin(); j != i->second->end(); j++ )
|
||||
s += string(GetAnalyzerName(*j)) + " ";
|
||||
s += string(GetComponentName(*j)) + " ";
|
||||
|
||||
DBG_LOG(DBG_ANALYZER, " %d/udp: %s", i->first, s.c_str());
|
||||
}
|
||||
|
@ -142,25 +141,6 @@ void Manager::Done()
|
|||
{
|
||||
}
|
||||
|
||||
void Manager::RegisterAnalyzerComponent(Component* component)
|
||||
{
|
||||
const char* cname = component->CanonicalName();
|
||||
|
||||
if ( Lookup(cname) )
|
||||
reporter->FatalError("Analyzer %s defined more than once", cname);
|
||||
|
||||
DBG_LOG(DBG_ANALYZER, "Registering analyzer %s (tag %s)",
|
||||
component->Name(), component->Tag().AsString().c_str());
|
||||
|
||||
analyzers_by_name.insert(std::make_pair(cname, component));
|
||||
analyzers_by_tag.insert(std::make_pair(component->Tag(), component));
|
||||
analyzers_by_val.insert(std::make_pair(component->Tag().AsEnumVal()->InternalInt(), component));
|
||||
|
||||
// Install enum "Analyzer::ANALYZER_*"
|
||||
string id = fmt("ANALYZER_%s", cname);
|
||||
tag_enum_type->AddName("Analyzer", id.c_str(), component->Tag().AsEnumVal()->InternalInt(), true);
|
||||
}
|
||||
|
||||
bool Manager::EnableAnalyzer(Tag tag)
|
||||
{
|
||||
Component* p = Lookup(tag);
|
||||
|
@ -217,8 +197,9 @@ void Manager::DisableAllAnalyzers()
|
|||
{
|
||||
DBG_LOG(DBG_ANALYZER, "Disabling all analyzers");
|
||||
|
||||
for ( analyzer_map_by_tag::const_iterator i = analyzers_by_tag.begin(); i != analyzers_by_tag.end(); i++ )
|
||||
i->second->SetEnabled(false);
|
||||
list<Component*> all_analyzers = GetComponents();
|
||||
for ( list<Component*>::const_iterator i = all_analyzers.begin(); i != all_analyzers.end(); ++i )
|
||||
(*i)->SetEnabled(false);
|
||||
}
|
||||
|
||||
bool Manager::IsEnabled(Tag tag)
|
||||
|
@ -270,7 +251,7 @@ bool Manager::RegisterAnalyzerForPort(Tag tag, TransportProto proto, uint32 port
|
|||
tag_set* l = LookupPort(proto, port, true);
|
||||
|
||||
#ifdef DEBUG
|
||||
const char* name = GetAnalyzerName(tag);
|
||||
const char* name = GetComponentName(tag);
|
||||
DBG_LOG(DBG_ANALYZER, "Registering analyzer %s for port %" PRIu32 "/%d", name, port, proto);
|
||||
#endif
|
||||
|
||||
|
@ -283,7 +264,7 @@ bool Manager::UnregisterAnalyzerForPort(Tag tag, TransportProto proto, uint32 po
|
|||
tag_set* l = LookupPort(proto, port, true);
|
||||
|
||||
#ifdef DEBUG
|
||||
const char* name = GetAnalyzerName(tag);
|
||||
const char* name = GetComponentName(tag);
|
||||
DBG_LOG(DBG_ANALYZER, "Unregistering analyzer %s for port %" PRIu32 "/%d", name, port, proto);
|
||||
#endif
|
||||
|
||||
|
@ -302,7 +283,7 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, Connection* conn)
|
|||
return 0;
|
||||
|
||||
if ( ! c->Factory() )
|
||||
reporter->InternalError("analyzer %s cannot be instantiated dynamically", GetAnalyzerName(tag));
|
||||
reporter->InternalError("analyzer %s cannot be instantiated dynamically", GetComponentName(tag));
|
||||
|
||||
Analyzer* a = c->Factory()(conn);
|
||||
|
||||
|
@ -316,59 +297,10 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, Connection* conn)
|
|||
|
||||
Analyzer* Manager::InstantiateAnalyzer(const char* name, Connection* conn)
|
||||
{
|
||||
Tag tag = GetAnalyzerTag(name);
|
||||
Tag tag = GetComponentTag(name);
|
||||
return tag ? InstantiateAnalyzer(tag, conn) : 0;
|
||||
}
|
||||
|
||||
const char* Manager::GetAnalyzerName(Tag tag)
|
||||
{
|
||||
static const char* error = "<error>";
|
||||
|
||||
if ( ! tag )
|
||||
return error;
|
||||
|
||||
Component* c = Lookup(tag);
|
||||
|
||||
if ( ! c )
|
||||
reporter->InternalError("request for name of unknown analyzer tag %s", tag.AsString().c_str());
|
||||
|
||||
return c->CanonicalName();
|
||||
}
|
||||
|
||||
const char* Manager::GetAnalyzerName(Val* val)
|
||||
{
|
||||
return GetAnalyzerName(Tag(val->AsEnumVal()));
|
||||
}
|
||||
|
||||
Tag Manager::GetAnalyzerTag(const char* name)
|
||||
{
|
||||
Component* c = Lookup(name);
|
||||
return c ? c->Tag() : Tag();
|
||||
}
|
||||
|
||||
EnumType* Manager::GetTagEnumType()
|
||||
{
|
||||
return tag_enum_type;
|
||||
}
|
||||
|
||||
Component* Manager::Lookup(const char* name)
|
||||
{
|
||||
analyzer_map_by_name::const_iterator i = analyzers_by_name.find(to_upper(name));
|
||||
return i != analyzers_by_name.end() ? i->second : 0;
|
||||
}
|
||||
|
||||
Component* Manager::Lookup(const Tag& tag)
|
||||
{
|
||||
analyzer_map_by_tag::const_iterator i = analyzers_by_tag.find(tag);
|
||||
return i != analyzers_by_tag.end() ? i->second : 0;
|
||||
}
|
||||
|
||||
Component* Manager::Lookup(EnumVal* val)
|
||||
{
|
||||
analyzer_map_by_val::const_iterator i = analyzers_by_val.find(val->InternalInt());
|
||||
return i != analyzers_by_val.end() ? i->second : 0;
|
||||
}
|
||||
|
||||
Manager::tag_set* Manager::LookupPort(TransportProto proto, uint32 port, bool add_if_not_found)
|
||||
{
|
||||
analyzer_map_by_port* m = 0;
|
||||
|
@ -461,7 +393,7 @@ bool Manager::BuildInitialAnalyzerTree(Connection* conn)
|
|||
root->AddChildAnalyzer(analyzer, false);
|
||||
|
||||
DBG_ANALYZER_ARGS(conn, "activated %s analyzer as scheduled",
|
||||
analyzer_mgr->GetAnalyzerName(*i));
|
||||
analyzer_mgr->GetComponentName(*i));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -487,7 +419,7 @@ bool Manager::BuildInitialAnalyzerTree(Connection* conn)
|
|||
|
||||
root->AddChildAnalyzer(analyzer, false);
|
||||
DBG_ANALYZER_ARGS(conn, "activated %s analyzer due to port %d",
|
||||
analyzer_mgr->GetAnalyzerName(*j), resp_port);
|
||||
analyzer_mgr->GetComponentName(*j), resp_port);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -613,7 +545,7 @@ void Manager::ExpireScheduledAnalyzers()
|
|||
conns.erase(i);
|
||||
|
||||
DBG_LOG(DBG_ANALYZER, "Expiring expected analyzer %s for connection %s",
|
||||
analyzer_mgr->GetAnalyzerName(a->analyzer),
|
||||
analyzer_mgr->GetComponentName(a->analyzer),
|
||||
fmt_conn_id(a->conn.orig, 0, a->conn.resp, a->conn.resp_p));
|
||||
|
||||
delete a;
|
||||
|
@ -655,7 +587,7 @@ void Manager::ScheduleAnalyzer(const IPAddr& orig, const IPAddr& resp,
|
|||
TransportProto proto, const char* analyzer,
|
||||
double timeout)
|
||||
{
|
||||
Tag tag = GetAnalyzerTag(analyzer);
|
||||
Tag tag = GetComponentTag(analyzer);
|
||||
|
||||
if ( tag != Tag() )
|
||||
ScheduleAnalyzer(orig, resp, resp_p, proto, tag, timeout);
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "Analyzer.h"
|
||||
#include "Component.h"
|
||||
#include "Tag.h"
|
||||
#include "plugin/ComponentManager.h"
|
||||
|
||||
#include "../Dict.h"
|
||||
#include "../net_util.h"
|
||||
|
@ -49,7 +50,7 @@ namespace analyzer {
|
|||
* classes. This allows external analyzer code to potentially use a
|
||||
* different C++ standard library.
|
||||
*/
|
||||
class Manager {
|
||||
class Manager : public plugin::ComponentManager<Tag, Component> {
|
||||
public:
|
||||
/**
|
||||
* Constructor.
|
||||
|
@ -231,42 +232,6 @@ public:
|
|||
*/
|
||||
Analyzer* InstantiateAnalyzer(const char* name, Connection* c);
|
||||
|
||||
/**
|
||||
* Translates an analyzer tag into corresponding analyzer name.
|
||||
*
|
||||
* @param tag The analyzer tag.
|
||||
*
|
||||
* @return The name, or an empty string if the tag is invalid.
|
||||
*/
|
||||
const char* GetAnalyzerName(Tag tag);
|
||||
|
||||
/**
|
||||
* Translates an script-level analyzer tag into corresponding
|
||||
* analyzer name.
|
||||
*
|
||||
* @param val The analyzer tag as an script-level enum value of type
|
||||
* \c Analyzer::Tag.
|
||||
*
|
||||
* @return The name, or an empty string if the tag is invalid.
|
||||
*/
|
||||
const char* GetAnalyzerName(Val* val);
|
||||
|
||||
/**
|
||||
* Translates an analyzer name into the corresponding tag.
|
||||
*
|
||||
* @param name The name.
|
||||
*
|
||||
* @return The tag. If the name does not correspond to a valid
|
||||
* analyzer, the returned tag will evaluate to false.
|
||||
*/
|
||||
Tag GetAnalyzerTag(const char* name);
|
||||
|
||||
/**
|
||||
* Returns the enum type that corresponds to the script-level type \c
|
||||
* Analyzer::Tag.
|
||||
*/
|
||||
EnumType* GetTagEnumType();
|
||||
|
||||
/**
|
||||
* Given the first packet of a connection, builds its initial
|
||||
* analyzer tree.
|
||||
|
@ -350,18 +315,8 @@ public:
|
|||
|
||||
private:
|
||||
typedef set<Tag> tag_set;
|
||||
typedef map<string, Component*> analyzer_map_by_name;
|
||||
typedef map<Tag, Component*> analyzer_map_by_tag;
|
||||
typedef map<int, Component*> analyzer_map_by_val;
|
||||
typedef map<uint32, tag_set*> analyzer_map_by_port;
|
||||
|
||||
void RegisterAnalyzerComponent(Component* component); // Takes ownership.
|
||||
|
||||
Component* Lookup(const string& name);
|
||||
Component* Lookup(const char* name);
|
||||
Component* Lookup(const Tag& tag);
|
||||
Component* Lookup(EnumVal* val);
|
||||
|
||||
tag_set* LookupPort(PortVal* val, bool add_if_not_found);
|
||||
tag_set* LookupPort(TransportProto proto, uint32 port, bool add_if_not_found);
|
||||
|
||||
|
@ -370,9 +325,6 @@ private:
|
|||
|
||||
analyzer_map_by_port analyzers_by_port_tcp;
|
||||
analyzer_map_by_port analyzers_by_port_udp;
|
||||
analyzer_map_by_name analyzers_by_name;
|
||||
analyzer_map_by_tag analyzers_by_tag;
|
||||
analyzer_map_by_val analyzers_by_val;
|
||||
|
||||
Tag analyzer_backdoor;
|
||||
Tag analyzer_connsize;
|
||||
|
@ -380,8 +332,6 @@ private:
|
|||
Tag analyzer_stepping;
|
||||
Tag analyzer_tcpstats;
|
||||
|
||||
EnumType* tag_enum_type;
|
||||
|
||||
//// Data structures to track analyzers scheduled for future connections.
|
||||
|
||||
// The index for a scheduled connection.
|
||||
|
|
|
@ -3,90 +3,20 @@
|
|||
#include "Tag.h"
|
||||
#include "Manager.h"
|
||||
|
||||
#include "../NetVar.h"
|
||||
analyzer::Tag analyzer::Tag::Error;
|
||||
|
||||
using namespace analyzer;
|
||||
|
||||
Tag Tag::Error;
|
||||
|
||||
Tag::Tag(type_t arg_type, subtype_t arg_subtype)
|
||||
analyzer::Tag::Tag(type_t type, subtype_t subtype)
|
||||
: ::Tag(analyzer_mgr->GetTagEnumType(), type, subtype)
|
||||
{
|
||||
assert(arg_type > 0);
|
||||
|
||||
type = arg_type;
|
||||
subtype = arg_subtype;
|
||||
int64_t i = (int64)(type) | ((int64)subtype << 31);
|
||||
|
||||
EnumType* etype = analyzer_mgr->GetTagEnumType();
|
||||
Ref(etype);
|
||||
val = new EnumVal(i, etype);
|
||||
}
|
||||
|
||||
Tag::Tag(EnumVal* arg_val)
|
||||
analyzer::Tag& analyzer::Tag::operator=(const analyzer::Tag& other)
|
||||
{
|
||||
assert(arg_val);
|
||||
|
||||
val = arg_val;
|
||||
Ref(val);
|
||||
|
||||
int64 i = val->InternalInt();
|
||||
type = i & 0xffffffff;
|
||||
subtype = (i >> 31) & 0xffffffff;
|
||||
}
|
||||
|
||||
Tag::Tag(const Tag& other)
|
||||
{
|
||||
type = other.type;
|
||||
subtype = other.subtype;
|
||||
val = other.val;
|
||||
|
||||
if ( val )
|
||||
Ref(val);
|
||||
}
|
||||
|
||||
Tag::Tag()
|
||||
{
|
||||
type = 0;
|
||||
subtype = 0;
|
||||
val = 0;
|
||||
}
|
||||
|
||||
Tag::~Tag()
|
||||
{
|
||||
Unref(val);
|
||||
val = 0;
|
||||
}
|
||||
|
||||
Tag& Tag::operator=(const Tag& other)
|
||||
{
|
||||
if ( this != &other )
|
||||
{
|
||||
type = other.type;
|
||||
subtype = other.subtype;
|
||||
val = other.val;
|
||||
|
||||
if ( val )
|
||||
Ref(val);
|
||||
}
|
||||
|
||||
::Tag::operator=(other);
|
||||
return *this;
|
||||
}
|
||||
|
||||
EnumVal* Tag::AsEnumVal() const
|
||||
EnumVal* analyzer::Tag::AsEnumVal() const
|
||||
{
|
||||
if ( ! val )
|
||||
{
|
||||
assert(analyzer_mgr);
|
||||
assert(type == 0 && subtype == 0);
|
||||
EnumType* etype = analyzer_mgr->GetTagEnumType();
|
||||
Ref(etype);
|
||||
val = new EnumVal(0, etype);
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
std::string Tag::AsString() const
|
||||
{
|
||||
return fmt("%" PRIu32 "/%" PRIu32, type, subtype);
|
||||
return ::Tag::AsEnumVal(analyzer_mgr->GetTagEnumType());
|
||||
}
|
||||
|
|
|
@ -5,90 +5,46 @@
|
|||
|
||||
#include "config.h"
|
||||
#include "util.h"
|
||||
#include "../Tag.h"
|
||||
#include "plugin/TaggedComponent.h"
|
||||
#include "plugin/ComponentManager.h"
|
||||
|
||||
class EnumVal;
|
||||
|
||||
namespace file_analysis {
|
||||
class Manager;
|
||||
class Component;
|
||||
}
|
||||
|
||||
namespace analyzer {
|
||||
|
||||
class Manager;
|
||||
class Component;
|
||||
|
||||
/**
|
||||
* Class to identify an analyzer type.
|
||||
* Class to identify a protocol analyzer type.
|
||||
*
|
||||
* Each analyzer type gets a tag consisting of a main type and subtype. The
|
||||
* former is an identifier that's unique across all analyzer classes. The latter is
|
||||
* passed through to the analyzer instances for their use, yet not further
|
||||
* interpreted by the analyzer infrastructure; it allows an analyzer to
|
||||
* branch out into a set of sub-analyzers internally. Jointly, main type and
|
||||
* subtype form an analyzer "tag". Each unique tag corresponds to a single
|
||||
* "analyzer" from the user's perspective. At the script layer, these tags
|
||||
* are mapped into enums of type \c Analyzer::Tag. Internally, the
|
||||
* analyzer::Manager maintains the mapping of tag to analyzer (and it also
|
||||
* assigns them their main types), and analyzer::Component creates new
|
||||
* tags.
|
||||
*
|
||||
* The Tag class supports all operations necessary to act as an index in a
|
||||
* \c std::map.
|
||||
* The script-layer analogue is Analyzer::Tag.
|
||||
*/
|
||||
class Tag {
|
||||
class Tag : public ::Tag {
|
||||
public:
|
||||
/**
|
||||
* Type for the analyzer's main type.
|
||||
*/
|
||||
typedef uint32 type_t;
|
||||
|
||||
/**
|
||||
* Type for the analyzer's subtype.
|
||||
*/
|
||||
typedef uint32 subtype_t;
|
||||
|
||||
/*
|
||||
* Copy constructor.
|
||||
*/
|
||||
Tag(const Tag& other);
|
||||
Tag(const Tag& other) : ::Tag(other) {}
|
||||
|
||||
/**
|
||||
* Default constructor. This initializes the tag with an error value
|
||||
* that will make \c operator \c bool return false.
|
||||
*/
|
||||
Tag();
|
||||
Tag() : ::Tag() {}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
~Tag();
|
||||
|
||||
/**
|
||||
* Returns the tag's main type.
|
||||
*/
|
||||
type_t Type() const { return type; }
|
||||
|
||||
/**
|
||||
* Returns the tag's subtype.
|
||||
*/
|
||||
subtype_t Subtype() const { return subtype; }
|
||||
|
||||
/**
|
||||
* Returns the \c Analyzer::Tag enum that corresponds to this tag.
|
||||
* The returned value is \a does not have its ref-count increased.
|
||||
*/
|
||||
EnumVal* AsEnumVal() const;
|
||||
|
||||
/**
|
||||
* Returns the numerical values for main and subtype inside a string
|
||||
* suitable for printing. This is primarily for debugging.
|
||||
*/
|
||||
std::string AsString() const;
|
||||
~Tag() {}
|
||||
|
||||
/**
|
||||
* Returns false if the tag represents an error value rather than a
|
||||
* legal analyzer type.
|
||||
* TODO: make this conversion operator "explicit" (C++11) or use a
|
||||
* "safe bool" idiom (not necessary if "explicit" is available),
|
||||
* otherwise this may allow nonsense/undesired comparison operations.
|
||||
*/
|
||||
operator bool() const { return *this != Tag(); }
|
||||
|
||||
|
@ -102,7 +58,7 @@ public:
|
|||
*/
|
||||
bool operator==(const Tag& other) const
|
||||
{
|
||||
return type == other.type && subtype == other.subtype;
|
||||
return ::Tag::operator==(other);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -110,7 +66,7 @@ public:
|
|||
*/
|
||||
bool operator!=(const Tag& other) const
|
||||
{
|
||||
return type != other.type || subtype != other.subtype;
|
||||
return ::Tag::operator!=(other);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -118,23 +74,30 @@ public:
|
|||
*/
|
||||
bool operator<(const Tag& other) const
|
||||
{
|
||||
return type != other.type ? type < other.type : (subtype < other.subtype);
|
||||
return ::Tag::operator<(other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the \c Analyzer::Tag enum that corresponds to this tag.
|
||||
* The returned value does not have its ref-count increased.
|
||||
*
|
||||
* @param etype the script-layer enum type associated with the tag.
|
||||
*/
|
||||
EnumVal* AsEnumVal() const;
|
||||
|
||||
static Tag Error;
|
||||
|
||||
protected:
|
||||
friend class analyzer::Manager;
|
||||
friend class analyzer::Component;
|
||||
friend class file_analysis::Manager;
|
||||
friend class file_analysis::Component;
|
||||
friend class plugin::ComponentManager<Tag, Component>;
|
||||
friend class plugin::TaggedComponent<Tag>;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param type The main type. Note that the \a analyzer::Manager
|
||||
* manages the value space internally, so no one else should assign
|
||||
* any main tyoes.
|
||||
* any main types.
|
||||
*
|
||||
* @param subtype The sub type, which is left to an analyzer for
|
||||
* interpretation. By default it's set to zero.
|
||||
|
@ -144,14 +107,9 @@ protected:
|
|||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param val An enuam value of script type \c Analyzer::Tag.
|
||||
* @param val An enum value of script type \c Analyzer::Tag.
|
||||
*/
|
||||
Tag(EnumVal* val);
|
||||
|
||||
private:
|
||||
type_t type; // Main type.
|
||||
subtype_t subtype; // Subtype.
|
||||
mutable EnumVal* val; // Analyzer::Tag value.
|
||||
Tag(EnumVal* val) : ::Tag(val) {}
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -41,11 +41,11 @@ function Analyzer::__schedule_analyzer%(orig: addr, resp: addr, resp_p: port,
|
|||
|
||||
function __name%(atype: Analyzer::Tag%) : string
|
||||
%{
|
||||
return new StringVal(analyzer_mgr->GetAnalyzerName(atype));
|
||||
return new StringVal(analyzer_mgr->GetComponentName(atype));
|
||||
%}
|
||||
|
||||
function __tag%(name: string%) : Analyzer::Tag
|
||||
%{
|
||||
analyzer::Tag t = analyzer_mgr->GetAnalyzerTag(name->CheckString());
|
||||
analyzer::Tag t = analyzer_mgr->GetComponentTag(name->CheckString());
|
||||
return t.AsEnumVal()->Ref();
|
||||
%}
|
||||
|
|
11
src/file_analysis/Analyzer.cc
Normal file
11
src/file_analysis/Analyzer.cc
Normal file
|
@ -0,0 +1,11 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#include "Analyzer.h"
|
||||
#include "Manager.h"
|
||||
|
||||
file_analysis::Analyzer::~Analyzer()
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %s",
|
||||
file_mgr->GetComponentName(tag));
|
||||
Unref(args);
|
||||
}
|
|
@ -5,14 +5,12 @@
|
|||
|
||||
#include "Val.h"
|
||||
#include "NetVar.h"
|
||||
#include "analyzer/Tag.h"
|
||||
#include "Tag.h"
|
||||
|
||||
#include "file_analysis/file_analysis.bif.h"
|
||||
|
||||
namespace file_analysis {
|
||||
|
||||
typedef int FA_Tag;
|
||||
|
||||
class File;
|
||||
|
||||
/**
|
||||
|
@ -25,11 +23,7 @@ public:
|
|||
* Destructor. Nothing special about it. Virtual since we definitely expect
|
||||
* to delete instances of derived classes via pointers to this class.
|
||||
*/
|
||||
virtual ~Analyzer()
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %d", tag);
|
||||
Unref(args);
|
||||
}
|
||||
virtual ~Analyzer();
|
||||
|
||||
/**
|
||||
* Subclasses may override this method to receive file data non-sequentially.
|
||||
|
@ -76,7 +70,7 @@ public:
|
|||
/**
|
||||
* @return the analyzer type enum value.
|
||||
*/
|
||||
FA_Tag Tag() const { return tag; }
|
||||
file_analysis::Tag Tag() const { return tag; }
|
||||
|
||||
/**
|
||||
* @return the AnalyzerArgs associated with the analyzer.
|
||||
|
@ -88,18 +82,6 @@ public:
|
|||
*/
|
||||
File* GetFile() const { return file; }
|
||||
|
||||
/**
|
||||
* Retrieves an analyzer tag field from full analyzer argument record.
|
||||
* @param args an \c AnalyzerArgs (script-layer type) value.
|
||||
* @return the analyzer tag equivalent of the 'tag' field from the
|
||||
* \c AnalyzerArgs value \a args.
|
||||
*/
|
||||
static FA_Tag ArgsTag(const RecordVal* args)
|
||||
{
|
||||
using BifType::Record::Files::AnalyzerArgs;
|
||||
return args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum();
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
|
@ -108,15 +90,15 @@ protected:
|
|||
* tunable options, if any, related to a particular analyzer type.
|
||||
* @param arg_file the file to which the analyzer is being attached.
|
||||
*/
|
||||
Analyzer(RecordVal* arg_args, File* arg_file)
|
||||
: tag(file_analysis::Analyzer::ArgsTag(arg_args)),
|
||||
Analyzer(file_analysis::Tag arg_tag, RecordVal* arg_args, File* arg_file)
|
||||
: tag(arg_tag),
|
||||
args(arg_args->Ref()->AsRecordVal()),
|
||||
file(arg_file)
|
||||
{}
|
||||
|
||||
private:
|
||||
|
||||
FA_Tag tag; /**< The particular analyzer type of the analyzer instance. */
|
||||
file_analysis::Tag tag; /**< The particular type of the analyzer instance. */
|
||||
RecordVal* args; /**< \c AnalyzerArgs val gives tunable analyzer params. */
|
||||
File* file; /**< The file to which the analyzer is attached. */
|
||||
};
|
||||
|
|
|
@ -15,6 +15,7 @@ static void analyzer_del_func(void* v)
|
|||
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file)
|
||||
{
|
||||
TypeList* t = new TypeList();
|
||||
t->Append(file_mgr->GetTagEnumType()->Ref());
|
||||
t->Append(BifType::Record::Files::AnalyzerArgs->Ref());
|
||||
analyzer_hash = new CompositeHash(t);
|
||||
Unref(t);
|
||||
|
@ -34,20 +35,20 @@ AnalyzerSet::~AnalyzerSet()
|
|||
delete analyzer_hash;
|
||||
}
|
||||
|
||||
bool AnalyzerSet::Add(RecordVal* args)
|
||||
bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args)
|
||||
{
|
||||
HashKey* key = GetKey(args);
|
||||
HashKey* key = GetKey(tag, args);
|
||||
|
||||
if ( analyzer_map.Lookup(key) )
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d skipped for file id"
|
||||
" %s: already exists", file_analysis::Analyzer::ArgsTag(args),
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s skipped for file id"
|
||||
" %s: already exists", file_mgr->GetComponentName(tag),
|
||||
file->GetID().c_str());
|
||||
delete key;
|
||||
return true;
|
||||
}
|
||||
|
||||
file_analysis::Analyzer* a = InstantiateAnalyzer(args);
|
||||
file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args);
|
||||
|
||||
if ( ! a )
|
||||
{
|
||||
|
@ -60,10 +61,10 @@ bool AnalyzerSet::Add(RecordVal* args)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool AnalyzerSet::QueueAdd(RecordVal* args)
|
||||
bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args)
|
||||
{
|
||||
HashKey* key = GetKey(args);
|
||||
file_analysis::Analyzer* a = InstantiateAnalyzer(args);
|
||||
HashKey* key = GetKey(tag, args);
|
||||
file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args);
|
||||
|
||||
if ( ! a )
|
||||
{
|
||||
|
@ -80,8 +81,9 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
|
|||
{
|
||||
if ( set->analyzer_map.Lookup(key) )
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d skipped for file id"
|
||||
" %s: already exists", a->Tag(), a->GetFile()->GetID().c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s skipped for file id"
|
||||
" %s: already exists", file_mgr->GetComponentName(a->Tag()),
|
||||
a->GetFile()->GetID().c_str());
|
||||
|
||||
Abort();
|
||||
return true;
|
||||
|
@ -91,12 +93,12 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool AnalyzerSet::Remove(const RecordVal* args)
|
||||
bool AnalyzerSet::Remove(file_analysis::Tag tag, RecordVal* args)
|
||||
{
|
||||
return Remove(file_analysis::Analyzer::ArgsTag(args), GetKey(args));
|
||||
return Remove(tag, GetKey(tag, args));
|
||||
}
|
||||
|
||||
bool AnalyzerSet::Remove(FA_Tag tag, HashKey* key)
|
||||
bool AnalyzerSet::Remove(file_analysis::Tag tag, HashKey* key)
|
||||
{
|
||||
file_analysis::Analyzer* a =
|
||||
(file_analysis::Analyzer*) analyzer_map.Remove(key);
|
||||
|
@ -105,22 +107,22 @@ bool AnalyzerSet::Remove(FA_Tag tag, HashKey* key)
|
|||
|
||||
if ( ! a )
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %d for file id %s",
|
||||
tag, file->GetID().c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %s for file id %s",
|
||||
file_mgr->GetComponentName(tag), file->GetID().c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %d for file id %s", a->Tag(),
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %s for file id %s",
|
||||
file_mgr->GetComponentName(tag),
|
||||
file->GetID().c_str());
|
||||
|
||||
delete a;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AnalyzerSet::QueueRemove(const RecordVal* args)
|
||||
bool AnalyzerSet::QueueRemove(file_analysis::Tag tag, RecordVal* args)
|
||||
{
|
||||
HashKey* key = GetKey(args);
|
||||
FA_Tag tag = file_analysis::Analyzer::ArgsTag(args);
|
||||
HashKey* key = GetKey(tag, args);
|
||||
|
||||
mod_queue.push(new RemoveMod(tag, key));
|
||||
|
||||
|
@ -132,24 +134,28 @@ bool AnalyzerSet::RemoveMod::Perform(AnalyzerSet* set)
|
|||
return set->Remove(tag, key);
|
||||
}
|
||||
|
||||
HashKey* AnalyzerSet::GetKey(const RecordVal* args) const
|
||||
HashKey* AnalyzerSet::GetKey(file_analysis::Tag t, RecordVal* args) const
|
||||
{
|
||||
HashKey* key = analyzer_hash->ComputeHash(args, 1);
|
||||
ListVal* lv = new ListVal(TYPE_ANY);
|
||||
lv->Append(t.AsEnumVal()->Ref());
|
||||
lv->Append(args->Ref());
|
||||
HashKey* key = analyzer_hash->ComputeHash(lv, 1);
|
||||
Unref(lv);
|
||||
if ( ! key )
|
||||
reporter->InternalError("AnalyzerArgs type mismatch");
|
||||
|
||||
return key;
|
||||
}
|
||||
|
||||
file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const
|
||||
file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(Tag tag,
|
||||
RecordVal* args) const
|
||||
{
|
||||
FA_Tag tag = file_analysis::Analyzer::ArgsTag(args);
|
||||
file_analysis::Analyzer* a = file_mgr->InstantiateAnalyzer(tag, args, file);
|
||||
|
||||
if ( ! a )
|
||||
{
|
||||
reporter->Error("Failed file analyzer %s instantiation for file id %s",
|
||||
file_mgr->GetAnalyzerName(tag), file->GetID().c_str());
|
||||
file_mgr->GetComponentName(tag), file->GetID().c_str());
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -158,8 +164,8 @@ file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const
|
|||
|
||||
void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key)
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d for file id %s", a->Tag(),
|
||||
file->GetID().c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s for file id %s",
|
||||
file_mgr->GetComponentName(a->Tag()), file->GetID().c_str());
|
||||
analyzer_map.Insert(key, a);
|
||||
delete key;
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "Dict.h"
|
||||
#include "CompHash.h"
|
||||
#include "Val.h"
|
||||
#include "Tag.h"
|
||||
|
||||
namespace file_analysis {
|
||||
|
||||
|
@ -38,31 +39,35 @@ public:
|
|||
|
||||
/**
|
||||
* Attach an analyzer to #file immediately.
|
||||
* @param tag the analyzer tag of the file analyzer to add.
|
||||
* @param args an \c AnalyzerArgs value which specifies an analyzer.
|
||||
* @return true if analyzer was instantiated/attached, else false.
|
||||
*/
|
||||
bool Add(RecordVal* args);
|
||||
bool Add(file_analysis::Tag tag, RecordVal* args);
|
||||
|
||||
/**
|
||||
* Queue the attachment of an analyzer to #file.
|
||||
* @param tag the analyzer tag of the file analyzer to add.
|
||||
* @param args an \c AnalyzerArgs value which specifies an analyzer.
|
||||
* @return true if analyzer was able to be instantiated, else false.
|
||||
*/
|
||||
bool QueueAdd(RecordVal* args);
|
||||
bool QueueAdd(file_analysis::Tag tag, RecordVal* args);
|
||||
|
||||
/**
|
||||
* Remove an analyzer from #file immediately.
|
||||
* @param tag the analyzer tag of the file analyzer to remove.
|
||||
* @param args an \c AnalyzerArgs value which specifies an analyzer.
|
||||
* @return false if analyzer didn't exist and so wasn't removed, else true.
|
||||
*/
|
||||
bool Remove(const RecordVal* args);
|
||||
bool Remove(file_analysis::Tag tag, RecordVal* args);
|
||||
|
||||
/**
|
||||
* Queue the removal of an analyzer from #file.
|
||||
* @param tag the analyzer tag of the file analyzer to remove.
|
||||
* @param args an \c AnalyzerArgs value which specifies an analyzer.
|
||||
* @return true if analyzer exists at time of call, else false;
|
||||
*/
|
||||
bool QueueRemove(const RecordVal* args);
|
||||
bool QueueRemove(file_analysis::Tag tag, RecordVal* args);
|
||||
|
||||
/**
|
||||
* Perform all queued modifications to the current analyzer set.
|
||||
|
@ -91,17 +96,20 @@ protected:
|
|||
|
||||
/**
|
||||
* Get a hash key which represents an analyzer instance.
|
||||
* @param tag the file analyzer tag.
|
||||
* @param args an \c AnalyzerArgs value which specifies an analyzer.
|
||||
* @return the hash key calculated from \a args
|
||||
*/
|
||||
HashKey* GetKey(const RecordVal* args) const;
|
||||
HashKey* GetKey(file_analysis::Tag tag, RecordVal* args) const;
|
||||
|
||||
/**
|
||||
* Create an instance of a file analyzer.
|
||||
* @param tag the tag of a file analyzer.
|
||||
* @param args an \c AnalyzerArgs value which specifies an analyzer.
|
||||
* @return a new file analyzer instance.
|
||||
*/
|
||||
file_analysis::Analyzer* InstantiateAnalyzer(RecordVal* args) const;
|
||||
file_analysis::Analyzer* InstantiateAnalyzer(file_analysis::Tag tag,
|
||||
RecordVal* args) const;
|
||||
|
||||
/**
|
||||
* Insert an analyzer instance in to the set.
|
||||
|
@ -116,7 +124,7 @@ protected:
|
|||
* just used for debugging messages.
|
||||
* @param key the hash key which represents the analyzer's \c AnalyzerArgs.
|
||||
*/
|
||||
bool Remove(FA_Tag tag, HashKey* key);
|
||||
bool Remove(file_analysis::Tag tag, HashKey* key);
|
||||
|
||||
private:
|
||||
|
||||
|
@ -175,14 +183,14 @@ private:
|
|||
* @param arg_tag the tag of the file analyzer to remove from an analyzer set.
|
||||
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
|
||||
*/
|
||||
RemoveMod(FA_Tag arg_tag, HashKey* arg_key)
|
||||
RemoveMod(file_analysis::Tag arg_tag, HashKey* arg_key)
|
||||
: Modification(), tag(arg_tag), key(arg_key) {}
|
||||
virtual ~RemoveMod() {}
|
||||
virtual bool Perform(AnalyzerSet* set);
|
||||
virtual void Abort() { delete key; }
|
||||
|
||||
protected:
|
||||
FA_Tag tag;
|
||||
file_analysis::Tag tag;
|
||||
HashKey* key;
|
||||
};
|
||||
|
||||
|
|
|
@ -11,9 +11,10 @@ set(file_analysis_SRCS
|
|||
Manager.cc
|
||||
File.cc
|
||||
FileTimer.cc
|
||||
Analyzer.h
|
||||
Analyzer.cc
|
||||
AnalyzerSet.cc
|
||||
Component.cc
|
||||
Tag.cc
|
||||
)
|
||||
|
||||
bif_target(file_analysis.bif)
|
||||
|
|
|
@ -8,26 +8,22 @@
|
|||
|
||||
using namespace file_analysis;
|
||||
|
||||
analyzer::Tag::type_t Component::type_counter = 0;
|
||||
|
||||
Component::Component(const char* arg_name, factory_callback arg_factory,
|
||||
analyzer::Tag::subtype_t arg_subtype)
|
||||
: plugin::Component(plugin::component::FILE_ANALYZER)
|
||||
Component::Component(const char* arg_name, factory_callback arg_factory)
|
||||
: plugin::Component(plugin::component::FILE_ANALYZER),
|
||||
plugin::TaggedComponent<file_analysis::Tag>()
|
||||
{
|
||||
name = copy_string(arg_name);
|
||||
canon_name = canonify_name(arg_name);
|
||||
factory = arg_factory;
|
||||
|
||||
tag = analyzer::Tag(++type_counter, arg_subtype);
|
||||
}
|
||||
|
||||
Component::Component(const Component& other)
|
||||
: plugin::Component(Type())
|
||||
: plugin::Component(Type()),
|
||||
plugin::TaggedComponent<file_analysis::Tag>(other)
|
||||
{
|
||||
name = copy_string(other.name);
|
||||
canon_name = copy_string(other.canon_name);
|
||||
factory = other.factory;
|
||||
tag = other.tag;
|
||||
}
|
||||
|
||||
Component::~Component()
|
||||
|
@ -36,11 +32,6 @@ Component::~Component()
|
|||
delete [] canon_name;
|
||||
}
|
||||
|
||||
analyzer::Tag Component::Tag() const
|
||||
{
|
||||
return tag;
|
||||
}
|
||||
|
||||
void Component::Describe(ODesc* d) const
|
||||
{
|
||||
plugin::Component::Describe(d);
|
||||
|
@ -58,11 +49,12 @@ void Component::Describe(ODesc* d) const
|
|||
|
||||
Component& Component::operator=(const Component& other)
|
||||
{
|
||||
plugin::TaggedComponent<file_analysis::Tag>::operator=(other);
|
||||
|
||||
if ( &other != this )
|
||||
{
|
||||
name = copy_string(other.name);
|
||||
factory = other.factory;
|
||||
tag = other.tag;
|
||||
}
|
||||
|
||||
return *this;
|
||||
|
|
|
@ -3,8 +3,9 @@
|
|||
#ifndef FILE_ANALYZER_PLUGIN_COMPONENT_H
|
||||
#define FILE_ANALYZER_PLUGIN_COMPONENT_H
|
||||
|
||||
#include "analyzer/Tag.h"
|
||||
#include "Tag.h"
|
||||
#include "plugin/Component.h"
|
||||
#include "plugin/TaggedComponent.h"
|
||||
|
||||
#include "Val.h"
|
||||
|
||||
|
@ -22,7 +23,8 @@ class Analyzer;
|
|||
* A plugin can provide a specific file analyzer by registering this
|
||||
* analyzer component, describing the analyzer.
|
||||
*/
|
||||
class Component : public plugin::Component {
|
||||
class Component : public plugin::Component,
|
||||
public plugin::TaggedComponent<file_analysis::Tag> {
|
||||
public:
|
||||
typedef Analyzer* (*factory_callback)(RecordVal* args, File* file);
|
||||
|
||||
|
@ -38,15 +40,8 @@ public:
|
|||
* from file_analysis::Analyzer. This is typically a static \c
|
||||
* Instantiate() method inside the class that just allocates and
|
||||
* returns a new instance.
|
||||
*
|
||||
* @param subtype A subtype associated with this component that
|
||||
* further distinguishes it. The subtype will be integrated into
|
||||
* the analyzer::Tag that the manager associates with this analyzer,
|
||||
* and analyzer instances can accordingly access it via analyzer::Tag().
|
||||
* If not used, leave at zero.
|
||||
*/
|
||||
Component(const char* name, factory_callback factory,
|
||||
analyzer::Tag::subtype_t subtype = 0);
|
||||
Component(const char* name, factory_callback factory);
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
|
@ -79,13 +74,6 @@ public:
|
|||
*/
|
||||
factory_callback Factory() const { return factory; }
|
||||
|
||||
/**
|
||||
* Returns the analyzer's tag. Note that this is automatically
|
||||
* generated for each new Components, and hence unique across all of
|
||||
* them.
|
||||
*/
|
||||
analyzer::Tag Tag() const;
|
||||
|
||||
/**
|
||||
* Generates a human-readable description of the component's main
|
||||
* parameters. This goes into the output of \c "bro -NN".
|
||||
|
@ -98,10 +86,6 @@ private:
|
|||
const char* name; // The analyzer's name.
|
||||
const char* canon_name; // The analyzer's canonical name.
|
||||
factory_callback factory; // The analyzer's factory callback.
|
||||
analyzer::Tag tag; // The automatically assigned analyzer tag.
|
||||
|
||||
// Global counter used to generate unique tags.
|
||||
static analyzer::Tag::type_t type_counter;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -88,7 +88,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
|
|||
if ( conn )
|
||||
{
|
||||
// add source, connection, is_orig fields
|
||||
SetSource(analyzer_mgr->GetAnalyzerName(tag));
|
||||
SetSource(analyzer_mgr->GetComponentName(tag));
|
||||
val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL));
|
||||
UpdateConnectionFields(conn, is_orig);
|
||||
}
|
||||
|
@ -231,14 +231,14 @@ void File::ScheduleInactivityTimer() const
|
|||
timer_mgr->Add(new FileTimer(network_time, id, GetTimeoutInterval()));
|
||||
}
|
||||
|
||||
bool File::AddAnalyzer(RecordVal* args)
|
||||
bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args)
|
||||
{
|
||||
return done ? false : analyzers.QueueAdd(args);
|
||||
return done ? false : analyzers.QueueAdd(tag, args);
|
||||
}
|
||||
|
||||
bool File::RemoveAnalyzer(const RecordVal* args)
|
||||
bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)
|
||||
{
|
||||
return done ? false : analyzers.QueueRemove(args);
|
||||
return done ? false : analyzers.QueueRemove(tag, args);
|
||||
}
|
||||
|
||||
bool File::BufferBOF(const u_char* data, uint64 len)
|
||||
|
@ -321,7 +321,7 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
|
|||
while ( (a = analyzers.NextEntry(c)) )
|
||||
{
|
||||
if ( ! a->DeliverChunk(data, len, offset) )
|
||||
analyzers.QueueRemove(a->Args());
|
||||
analyzers.QueueRemove(a->Tag(), a->Args());
|
||||
}
|
||||
|
||||
analyzers.DrainModifications();
|
||||
|
@ -356,7 +356,7 @@ void File::DataIn(const u_char* data, uint64 len)
|
|||
{
|
||||
if ( ! a->DeliverStream(data, len) )
|
||||
{
|
||||
analyzers.QueueRemove(a->Args());
|
||||
analyzers.QueueRemove(a->Tag(), a->Args());
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -364,7 +364,7 @@ void File::DataIn(const u_char* data, uint64 len)
|
|||
LookupFieldDefaultCount(missing_bytes_idx);
|
||||
|
||||
if ( ! a->DeliverChunk(data, len, offset) )
|
||||
analyzers.QueueRemove(a->Args());
|
||||
analyzers.QueueRemove(a->Tag(), a->Args());
|
||||
}
|
||||
|
||||
analyzers.DrainModifications();
|
||||
|
@ -389,7 +389,7 @@ void File::EndOfFile()
|
|||
while ( (a = analyzers.NextEntry(c)) )
|
||||
{
|
||||
if ( ! a->EndOfFile() )
|
||||
analyzers.QueueRemove(a->Args());
|
||||
analyzers.QueueRemove(a->Tag(), a->Args());
|
||||
}
|
||||
|
||||
FileEvent(file_state_remove);
|
||||
|
@ -411,7 +411,7 @@ void File::Gap(uint64 offset, uint64 len)
|
|||
while ( (a = analyzers.NextEntry(c)) )
|
||||
{
|
||||
if ( ! a->Undelivered(offset, len) )
|
||||
analyzers.QueueRemove(a->Args());
|
||||
analyzers.QueueRemove(a->Tag(), a->Args());
|
||||
}
|
||||
|
||||
if ( FileEventAvailable(file_gap) )
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#include "Conn.h"
|
||||
#include "Val.h"
|
||||
#include "Tag.h"
|
||||
#include "AnalyzerSet.h"
|
||||
#include "BroString.h"
|
||||
|
||||
|
@ -94,17 +95,19 @@ public:
|
|||
/**
|
||||
* Queues attaching an analyzer. Only one analyzer per type can be attached
|
||||
* at a time unless the arguments differ.
|
||||
* @param tag the analyzer tag of the file analyzer to add.
|
||||
* @param args an \c AnalyzerArgs value representing a file analyzer.
|
||||
* @return false if analyzer can't be instantiated, else true.
|
||||
*/
|
||||
bool AddAnalyzer(RecordVal* args);
|
||||
bool AddAnalyzer(file_analysis::Tag tag, RecordVal* args);
|
||||
|
||||
/**
|
||||
* Queues removal of an analyzer.
|
||||
* @param tag the analyzer tag of the file analyzer to remove.
|
||||
* @param args an \c AnalyzerArgs value representing a file analyzer.
|
||||
* @return true if analyzer was active at time of call, else false.
|
||||
*/
|
||||
bool RemoveAnalyzer(const RecordVal* args);
|
||||
bool RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args);
|
||||
|
||||
/**
|
||||
* Pass in non-sequential data and deliver to attached analyzers.
|
||||
|
|
|
@ -14,7 +14,7 @@ FileTimer::FileTimer(double t, const string& id, double interval)
|
|||
|
||||
void FileTimer::Dispatch(double t, int is_expire)
|
||||
{
|
||||
File* file = file_mgr->Lookup(file_id);
|
||||
File* file = file_mgr->LookupFile(file_id);
|
||||
|
||||
if ( ! file )
|
||||
return;
|
||||
|
|
|
@ -18,10 +18,9 @@ TableVal* Manager::disabled = 0;
|
|||
string Manager::salt;
|
||||
|
||||
Manager::Manager()
|
||||
: plugin::ComponentManager<file_analysis::Tag,
|
||||
file_analysis::Component>("Files")
|
||||
{
|
||||
tag_enum_type = new EnumType("Files::Tag");
|
||||
::ID* id = install_ID("Tag", "Files", true, true);
|
||||
add_type(id, tag_enum_type, 0, 0);
|
||||
}
|
||||
|
||||
Manager::~Manager()
|
||||
|
@ -35,27 +34,7 @@ void Manager::InitPreScript()
|
|||
|
||||
for ( std::list<Component*>::const_iterator i = analyzers.begin();
|
||||
i != analyzers.end(); ++i )
|
||||
RegisterAnalyzerComponent(*i);
|
||||
}
|
||||
|
||||
void Manager::RegisterAnalyzerComponent(Component* component)
|
||||
{
|
||||
const char* cname = component->CanonicalName();
|
||||
|
||||
if ( tag_enum_type->Lookup("Files", cname) != -1 )
|
||||
reporter->FatalError("File Analyzer %s defined more than once", cname);
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Registering analyzer %s (tag %s)",
|
||||
component->Name(), component->Tag().AsString().c_str());
|
||||
|
||||
analyzers_by_name.insert(std::make_pair(cname, component));
|
||||
analyzers_by_tag.insert(std::make_pair(component->Tag(), component));
|
||||
analyzers_by_val.insert(std::make_pair(
|
||||
component->Tag().AsEnumVal()->InternalInt(), component));
|
||||
|
||||
string id = fmt("ANALYZER_%s", cname);
|
||||
tag_enum_type->AddName("Files", id.c_str(),
|
||||
component->Tag().AsEnumVal()->InternalInt(), true);
|
||||
RegisterComponent(*i, "ANALYZER_");
|
||||
}
|
||||
|
||||
void Manager::InitPostScript()
|
||||
|
@ -193,7 +172,7 @@ void Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
|
|||
|
||||
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
|
||||
{
|
||||
File* file = Lookup(file_id);
|
||||
File* file = LookupFile(file_id);
|
||||
|
||||
if ( ! file )
|
||||
return false;
|
||||
|
@ -205,24 +184,26 @@ bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Manager::AddAnalyzer(const string& file_id, RecordVal* args) const
|
||||
bool Manager::AddAnalyzer(const string& file_id, file_analysis::Tag tag,
|
||||
RecordVal* args) const
|
||||
{
|
||||
File* file = Lookup(file_id);
|
||||
File* file = LookupFile(file_id);
|
||||
|
||||
if ( ! file )
|
||||
return false;
|
||||
|
||||
return file->AddAnalyzer(args);
|
||||
return file->AddAnalyzer(tag, args);
|
||||
}
|
||||
|
||||
bool Manager::RemoveAnalyzer(const string& file_id, const RecordVal* args) const
|
||||
bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag,
|
||||
RecordVal* args) const
|
||||
{
|
||||
File* file = Lookup(file_id);
|
||||
File* file = LookupFile(file_id);
|
||||
|
||||
if ( ! file )
|
||||
return false;
|
||||
|
||||
return file->RemoveAnalyzer(args);
|
||||
return file->RemoveAnalyzer(tag, args);
|
||||
}
|
||||
|
||||
File* Manager::GetFile(const string& file_id, Connection* conn,
|
||||
|
@ -255,7 +236,7 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
|
|||
return rval;
|
||||
}
|
||||
|
||||
File* Manager::Lookup(const string& file_id) const
|
||||
File* Manager::LookupFile(const string& file_id) const
|
||||
{
|
||||
IDMap::const_iterator it = id_map.find(file_id);
|
||||
|
||||
|
@ -267,7 +248,7 @@ File* Manager::Lookup(const string& file_id) const
|
|||
|
||||
void Manager::Timeout(const string& file_id, bool is_terminating)
|
||||
{
|
||||
File* file = Lookup(file_id);
|
||||
File* file = LookupFile(file_id);
|
||||
|
||||
if ( ! file )
|
||||
return;
|
||||
|
@ -366,15 +347,13 @@ bool Manager::IsDisabled(analyzer::Tag tag)
|
|||
return rval;
|
||||
}
|
||||
|
||||
Analyzer* Manager::InstantiateAnalyzer(int tag, RecordVal* args, File* f) const
|
||||
Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
|
||||
{
|
||||
analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag);
|
||||
Component* c = Lookup(tag);
|
||||
|
||||
if ( it == analyzers_by_val.end() )
|
||||
reporter->InternalError("cannot instantiate unknown file analyzer: %d",
|
||||
tag);
|
||||
|
||||
Component* c = it->second;
|
||||
if ( ! c )
|
||||
reporter->InternalError("cannot instantiate unknown file analyzer: %s",
|
||||
tag.AsString().c_str());
|
||||
|
||||
if ( ! c->Factory() )
|
||||
reporter->InternalError("file analyzer %s cannot be instantiated "
|
||||
|
@ -382,14 +361,3 @@ Analyzer* Manager::InstantiateAnalyzer(int tag, RecordVal* args, File* f) const
|
|||
|
||||
return c->Factory()(args, f);
|
||||
}
|
||||
|
||||
const char* Manager::GetAnalyzerName(int tag) const
|
||||
{
|
||||
analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag);
|
||||
|
||||
if ( it == analyzers_by_val.end() )
|
||||
reporter->InternalError("cannot get name of unknown file analyzer: %d",
|
||||
tag);
|
||||
|
||||
return it->second->CanonicalName();
|
||||
}
|
||||
|
|
|
@ -18,7 +18,8 @@
|
|||
#include "File.h"
|
||||
#include "FileTimer.h"
|
||||
#include "Component.h"
|
||||
|
||||
#include "Tag.h"
|
||||
#include "plugin/ComponentManager.h"
|
||||
#include "analyzer/Tag.h"
|
||||
|
||||
#include "file_analysis/file_analysis.bif.h"
|
||||
|
@ -28,7 +29,7 @@ namespace file_analysis {
|
|||
/**
|
||||
* Main entry point for interacting with file analysis.
|
||||
*/
|
||||
class Manager {
|
||||
class Manager : public plugin::ComponentManager<Tag, Component> {
|
||||
public:
|
||||
|
||||
/**
|
||||
|
@ -177,18 +178,22 @@ public:
|
|||
* analyzers of a given type can be attached per file identifier at a time
|
||||
* as long as the arguments differ.
|
||||
* @param file_id the file identifier/hash.
|
||||
* @param tag the analyzer tag of the file analyzer to add.
|
||||
* @param args a \c AnalyzerArgs value which describes a file analyzer.
|
||||
* @return false if the analyzer failed to be instantiated, else true.
|
||||
*/
|
||||
bool AddAnalyzer(const string& file_id, RecordVal* args) const;
|
||||
bool AddAnalyzer(const string& file_id, file_analysis::Tag tag,
|
||||
RecordVal* args) const;
|
||||
|
||||
/**
|
||||
* Queue removal of an analyzer for a given file identifier.
|
||||
* @param file_id the file identifier/hash.
|
||||
* @param tag the analyzer tag of the file analyzer to remove.
|
||||
* @param args a \c AnalyzerArgs value which describes a file analyzer.
|
||||
* @return true if the analyzer is active at the time of call, else false.
|
||||
*/
|
||||
bool RemoveAnalyzer(const string& file_id, const RecordVal* args) const;
|
||||
bool RemoveAnalyzer(const string& file_id, file_analysis::Tag tag,
|
||||
RecordVal* args) const;
|
||||
|
||||
/**
|
||||
* Tells whether analysis for a file is active or ignored.
|
||||
|
@ -204,15 +209,7 @@ public:
|
|||
* @param f The file the analyzer is to be associated with.
|
||||
* @return The new analyzer instance or null if tag is invalid.
|
||||
*/
|
||||
Analyzer* InstantiateAnalyzer(int tag, RecordVal* args, File* f) const;
|
||||
|
||||
/**
|
||||
* Translates a script-level file analyzer tag in to corresponding file
|
||||
* analyzer name.
|
||||
* @param tag The enum val of a file analyzer.
|
||||
* @return The human-readable name of the file analyzer.
|
||||
*/
|
||||
const char* GetAnalyzerName(int tag) const;
|
||||
Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const;
|
||||
|
||||
protected:
|
||||
friend class FileTimer;
|
||||
|
@ -247,7 +244,7 @@ protected:
|
|||
* @return the File object mapped to \a file_id, or a null pointer if no
|
||||
* mapping exists.
|
||||
*/
|
||||
File* Lookup(const string& file_id) const;
|
||||
File* LookupFile(const string& file_id) const;
|
||||
|
||||
/**
|
||||
* Evaluate timeout policy for a file and remove the File object mapped to
|
||||
|
@ -287,20 +284,10 @@ protected:
|
|||
static bool IsDisabled(analyzer::Tag tag);
|
||||
|
||||
private:
|
||||
typedef map<string, Component*> analyzer_map_by_name;
|
||||
typedef map<analyzer::Tag, Component*> analyzer_map_by_tag;
|
||||
typedef map<int, Component*> analyzer_map_by_val;
|
||||
|
||||
void RegisterAnalyzerComponent(Component* component);
|
||||
|
||||
IDMap id_map; /**< Map file ID to file_analysis::File records. */
|
||||
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
|
||||
string current_file_id; /**< Hash of what get_file_handle event sets. */
|
||||
EnumType* tag_enum_type; /**< File analyzer tag type. */
|
||||
|
||||
analyzer_map_by_name analyzers_by_name;
|
||||
analyzer_map_by_tag analyzers_by_tag;
|
||||
analyzer_map_by_val analyzers_by_val;
|
||||
|
||||
static TableVal* disabled; /**< Table of disabled analyzers. */
|
||||
static string salt; /**< A salt added to file handles before hashing. */
|
||||
|
|
24
src/file_analysis/Tag.cc
Normal file
24
src/file_analysis/Tag.cc
Normal file
|
@ -0,0 +1,24 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#include "Tag.h"
|
||||
#include "Manager.h"
|
||||
|
||||
using namespace file_analysis;
|
||||
|
||||
file_analysis::Tag file_analysis::Tag::Error;
|
||||
|
||||
file_analysis::Tag::Tag(type_t type, subtype_t subtype)
|
||||
: ::Tag(file_mgr->GetTagEnumType(), type, subtype)
|
||||
{
|
||||
}
|
||||
|
||||
file_analysis::Tag& file_analysis::Tag::operator=(const file_analysis::Tag& other)
|
||||
{
|
||||
::Tag::operator=(other);
|
||||
return *this;
|
||||
}
|
||||
|
||||
EnumVal* file_analysis::Tag::AsEnumVal() const
|
||||
{
|
||||
return ::Tag::AsEnumVal(file_mgr->GetTagEnumType());
|
||||
}
|
116
src/file_analysis/Tag.h
Normal file
116
src/file_analysis/Tag.h
Normal file
|
@ -0,0 +1,116 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#ifndef FILE_ANALYZER_TAG_H
|
||||
#define FILE_ANALYZER_TAG_H
|
||||
|
||||
#include "config.h"
|
||||
#include "util.h"
|
||||
#include "../Tag.h"
|
||||
#include "plugin/TaggedComponent.h"
|
||||
#include "plugin/ComponentManager.h"
|
||||
|
||||
class EnumVal;
|
||||
|
||||
namespace file_analysis {
|
||||
|
||||
class Component;
|
||||
|
||||
/**
|
||||
* Class to identify a file analyzer type.
|
||||
*
|
||||
* The script-layer analogue is Files::Tag.
|
||||
*/
|
||||
class Tag : public ::Tag {
|
||||
public:
|
||||
/*
|
||||
* Copy constructor.
|
||||
*/
|
||||
Tag(const Tag& other) : ::Tag(other) {}
|
||||
|
||||
/**
|
||||
* Default constructor. This initializes the tag with an error value
|
||||
* that will make \c operator \c bool return false.
|
||||
*/
|
||||
Tag() : ::Tag() {}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
~Tag() {}
|
||||
|
||||
/**
|
||||
* Returns false if the tag represents an error value rather than a
|
||||
* legal analyzer type.
|
||||
* TODO: make this conversion operator "explicit" (C++11) or use a
|
||||
* "safe bool" idiom (not necessary if "explicit" is available),
|
||||
* otherwise this may allow nonsense/undesired comparison operations.
|
||||
*
|
||||
*/
|
||||
operator bool() const { return *this != Tag(); }
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*/
|
||||
Tag& operator=(const Tag& other);
|
||||
|
||||
/**
|
||||
* Compares two tags for equality.
|
||||
*/
|
||||
bool operator==(const Tag& other) const
|
||||
{
|
||||
return ::Tag::operator==(other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two tags for inequality.
|
||||
*/
|
||||
bool operator!=(const Tag& other) const
|
||||
{
|
||||
return ::Tag::operator!=(other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two tags for less-than relationship.
|
||||
*/
|
||||
bool operator<(const Tag& other) const
|
||||
{
|
||||
return ::Tag::operator<(other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the \c Files::Tag enum that corresponds to this tag.
|
||||
* The returned value does not have its ref-count increased.
|
||||
*
|
||||
* The enum type used is the one provided by the file analysis manager.
|
||||
*/
|
||||
EnumVal* AsEnumVal() const;
|
||||
|
||||
static Tag Error;
|
||||
|
||||
protected:
|
||||
friend class plugin::ComponentManager<Tag, Component>;
|
||||
friend class plugin::TaggedComponent<Tag>;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param type The main type. Note that the \a file_analysis::Manager
|
||||
* manages the value space internally, so no one else should assign
|
||||
* main types.
|
||||
*
|
||||
* @param subtype The sub type, which is left to an analyzer for
|
||||
* interpretation. By default it's set to zero.
|
||||
*/
|
||||
Tag(type_t type, subtype_t subtype = 0);
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param val An enum value of script type \c Files::Tag.
|
||||
*/
|
||||
Tag(EnumVal* val) : ::Tag(val) {}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -4,5 +4,5 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
|
|||
${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
bro_plugin_begin(Bro FileDataEvent)
|
||||
bro_plugin_cc(DataEvent.cc Plugin.cc)
|
||||
bro_plugin_cc(DataEvent.cc Plugin.cc ../../Analyzer.cc)
|
||||
bro_plugin_end()
|
||||
|
|
|
@ -6,12 +6,15 @@
|
|||
#include "EventRegistry.h"
|
||||
#include "Event.h"
|
||||
#include "util.h"
|
||||
#include "file_analysis/Manager.h"
|
||||
|
||||
using namespace file_analysis;
|
||||
|
||||
DataEvent::DataEvent(RecordVal* args, File* file,
|
||||
EventHandlerPtr ce, EventHandlerPtr se)
|
||||
: file_analysis::Analyzer(args, file), chunk_event(ce), stream_event(se)
|
||||
: file_analysis::Analyzer(file_mgr->GetComponentTag("DATA_EVENT"),
|
||||
args, file),
|
||||
chunk_event(ce), stream_event(se)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
@ -4,5 +4,5 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
|
|||
${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
bro_plugin_begin(Bro FileExtract)
|
||||
bro_plugin_cc(Extract.cc Plugin.cc)
|
||||
bro_plugin_cc(Extract.cc Plugin.cc ../../Analyzer.cc)
|
||||
bro_plugin_end()
|
||||
|
|
|
@ -4,11 +4,13 @@
|
|||
|
||||
#include "Extract.h"
|
||||
#include "util.h"
|
||||
#include "file_analysis/Manager.h"
|
||||
|
||||
using namespace file_analysis;
|
||||
|
||||
Extract::Extract(RecordVal* args, File* file, const string& arg_filename)
|
||||
: file_analysis::Analyzer(args, file), filename(arg_filename)
|
||||
: file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), args, file),
|
||||
filename(arg_filename)
|
||||
{
|
||||
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
|
||||
|
||||
|
|
|
@ -4,6 +4,6 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
|
|||
${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
bro_plugin_begin(Bro FileHash)
|
||||
bro_plugin_cc(Hash.cc Plugin.cc)
|
||||
bro_plugin_cc(Hash.cc Plugin.cc ../../Analyzer.cc)
|
||||
bro_plugin_bif(events.bif)
|
||||
bro_plugin_end()
|
||||
|
|
|
@ -5,11 +5,12 @@
|
|||
#include "Hash.h"
|
||||
#include "util.h"
|
||||
#include "Event.h"
|
||||
#include "file_analysis/Manager.h"
|
||||
|
||||
using namespace file_analysis;
|
||||
|
||||
Hash::Hash(RecordVal* args, File* file, HashVal* hv, const char* arg_kind)
|
||||
: file_analysis::Analyzer(args, file), hash(hv), fed(false), kind(arg_kind)
|
||||
: file_analysis::Analyzer(file_mgr->GetComponentTag(to_upper(arg_kind).c_str()), args, file), hash(hv), fed(false), kind(arg_kind)
|
||||
{
|
||||
hash->Init();
|
||||
}
|
||||
|
|
|
@ -16,21 +16,23 @@ function Files::__set_timeout_interval%(file_id: string, t: interval%): bool
|
|||
%}
|
||||
|
||||
## :bro:see:`Files::add_analyzer`.
|
||||
function Files::__add_analyzer%(file_id: string, args: any%): bool
|
||||
function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
|
||||
%{
|
||||
using BifType::Record::Files::AnalyzerArgs;
|
||||
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
|
||||
bool result = file_mgr->AddAnalyzer(file_id->CheckString(), rv);
|
||||
bool result = file_mgr->AddAnalyzer(file_id->CheckString(),
|
||||
file_mgr->GetComponentTag(tag), rv);
|
||||
Unref(rv);
|
||||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
||||
## :bro:see:`Files::remove_analyzer`.
|
||||
function Files::__remove_analyzer%(file_id: string, args: any%): bool
|
||||
function Files::__remove_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
|
||||
%{
|
||||
using BifType::Record::Files::AnalyzerArgs;
|
||||
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
|
||||
bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(), rv);
|
||||
bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(),
|
||||
file_mgr->GetComponentTag(tag) , rv);
|
||||
Unref(rv);
|
||||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
@ -45,7 +47,7 @@ function Files::__stop%(file_id: string%): bool
|
|||
## :bro:see:`Files::analyzer_name`.
|
||||
function Files::__analyzer_name%(tag: Files::Tag%) : string
|
||||
%{
|
||||
return new StringVal(file_mgr->GetAnalyzerName(tag->InternalInt()));
|
||||
return new StringVal(file_mgr->GetComponentName(tag));
|
||||
%}
|
||||
|
||||
module GLOBAL;
|
||||
|
|
|
@ -872,6 +872,7 @@ int main(int argc, char** argv)
|
|||
if ( generate_documentation )
|
||||
{
|
||||
CreateProtoAnalyzerDoc("proto-analyzers.rst");
|
||||
CreateFileAnalyzerDoc("file-analyzers.rst");
|
||||
|
||||
std::list<BroDoc*>::iterator it;
|
||||
|
||||
|
|
248
src/plugin/ComponentManager.h
Normal file
248
src/plugin/ComponentManager.h
Normal file
|
@ -0,0 +1,248 @@
|
|||
#ifndef PLUGIN_COMPONENT_MANAGER_H
|
||||
#define PLUGIN_COMPONENT_MANAGER_H
|
||||
|
||||
#include <map>
|
||||
#include <list>
|
||||
#include <string>
|
||||
|
||||
#include "Type.h"
|
||||
#include "ID.h"
|
||||
#include "Var.h"
|
||||
#include "Val.h"
|
||||
#include "Reporter.h"
|
||||
|
||||
namespace plugin {
|
||||
|
||||
/**
|
||||
* A class that manages tracking of plugin components (e.g. analyzers) and
|
||||
* installs identifiers in the script-layer to identify them by a unique tag,
|
||||
* (a script-layer enum value).
|
||||
*
|
||||
* @tparam T A ::Tag type or derivative.
|
||||
* @tparam C A plugin::TaggedComponent type derivative.
|
||||
*/
|
||||
template <class T, class C>
|
||||
class ComponentManager {
|
||||
public:
|
||||
|
||||
/**
|
||||
* Constructor creates a new enum type called a "Tag" to associate with
|
||||
* a component.
|
||||
*
|
||||
* @param module The script-layer module in which to install the "Tag" ID
|
||||
* representing an enum type.
|
||||
*/
|
||||
ComponentManager(const string& module);
|
||||
|
||||
/**
|
||||
* @return The script-layer module in which the component's "Tag" ID lives.
|
||||
*/
|
||||
const char* GetModule() const;
|
||||
|
||||
/**
|
||||
* @return A list of all registered components.
|
||||
*/
|
||||
list<C*> GetComponents() const;
|
||||
|
||||
/**
|
||||
* @return The enum type associated with the script-layer "Tag".
|
||||
*/
|
||||
EnumType* GetTagEnumType() const;
|
||||
|
||||
/**
|
||||
* Get a component name from its tag.
|
||||
*
|
||||
* @param tag A component's tag.
|
||||
* @return The canonical component name.
|
||||
*/
|
||||
const char* GetComponentName(T tag) const;
|
||||
|
||||
/**
|
||||
* Get a component name from its enum value.
|
||||
*
|
||||
* @param val A component's enum value.
|
||||
* @return The canonical component name.
|
||||
*/
|
||||
const char* GetComponentName(Val* val) const;
|
||||
|
||||
/**
|
||||
* Get a component tag from its name.
|
||||
*
|
||||
* @param name A component's canonical name.
|
||||
* @return The component's tag, or a tag representing an error if
|
||||
* no such component associated with the name exists.
|
||||
*/
|
||||
T GetComponentTag(const string& name) const;
|
||||
|
||||
/**
|
||||
* Get a component tag from its enum value.
|
||||
*
|
||||
* @param v A component's enum value.
|
||||
* @return The component's tag, or a tag representing an error if
|
||||
* no such component associated with the value exists.
|
||||
*/
|
||||
T GetComponentTag(Val* v) const;
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Add a component to the internal maps used to keep track of it and create
|
||||
* a script-layer ID for the component's enum value.
|
||||
*
|
||||
* @param component A component to track.
|
||||
* @param prefix The script-layer ID associated with the component's enum
|
||||
* value will be a concatenation of this prefix and the component's
|
||||
* canonical name.
|
||||
*/
|
||||
void RegisterComponent(C* component, const string& prefix = "");
|
||||
|
||||
/**
|
||||
* @param name The canonical name of a component.
|
||||
* @return The component associated with the name or a null pointer if no
|
||||
* such component exists.
|
||||
*/
|
||||
C* Lookup(const string& name) const;
|
||||
|
||||
/**
|
||||
* @param tag A component tag.
|
||||
* @return The component associated with the tag or a null pointer if no
|
||||
* such component exists.
|
||||
*/
|
||||
C* Lookup(const T& tag) const;
|
||||
|
||||
/**
|
||||
* @param val A component's enum value.
|
||||
* @return The component associated with the value or a null pointer if no
|
||||
* such component exists.
|
||||
*/
|
||||
C* Lookup(EnumVal* val) const;
|
||||
|
||||
private:
|
||||
|
||||
string module; /**< Script layer module in which component tags live. */
|
||||
EnumType* tag_enum_type; /**< Enum type of component tags. */
|
||||
map<string, C*> components_by_name;
|
||||
map<T, C*> components_by_tag;
|
||||
map<int, C*> components_by_val;
|
||||
};
|
||||
|
||||
template <class T, class C>
|
||||
ComponentManager<T, C>::ComponentManager(const string& arg_module)
|
||||
: module(arg_module)
|
||||
{
|
||||
tag_enum_type = new EnumType(module + "::Tag");
|
||||
::ID* id = install_ID("Tag", module.c_str(), true, true);
|
||||
add_type(id, tag_enum_type, 0, 0);
|
||||
}
|
||||
|
||||
template <class T, class C>
|
||||
const char* ComponentManager<T, C>::GetModule() const
|
||||
{
|
||||
return module.c_str();
|
||||
}
|
||||
|
||||
template <class T, class C>
|
||||
list<C*> ComponentManager<T, C>::GetComponents() const
|
||||
{
|
||||
list<C*> rval;
|
||||
typename map<T, C*>::const_iterator i;
|
||||
|
||||
for ( i = components_by_tag.begin(); i != components_by_tag.end(); ++i )
|
||||
rval.push_back(i->second);
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
||||
template <class T, class C>
|
||||
EnumType* ComponentManager<T, C>::GetTagEnumType() const
|
||||
{
|
||||
return tag_enum_type;
|
||||
}
|
||||
|
||||
template <class T, class C>
|
||||
const char* ComponentManager<T, C>::GetComponentName(T tag) const
|
||||
{
|
||||
static const char* error = "<error>";
|
||||
|
||||
if ( ! tag )
|
||||
return error;
|
||||
|
||||
C* c = Lookup(tag);
|
||||
|
||||
if ( ! c )
|
||||
reporter->InternalError("request for name of unknown component tag %s",
|
||||
tag.AsString().c_str());
|
||||
|
||||
return c->CanonicalName();
|
||||
}
|
||||
|
||||
template <class T, class C>
|
||||
const char* ComponentManager<T, C>::GetComponentName(Val* val) const
|
||||
{
|
||||
return GetComponentName(T(val->AsEnumVal()));
|
||||
}
|
||||
|
||||
template <class T, class C>
|
||||
T ComponentManager<T, C>::GetComponentTag(const string& name) const
|
||||
{
|
||||
C* c = Lookup(name);
|
||||
return c ? c->Tag() : T();
|
||||
}
|
||||
|
||||
template <class T, class C>
|
||||
T ComponentManager<T, C>::GetComponentTag(Val* v) const
|
||||
{
|
||||
C* c = Lookup(v->AsEnumVal());
|
||||
return c ? c->Tag() : T();
|
||||
}
|
||||
|
||||
template <class T, class C>
|
||||
C* ComponentManager<T, C>::Lookup(const string& name) const
|
||||
{
|
||||
typename map<string, C*>::const_iterator i =
|
||||
components_by_name.find(to_upper(name));
|
||||
return i != components_by_name.end() ? i->second : 0;
|
||||
}
|
||||
|
||||
template <class T, class C>
|
||||
C* ComponentManager<T, C>::Lookup(const T& tag) const
|
||||
{
|
||||
typename map<T, C*>::const_iterator i = components_by_tag.find(tag);
|
||||
return i != components_by_tag.end() ? i->second : 0;
|
||||
}
|
||||
|
||||
template <class T, class C>
|
||||
C* ComponentManager<T, C>::Lookup(EnumVal* val) const
|
||||
{
|
||||
typename map<int, C*>::const_iterator i =
|
||||
components_by_val.find(val->InternalInt());
|
||||
return i != components_by_val.end() ? i->second : 0;
|
||||
}
|
||||
|
||||
template <class T, class C>
|
||||
void ComponentManager<T, C>::RegisterComponent(C* component,
|
||||
const string& prefix)
|
||||
{
|
||||
const char* cname = component->CanonicalName();
|
||||
|
||||
if ( Lookup(cname) )
|
||||
reporter->FatalError("Component '%s::%s' defined more than once",
|
||||
module.c_str(), cname);
|
||||
|
||||
DBG_LOG(DBG_PLUGINS, "Registering component %s (tag %s)",
|
||||
component->Name(), component->Tag().AsString().c_str());
|
||||
|
||||
components_by_name.insert(std::make_pair(cname, component));
|
||||
components_by_tag.insert(std::make_pair(component->Tag(), component));
|
||||
components_by_val.insert(std::make_pair(
|
||||
component->Tag().AsEnumVal()->InternalInt(), component));
|
||||
|
||||
// Install an identifier for the enum value
|
||||
string id = fmt("%s%s", prefix.c_str(), cname);
|
||||
tag_enum_type->AddName(module, id.c_str(),
|
||||
component->Tag().AsEnumVal()->InternalInt(), true);
|
||||
}
|
||||
|
||||
} // namespace plugin
|
||||
|
||||
#endif
|
85
src/plugin/TaggedComponent.h
Normal file
85
src/plugin/TaggedComponent.h
Normal file
|
@ -0,0 +1,85 @@
|
|||
#ifndef PLUGIN_TAGGED_COMPONENT_H
|
||||
#define PLUGIN_TAGGED_COMPONENT_H
|
||||
|
||||
namespace plugin {
|
||||
|
||||
/**
|
||||
* A class which has a tag of a given type associated with it.
|
||||
*
|
||||
* @tparam T A ::Tag type or derivative.
|
||||
*/
|
||||
template <class T>
|
||||
class TaggedComponent {
|
||||
public:
|
||||
|
||||
/**
|
||||
* Constructor creates a unique tag value for this component.
|
||||
*
|
||||
* @param subtype A subtype associated with this component that
|
||||
* further distinguishes it. The subtype will be integrated into
|
||||
* the Tag that the manager associates with this component,
|
||||
* and component instances can accordingly access it via Tag().
|
||||
* If not used, leave at zero.
|
||||
*/
|
||||
TaggedComponent(typename T::subtype_t subtype = 0);
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*
|
||||
* @param other Another component from which to copy its tag value.
|
||||
*/
|
||||
TaggedComponent(const TaggedComponent& other);
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*
|
||||
* @param other A component to assign.
|
||||
* @return The assigned object.
|
||||
*/
|
||||
TaggedComponent& operator=(const TaggedComponent& other);
|
||||
|
||||
/**
|
||||
* @return The component's tag.
|
||||
*/
|
||||
T Tag() const;
|
||||
|
||||
private:
|
||||
|
||||
T tag; /**< The automatically assigned analyzer tag. */
|
||||
static typename T::type_t type_counter; /**< Used to generate globally
|
||||
unique tags. */
|
||||
};
|
||||
|
||||
template <class T>
|
||||
TaggedComponent<T>::TaggedComponent(typename T::subtype_t subtype)
|
||||
{
|
||||
tag = T(++type_counter, subtype);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
TaggedComponent<T>::TaggedComponent(const TaggedComponent<T>& other)
|
||||
{
|
||||
tag = other.tag;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
TaggedComponent<T>&
|
||||
TaggedComponent<T>::operator =(const TaggedComponent<T>& other)
|
||||
{
|
||||
if ( &other != this )
|
||||
tag = other.tag;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T TaggedComponent<T>::Tag() const
|
||||
{
|
||||
return tag;
|
||||
}
|
||||
|
||||
template <class T> typename T::type_t TaggedComponent<T>::type_counter(0);
|
||||
|
||||
} // namespace plugin
|
||||
|
||||
#endif
|
|
@ -1,10 +1,12 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#include "BitVector.h"
|
||||
|
||||
#include <openssl/sha.h>
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
|
||||
#include "BitVector.h"
|
||||
#include "Serializer.h"
|
||||
#include "digest.h"
|
||||
|
||||
using namespace probabilistic;
|
||||
|
||||
|
@ -490,6 +492,21 @@ BitVector::size_type BitVector::FindNext(size_type i) const
|
|||
return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1);
|
||||
}
|
||||
|
||||
size_t BitVector::Hash() const
|
||||
{
|
||||
size_t hash = 0;
|
||||
|
||||
u_char buf[SHA256_DIGEST_LENGTH];
|
||||
SHA256_CTX ctx;
|
||||
sha256_init(&ctx);
|
||||
|
||||
for ( size_type i = 0; i < Blocks(); ++i )
|
||||
sha256_update(&ctx, &bits[i], sizeof(bits[i]));
|
||||
|
||||
sha256_final(&ctx, buf);
|
||||
return *reinterpret_cast<size_t*>(buf); // Use the first bytes as seed.
|
||||
}
|
||||
|
||||
BitVector::size_type BitVector::lowest_bit(block_type block)
|
||||
{
|
||||
block_type x = block - (block & (block - 1));
|
||||
|
|
|
@ -276,6 +276,13 @@ public:
|
|||
*/
|
||||
size_type FindNext(size_type i) const;
|
||||
|
||||
/** Computes a hash value of the internal representation.
|
||||
* This is mainly for debugging/testing purposes.
|
||||
*
|
||||
* @return The hash.
|
||||
*/
|
||||
size_t Hash() const;
|
||||
|
||||
/**
|
||||
* Serializes the bit vector.
|
||||
*
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
#include "CounterVector.h"
|
||||
#include "Serializer.h"
|
||||
|
||||
#include "../util.h"
|
||||
|
||||
using namespace probabilistic;
|
||||
|
||||
BloomFilter::BloomFilter()
|
||||
|
@ -40,28 +42,15 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const
|
|||
{
|
||||
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
||||
|
||||
if ( ! SERIALIZE(static_cast<uint16>(hasher->K())) )
|
||||
return false;
|
||||
|
||||
return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size());
|
||||
return hasher->Serialize(info);
|
||||
}
|
||||
|
||||
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||
{
|
||||
DO_UNSERIALIZE(SerialObj);
|
||||
|
||||
uint16 k;
|
||||
if ( ! UNSERIALIZE(&k) )
|
||||
return false;
|
||||
|
||||
const char* name;
|
||||
if ( ! UNSERIALIZE_STR(&name, 0) )
|
||||
return false;
|
||||
|
||||
hasher = Hasher::Create(k, name);
|
||||
|
||||
delete [] name;
|
||||
return true;
|
||||
hasher = Hasher::Unserialize(info);
|
||||
return hasher != 0;
|
||||
}
|
||||
|
||||
size_t BasicBloomFilter::M(double fp, size_t capacity)
|
||||
|
@ -120,6 +109,11 @@ BasicBloomFilter* BasicBloomFilter::Clone() const
|
|||
return copy;
|
||||
}
|
||||
|
||||
std::string BasicBloomFilter::InternalState() const
|
||||
{
|
||||
return fmt("%" PRIu64, (uint64_t)bits->Hash());
|
||||
}
|
||||
|
||||
BasicBloomFilter::BasicBloomFilter()
|
||||
{
|
||||
bits = 0;
|
||||
|
@ -146,14 +140,18 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
|
|||
return (bits != 0);
|
||||
}
|
||||
|
||||
void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
||||
void BasicBloomFilter::Add(const HashKey* key)
|
||||
{
|
||||
Hasher::digest_vector h = hasher->Hash(key);
|
||||
|
||||
for ( size_t i = 0; i < h.size(); ++i )
|
||||
bits->Set(h[i] % bits->Size());
|
||||
}
|
||||
|
||||
size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const
|
||||
size_t BasicBloomFilter::Count(const HashKey* key) const
|
||||
{
|
||||
Hasher::digest_vector h = hasher->Hash(key);
|
||||
|
||||
for ( size_t i = 0; i < h.size(); ++i )
|
||||
{
|
||||
if ( ! (*bits)[h[i] % bits->Size()] )
|
||||
|
@ -219,6 +217,11 @@ CountingBloomFilter* CountingBloomFilter::Clone() const
|
|||
return copy;
|
||||
}
|
||||
|
||||
string CountingBloomFilter::InternalState() const
|
||||
{
|
||||
return fmt("%" PRIu64, (uint64_t)cells->Hash());
|
||||
}
|
||||
|
||||
IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
|
||||
|
||||
bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
|
||||
|
@ -235,14 +238,18 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
|
|||
}
|
||||
|
||||
// TODO: Use partitioning in add/count to allow for reusing CMS bounds.
|
||||
void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
||||
void CountingBloomFilter::Add(const HashKey* key)
|
||||
{
|
||||
Hasher::digest_vector h = hasher->Hash(key);
|
||||
|
||||
for ( size_t i = 0; i < h.size(); ++i )
|
||||
cells->Increment(h[i] % cells->Size());
|
||||
}
|
||||
|
||||
size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const
|
||||
size_t CountingBloomFilter::Count(const HashKey* key) const
|
||||
{
|
||||
Hasher::digest_vector h = hasher->Hash(key);
|
||||
|
||||
CounterVector::size_type min =
|
||||
std::numeric_limits<CounterVector::size_type>::max();
|
||||
|
||||
|
|
|
@ -13,9 +13,6 @@ class CounterVector;
|
|||
|
||||
/**
|
||||
* The abstract base class for Bloom filters.
|
||||
*
|
||||
* At this point we won't let the user choose the hasher, but we might open
|
||||
* up the interface in the future.
|
||||
*/
|
||||
class BloomFilter : public SerialObj {
|
||||
public:
|
||||
|
@ -25,27 +22,20 @@ public:
|
|||
virtual ~BloomFilter();
|
||||
|
||||
/**
|
||||
* Adds an element of type T to the Bloom filter.
|
||||
* @param x The element to add
|
||||
* Adds an element to the Bloom filter.
|
||||
*
|
||||
* @param key The key associated with the element to add.
|
||||
*/
|
||||
template <typename T>
|
||||
void Add(const T& x)
|
||||
{
|
||||
AddImpl((*hasher)(x));
|
||||
}
|
||||
virtual void Add(const HashKey* key) = 0;
|
||||
|
||||
/**
|
||||
* Retrieves the associated count of a given value.
|
||||
*
|
||||
* @param x The value of type `T` to check.
|
||||
* @param key The key associated with the element to check.
|
||||
*
|
||||
* @return The counter associated with *x*.
|
||||
* @return The counter associated with *key*.
|
||||
*/
|
||||
template <typename T>
|
||||
size_t Count(const T& x) const
|
||||
{
|
||||
return CountImpl((*hasher)(x));
|
||||
}
|
||||
virtual size_t Count(const HashKey* key) const = 0;
|
||||
|
||||
/**
|
||||
* Checks whether the Bloom filter is empty.
|
||||
|
@ -75,6 +65,12 @@ public:
|
|||
*/
|
||||
virtual BloomFilter* Clone() const = 0;
|
||||
|
||||
/**
|
||||
* Returns a string with a representation of the Bloom filter's
|
||||
* internal state. This is for debugging/testing purposes only.
|
||||
*/
|
||||
virtual string InternalState() const = 0;
|
||||
|
||||
/**
|
||||
* Serializes the Bloom filter.
|
||||
*
|
||||
|
@ -109,25 +105,6 @@ protected:
|
|||
*/
|
||||
BloomFilter(const Hasher* hasher);
|
||||
|
||||
/**
|
||||
* Abstract method for implementinng the *Add* operation.
|
||||
*
|
||||
* @param hashes A set of *k* hashes for the item to add, computed by
|
||||
* the internal hasher object.
|
||||
*
|
||||
*/
|
||||
virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
|
||||
|
||||
/**
|
||||
* Abstract method for implementing the *Count* operation.
|
||||
*
|
||||
* @param hashes A set of *k* hashes for the item to add, computed by
|
||||
* the internal hasher object.
|
||||
*
|
||||
* @return Returns the counter associated with the hashed element.
|
||||
*/
|
||||
virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
|
||||
|
||||
const Hasher* hasher;
|
||||
};
|
||||
|
||||
|
@ -180,6 +157,7 @@ public:
|
|||
virtual void Clear();
|
||||
virtual bool Merge(const BloomFilter* other);
|
||||
virtual BasicBloomFilter* Clone() const;
|
||||
virtual string InternalState() const;
|
||||
|
||||
protected:
|
||||
DECLARE_SERIAL(BasicBloomFilter);
|
||||
|
@ -190,8 +168,8 @@ protected:
|
|||
BasicBloomFilter();
|
||||
|
||||
// Overridden from BloomFilter.
|
||||
virtual void AddImpl(const Hasher::digest_vector& h);
|
||||
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
||||
virtual void Add(const HashKey* key);
|
||||
virtual size_t Count(const HashKey* key) const;
|
||||
|
||||
private:
|
||||
BitVector* bits;
|
||||
|
@ -219,6 +197,7 @@ public:
|
|||
virtual void Clear();
|
||||
virtual bool Merge(const BloomFilter* other);
|
||||
virtual CountingBloomFilter* Clone() const;
|
||||
virtual string InternalState() const;
|
||||
|
||||
protected:
|
||||
DECLARE_SERIAL(CountingBloomFilter);
|
||||
|
@ -229,8 +208,8 @@ protected:
|
|||
CountingBloomFilter();
|
||||
|
||||
// Overridden from BloomFilter.
|
||||
virtual void AddImpl(const Hasher::digest_vector& h);
|
||||
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
||||
virtual void Add(const HashKey* key);
|
||||
virtual size_t Count(const HashKey* key) const;
|
||||
|
||||
private:
|
||||
CounterVector* cells;
|
||||
|
|
|
@ -10,9 +10,11 @@ set(probabilistic_SRCS
|
|||
BitVector.cc
|
||||
BloomFilter.cc
|
||||
CounterVector.cc
|
||||
Hasher.cc)
|
||||
Hasher.cc
|
||||
Topk.cc)
|
||||
|
||||
bif_target(bloom-filter.bif)
|
||||
bif_target(top-k.bif)
|
||||
bro_add_subdir_library(probabilistic ${probabilistic_SRCS})
|
||||
|
||||
add_dependencies(bro_probabilistic generate_outputs)
|
||||
|
|
|
@ -153,6 +153,11 @@ CounterVector operator|(const CounterVector& x, const CounterVector& y)
|
|||
|
||||
}
|
||||
|
||||
size_t CounterVector::Hash() const
|
||||
{
|
||||
return bits->Hash();
|
||||
}
|
||||
|
||||
bool CounterVector::Serialize(SerialInfo* info) const
|
||||
{
|
||||
return SerialObj::Serialize(info);
|
||||
|
|
|
@ -126,6 +126,13 @@ public:
|
|||
*/
|
||||
CounterVector& operator|=(const CounterVector& other);
|
||||
|
||||
/** Computes a hash value of the internal representation.
|
||||
* This is mainly for debugging/testing purposes.
|
||||
*
|
||||
* @return The hash.
|
||||
*/
|
||||
size_t Hash() const;
|
||||
|
||||
/**
|
||||
* Serializes the bit vector.
|
||||
*
|
||||
|
|
|
@ -1,61 +1,119 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#include <typeinfo>
|
||||
#include <openssl/md5.h>
|
||||
|
||||
#include "Hasher.h"
|
||||
#include "NetVar.h"
|
||||
#include "digest.h"
|
||||
#include "Serializer.h"
|
||||
|
||||
using namespace probabilistic;
|
||||
|
||||
UHF::UHF(size_t seed, const std::string& extra)
|
||||
: h(compute_seed(seed, extra))
|
||||
{
|
||||
}
|
||||
|
||||
Hasher::digest UHF::hash(const void* x, size_t n) const
|
||||
{
|
||||
assert(n <= UHASH_KEY_SIZE);
|
||||
return n == 0 ? 0 : h(x, n);
|
||||
}
|
||||
|
||||
size_t UHF::compute_seed(size_t seed, const std::string& extra)
|
||||
size_t Hasher::MakeSeed(const void* data, size_t size)
|
||||
{
|
||||
u_char buf[SHA256_DIGEST_LENGTH];
|
||||
SHA256_CTX ctx;
|
||||
sha256_init(&ctx);
|
||||
|
||||
if ( extra.empty() )
|
||||
if ( data )
|
||||
sha256_update(&ctx, data, size);
|
||||
|
||||
else if ( global_hash_seed && global_hash_seed->Len() > 0 )
|
||||
sha256_update(&ctx, global_hash_seed->Bytes(), global_hash_seed->Len());
|
||||
|
||||
else
|
||||
{
|
||||
unsigned int first_seed = initial_seed();
|
||||
sha256_update(&ctx, &first_seed, sizeof(first_seed));
|
||||
}
|
||||
|
||||
else
|
||||
sha256_update(&ctx, extra.c_str(), extra.size());
|
||||
|
||||
sha256_update(&ctx, &seed, sizeof(seed));
|
||||
sha256_final(&ctx, buf);
|
||||
|
||||
// Take the first sizeof(size_t) bytes as seed.
|
||||
return *reinterpret_cast<size_t*>(buf);
|
||||
return *reinterpret_cast<size_t*>(buf); // Use the first bytes as seed.
|
||||
}
|
||||
|
||||
Hasher* Hasher::Create(size_t k, const std::string& name)
|
||||
Hasher::digest_vector Hasher::Hash(const HashKey* key) const
|
||||
{
|
||||
return new DefaultHasher(k, name);
|
||||
return Hash(key->Key(), key->Size());
|
||||
}
|
||||
|
||||
Hasher::Hasher(size_t k, const std::string& arg_name)
|
||||
: k(k)
|
||||
bool Hasher::Serialize(SerialInfo* info) const
|
||||
{
|
||||
name = arg_name;
|
||||
return SerialObj::Serialize(info);
|
||||
}
|
||||
|
||||
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
|
||||
: Hasher(k, name)
|
||||
Hasher* Hasher::Unserialize(UnserialInfo* info)
|
||||
{
|
||||
for ( size_t i = 0; i < k; ++i )
|
||||
hash_functions.push_back(UHF(i, name));
|
||||
return reinterpret_cast<Hasher*>(SerialObj::Unserialize(info, SER_HASHER));
|
||||
}
|
||||
|
||||
bool Hasher::DoSerialize(SerialInfo* info) const
|
||||
{
|
||||
DO_SERIALIZE(SER_HASHER, SerialObj);
|
||||
|
||||
if ( ! SERIALIZE(static_cast<uint16>(k)) )
|
||||
return false;
|
||||
|
||||
return SERIALIZE(static_cast<uint64>(seed));
|
||||
}
|
||||
|
||||
bool Hasher::DoUnserialize(UnserialInfo* info)
|
||||
{
|
||||
DO_UNSERIALIZE(SerialObj);
|
||||
|
||||
uint16 serial_k;
|
||||
if ( ! UNSERIALIZE(&serial_k) )
|
||||
return false;
|
||||
|
||||
k = serial_k;
|
||||
assert(k > 0);
|
||||
|
||||
uint64 serial_seed;
|
||||
if ( ! UNSERIALIZE(&serial_seed) )
|
||||
return false;
|
||||
|
||||
seed = serial_seed;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
Hasher::Hasher(size_t arg_k, size_t arg_seed)
|
||||
{
|
||||
k = arg_k;
|
||||
seed = arg_seed;
|
||||
}
|
||||
|
||||
UHF::UHF(size_t arg_seed)
|
||||
: h(arg_seed)
|
||||
{
|
||||
seed = arg_seed;
|
||||
}
|
||||
|
||||
// This function is almost equivalent to HashKey::HashBytes except that it
|
||||
// does not depend on global state and that we mix in the seed multiple
|
||||
// times.
|
||||
Hasher::digest UHF::hash(const void* x, size_t n) const
|
||||
{
|
||||
if ( n <= UHASH_KEY_SIZE )
|
||||
return n == 0 ? 0 : h(x, n);
|
||||
|
||||
unsigned char d[16];
|
||||
MD5(reinterpret_cast<const unsigned char*>(x), n, d);
|
||||
|
||||
const unsigned char* s = reinterpret_cast<const unsigned char*>(&seed);
|
||||
for ( size_t i = 0; i < 16; ++i )
|
||||
d[i] ^= s[i % sizeof(seed)];
|
||||
|
||||
MD5(d, 16, d);
|
||||
|
||||
return d[0];
|
||||
}
|
||||
|
||||
// Builds k hash functions; function number n (counting from 1) is seeded
// with the hasher's seed plus bro_prng(n).
DefaultHasher::DefaultHasher(size_t k, size_t seed)
	: Hasher(k, seed)
	{
	for ( size_t n = 0; n < k; ++n )
		{
		UHF f(Seed() + bro_prng(n + 1));
		hash_functions.push_back(f);
		}
	}
|
||||
|
||||
Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
|
||||
|
@ -82,8 +140,29 @@ bool DefaultHasher::Equals(const Hasher* other) const
|
|||
return hash_functions == o->hash_functions;
|
||||
}
|
||||
|
||||
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
|
||||
: Hasher(k, name), h1(1, name), h2(2, name)
|
||||
IMPLEMENT_SERIAL(DefaultHasher, SER_DEFAULTHASHER)
|
||||
|
||||
bool DefaultHasher::DoSerialize(SerialInfo* info) const
|
||||
{
|
||||
DO_SERIALIZE(SER_DEFAULTHASHER, Hasher);
|
||||
|
||||
// Nothing to do here, the base class has all we need serialized already.
|
||||
return true;
|
||||
}
|
||||
|
||||
// Restores a DefaultHasher. The base class restores k and the seed; the hash
// functions themselves are not serialized and are rebuilt here.
bool DefaultHasher::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(Hasher);

	// The indices must run from 1 to k exactly as in the constructor.
	// Starting at 0 would seed every function differently from the
	// instance that was serialized, so the restored hasher would produce
	// different digests for the same input.
	hash_functions.clear();
	for ( size_t i = 1; i <= K(); ++i )
		hash_functions.push_back(UHF(Seed() + bro_prng(i)));

	return true;
	}
|
||||
|
||||
DoubleHasher::DoubleHasher(size_t k, size_t seed)
|
||||
: Hasher(k, seed), h1(seed + bro_prng(1)), h2(seed + bro_prng(2))
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -112,3 +191,23 @@ bool DoubleHasher::Equals(const Hasher* other) const
|
|||
const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
|
||||
return h1 == o->h1 && h2 == o->h2;
|
||||
}
|
||||
|
||||
IMPLEMENT_SERIAL(DoubleHasher, SER_DOUBLEHASHER)
|
||||
|
||||
bool DoubleHasher::DoSerialize(SerialInfo* info) const
|
||||
{
|
||||
DO_SERIALIZE(SER_DOUBLEHASHER, Hasher);
|
||||
|
||||
// Nothing to do here, the base class has all we need serialized already.
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DoubleHasher::DoUnserialize(UnserialInfo* info)
|
||||
{
|
||||
DO_UNSERIALIZE(Hasher);
|
||||
|
||||
h1 = UHF(Seed() + bro_prng(1));
|
||||
h2 = UHF(Seed() + bro_prng(2));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "Hash.h"
|
||||
#include "H3.h"
|
||||
#include "SerialObj.h"
|
||||
|
||||
namespace probabilistic {
|
||||
|
||||
|
@ -12,11 +13,25 @@ namespace probabilistic {
|
|||
* Abstract base class for hashers. A hasher creates a family of hash
|
||||
* functions to hash an element *k* times.
|
||||
*/
|
||||
class Hasher {
|
||||
class Hasher : public SerialObj {
|
||||
public:
|
||||
typedef hash_t digest;
|
||||
typedef std::vector<digest> digest_vector;
|
||||
|
||||
/**
|
||||
* Creates a valid hasher seed from an arbitrary string.
|
||||
*
|
||||
* @param data A pointer to contiguous data that should be crunched into a
|
||||
* seed. If 0, the function tries to find a global_hash_seed script variable
|
||||
* to derive a seed from. If this variable does not exist, the function uses
|
||||
* the initial seed generated at Bro startup.
|
||||
*
|
||||
* @param size The number of bytes of *data*.
|
||||
*
|
||||
* @return A seed suitable for hashers.
|
||||
*/
|
||||
static size_t MakeSeed(const void* data, size_t size);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
|
@ -35,6 +50,15 @@ public:
|
|||
return Hash(&x, sizeof(T));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes hash values for an element.
|
||||
*
|
||||
* @param key The key of the value to hash.
|
||||
*
|
||||
* @return Vector of *k* hash values.
|
||||
*/
|
||||
digest_vector Hash(const HashKey* key) const;
|
||||
|
||||
/**
|
||||
* Computes the hashes for a set of bytes.
|
||||
*
|
||||
|
@ -63,38 +87,30 @@ public:
|
|||
size_t K() const { return k; }
|
||||
|
||||
/**
|
||||
* Returns the hasher's name. TODO: What's this?
|
||||
* Returns the seed used to construct the hasher.
|
||||
*/
|
||||
const std::string& Name() const { return name; }
|
||||
size_t Seed() const { return seed; }
|
||||
|
||||
/**
|
||||
* Constructs the hasher used by the implementation. This hardcodes a
|
||||
* specific hashing policy. It exists only because the HashingPolicy
|
||||
* class hierachy is not yet serializable.
|
||||
*
|
||||
* @param k The number of hash functions to apply.
|
||||
*
|
||||
* @param name The hasher's name. Hashers with the same name should
|
||||
* provide consistent results.
|
||||
*
|
||||
* @return Returns a new hasher instance.
|
||||
*/
|
||||
static Hasher* Create(size_t k, const std::string& name);
|
||||
bool Serialize(SerialInfo* info) const;
|
||||
static Hasher* Unserialize(UnserialInfo* info);
|
||||
|
||||
protected:
|
||||
DECLARE_ABSTRACT_SERIAL(Hasher);
|
||||
|
||||
Hasher() { }
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param k the number of hash functions.
|
||||
* @param arg_k the number of hash functions.
|
||||
*
|
||||
* @param name A name for the hasher. Hashers with the same name
|
||||
* should provide consistent results.
|
||||
* @param arg_seed The seed for the hasher.
|
||||
*/
|
||||
Hasher(size_t k, const std::string& name);
|
||||
Hasher(size_t arg_k, size_t arg_seed);
|
||||
|
||||
private:
|
||||
const size_t k;
|
||||
std::string name;
|
||||
size_t k;
|
||||
size_t seed;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -107,13 +123,9 @@ public:
|
|||
* Constructs an H3 hash function seeded with a given seed and an
|
||||
* optional extra seed to replace the initial Bro seed.
|
||||
*
|
||||
* @param seed The seed to use for this instance.
|
||||
*
|
||||
* @param extra If not empty, this parameter replaces the initial
|
||||
* seed to compute the seed for t to compute the seed NUL-terminated
|
||||
* string as additional seed.
|
||||
* @param arg_seed The seed to use for this instance.
|
||||
*/
|
||||
UHF(size_t seed, const std::string& extra = "");
|
||||
UHF(size_t arg_seed = 0);
|
||||
|
||||
template <typename T>
|
||||
Hasher::digest operator()(const T& x) const
|
||||
|
@ -156,9 +168,10 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
static size_t compute_seed(size_t seed, const std::string& extra);
|
||||
static size_t compute_seed(size_t seed);
|
||||
|
||||
H3<Hasher::digest, UHASH_KEY_SIZE> h;
|
||||
size_t seed;
|
||||
};
|
||||
|
||||
|
||||
|
@ -173,16 +186,20 @@ public:
|
|||
*
|
||||
* @param k The number of hash functions to use.
|
||||
*
|
||||
* @param name The name of the hasher.
|
||||
* @param seed The seed for the hasher.
|
||||
*/
|
||||
DefaultHasher(size_t k, const std::string& name);
|
||||
DefaultHasher(size_t k, size_t seed);
|
||||
|
||||
// Overridden from Hasher.
|
||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||
virtual DefaultHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
|
||||
DECLARE_SERIAL(DefaultHasher);
|
||||
|
||||
private:
|
||||
DefaultHasher() { }
|
||||
|
||||
std::vector<UHF> hash_functions;
|
||||
};
|
||||
|
||||
|
@ -197,16 +214,20 @@ public:
|
|||
*
|
||||
* @param k The number of hash functions to use.
|
||||
*
|
||||
* @param name The name of the hasher.
|
||||
* @param seed The seed for the hasher.
|
||||
*/
|
||||
DoubleHasher(size_t k, const std::string& name);
|
||||
DoubleHasher(size_t k, size_t seed);
|
||||
|
||||
// Overridden from Hasher.
|
||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||
virtual DoubleHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
|
||||
DECLARE_SERIAL(DoubleHasher);
|
||||
|
||||
private:
|
||||
DoubleHasher() { }
|
||||
|
||||
UHF h1;
|
||||
UHF h2;
|
||||
};
|
||||
|
|
499
src/probabilistic/Topk.cc
Normal file
499
src/probabilistic/Topk.cc
Normal file
|
@ -0,0 +1,499 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#include "probabilistic/Topk.h"
|
||||
#include "CompHash.h"
|
||||
#include "Reporter.h"
|
||||
#include "Serializer.h"
|
||||
#include "NetVar.h"
|
||||
|
||||
namespace probabilistic {
|
||||
|
||||
IMPLEMENT_SERIAL(TopkVal, SER_TOPK_VAL);
|
||||
|
||||
static void topk_element_hash_delete_func(void* val)
|
||||
{
|
||||
Element* e = (Element*) val;
|
||||
delete e;
|
||||
}
|
||||
|
||||
Element::~Element()
|
||||
{
|
||||
Unref(value);
|
||||
}
|
||||
|
||||
void TopkVal::Typify(BroType* t)
|
||||
{
|
||||
assert(!hash && !type);
|
||||
type = t->Ref();
|
||||
TypeList* tl = new TypeList(t);
|
||||
tl->Append(t->Ref());
|
||||
hash = new CompositeHash(tl);
|
||||
Unref(tl);
|
||||
}
|
||||
|
||||
HashKey* TopkVal::GetHash(Val* v) const
|
||||
{
|
||||
HashKey* key = hash->ComputeHash(v, 1);
|
||||
assert(key);
|
||||
return key;
|
||||
}
|
||||
|
||||
// Creates an empty top-k structure that tracks up to arg_size elements.
TopkVal::TopkVal(uint64 arg_size) : OpaqueVal(topk_type)
	{
	// Neither the tracked type nor the hash function is known yet;
	// Typify() sets both.
	type = 0;
	hash = 0;

	size = arg_size;
	numElements = 0;
	pruned = false;

	// The dictionary owns the elements and frees them through the callback.
	elementDict = new PDict(Element);
	elementDict->SetDeleteFunc(topk_element_hash_delete_func);
	}
|
||||
|
||||
// Creates an empty, zero-sized TopkVal. The header documents this
// constructor as being used for deserialization only.
TopkVal::TopkVal() : OpaqueVal(topk_type)
	{
	elementDict = new PDict(Element);
	elementDict->SetDeleteFunc(topk_element_hash_delete_func);
	size = 0;
	type = 0;
	numElements = 0;
	// Must be initialized like in the sized constructor: GetSum() and
	// DoSerialize() read it, and a failed DoUnserialize() may never set it.
	pruned = false;
	hash = 0;
	}
|
||||
|
||||
// Releases the elements, the buckets, the tracked type and the hash function.
TopkVal::~TopkVal()
	{
	// Clearing the dictionary disposes of all elements.
	elementDict->Clear();
	delete elementDict;

	// With the elements gone, only the bucket objects themselves remain.
	for ( std::list<Bucket*>::iterator pos = buckets.begin();
	      pos != buckets.end(); ++pos )
		delete *pos;

	Unref(type);
	delete hash;
	}
|
||||
|
||||
void TopkVal::Merge(const TopkVal* value, bool doPrune)
|
||||
{
|
||||
if ( type == 0 )
|
||||
{
|
||||
assert(numElements == 0);
|
||||
Typify(value->type);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if ( ! same_type(type, value->type) )
|
||||
{
|
||||
reporter->Error("Cannot merge top-k elements of differing types.");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::list<Bucket*>::const_iterator it = value->buckets.begin();
|
||||
while ( it != value->buckets.end() )
|
||||
{
|
||||
Bucket* b = *it;
|
||||
uint64_t currcount = b->count;
|
||||
std::list<Element*>::const_iterator eit = b->elements.begin();
|
||||
|
||||
while ( eit != b->elements.end() )
|
||||
{
|
||||
Element* e = *eit;
|
||||
// lookup if we already know this one...
|
||||
HashKey* key = GetHash(e->value);
|
||||
Element* olde = (Element*) elementDict->Lookup(key);
|
||||
|
||||
if ( olde == 0 )
|
||||
{
|
||||
olde = new Element();
|
||||
olde->epsilon = 0;
|
||||
olde->value = e->value->Ref();
|
||||
// insert at bucket position 0
|
||||
if ( buckets.size() > 0 )
|
||||
{
|
||||
assert (buckets.front()-> count > 0 );
|
||||
}
|
||||
|
||||
Bucket* newbucket = new Bucket();
|
||||
newbucket->count = 0;
|
||||
newbucket->bucketPos = buckets.insert(buckets.begin(), newbucket);
|
||||
|
||||
olde->parent = newbucket;
|
||||
newbucket->elements.insert(newbucket->elements.end(), olde);
|
||||
|
||||
elementDict->Insert(key, olde);
|
||||
numElements++;
|
||||
|
||||
}
|
||||
|
||||
// now that we are sure that the old element is present - increment epsilon
|
||||
olde->epsilon += e->epsilon;
|
||||
|
||||
// and increment position...
|
||||
IncrementCounter(olde, currcount);
|
||||
delete key;
|
||||
|
||||
eit++;
|
||||
}
|
||||
|
||||
it++;
|
||||
}
|
||||
|
||||
// now we have added everything. And our top-k table could be too big.
|
||||
// prune everything...
|
||||
|
||||
assert(size > 0);
|
||||
|
||||
if ( ! doPrune )
|
||||
return;
|
||||
|
||||
while ( numElements > size )
|
||||
{
|
||||
pruned = true;
|
||||
assert(buckets.size() > 0 );
|
||||
Bucket* b = buckets.front();
|
||||
assert(b->elements.size() > 0);
|
||||
|
||||
Element* e = b->elements.front();
|
||||
HashKey* key = GetHash(e->value);
|
||||
elementDict->RemoveEntry(key);
|
||||
delete e;
|
||||
|
||||
b->elements.pop_front();
|
||||
|
||||
if ( b->elements.size() == 0 )
|
||||
{
|
||||
delete b;
|
||||
buckets.pop_front();
|
||||
}
|
||||
|
||||
numElements--;
|
||||
}
|
||||
}
|
||||
|
||||
bool TopkVal::DoSerialize(SerialInfo* info) const
|
||||
{
|
||||
DO_SERIALIZE(SER_TOPK_VAL, OpaqueVal);
|
||||
|
||||
bool v = true;
|
||||
|
||||
v &= SERIALIZE(size);
|
||||
v &= SERIALIZE(numElements);
|
||||
v &= SERIALIZE(pruned);
|
||||
|
||||
bool type_present = (type != 0);
|
||||
v &= SERIALIZE(type_present);
|
||||
|
||||
if ( type_present )
|
||||
v &= type->Serialize(info);
|
||||
else
|
||||
assert(numElements == 0);
|
||||
|
||||
uint64_t i = 0;
|
||||
std::list<Bucket*>::const_iterator it = buckets.begin();
|
||||
while ( it != buckets.end() )
|
||||
{
|
||||
Bucket* b = *it;
|
||||
uint32_t elements_count = b->elements.size();
|
||||
v &= SERIALIZE(elements_count);
|
||||
v &= SERIALIZE(b->count);
|
||||
|
||||
std::list<Element*>::const_iterator eit = b->elements.begin();
|
||||
while ( eit != b->elements.end() )
|
||||
{
|
||||
Element* element = *eit;
|
||||
v &= SERIALIZE(element->epsilon);
|
||||
v &= element->value->Serialize(info);
|
||||
|
||||
eit++;
|
||||
i++;
|
||||
}
|
||||
|
||||
it++;
|
||||
}
|
||||
|
||||
assert(i == numElements);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
// Restores a TopkVal in the layout written by DoSerialize(): size, element
// count, pruned flag, an optional type, and then for each bucket its element
// count and counter followed by (epsilon, value) pairs.
//
// Returns false as soon as the input turns out to be truncated or
// inconsistent, instead of continuing with invalid data.
bool TopkVal::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(OpaqueVal);

	bool v = true;

	v &= UNSERIALIZE(&size);
	v &= UNSERIALIZE(&numElements);
	v &= UNSERIALIZE(&pruned);

	bool type_present = false;
	v &= UNSERIALIZE(&type_present);

	if ( ! v )
		return false;

	if ( type_present )
		{
		BroType* deserialized_type = BroType::Unserialize(info);

		// Typify() dereferences the type, so a failed read must stop here.
		if ( ! deserialized_type )
			return false;

		Typify(deserialized_type);
		Unref(deserialized_type);
		assert(type);
		}

	else if ( numElements != 0 )
		// Elements cannot be rebuilt without a type and hash function.
		return false;

	uint64_t i = 0;
	while ( i < numElements )
		{
		Bucket* b = new Bucket();
		uint32_t elements_count = 0;

		bool ok = UNSERIALIZE(&elements_count);
		ok = ok && UNSERIALIZE(&b->count);

		// An empty bucket would never advance i and loop forever; a bucket
		// claiming more elements than remain would overrun numElements.
		if ( ! ok || elements_count == 0 || elements_count > numElements - i )
			{
			delete b;
			return false;
			}

		b->bucketPos = buckets.insert(buckets.end(), b);

		for ( uint32_t j = 0; j < elements_count; j++ )
			{
			uint64 epsilon = 0;
			if ( ! UNSERIALIZE(&epsilon) )
				return false;

			Val* val = Val::Unserialize(info, type);
			if ( ! val )
				return false;

			Element* e = new Element();
			e->epsilon = epsilon;
			e->value = val;
			e->parent = b;

			b->elements.insert(b->elements.end(), e);

			HashKey* key = GetHash(e->value);
			assert (elementDict->Lookup(key) == 0);

			elementDict->Insert(key, e);
			delete key;

			i++;
			}
		}

	assert(i == numElements);

	return true;
	}
|
||||
|
||||
|
||||
VectorVal* TopkVal::GetTopK(int k) const // returns vector
|
||||
{
|
||||
if ( numElements == 0 )
|
||||
{
|
||||
reporter->Error("Cannot return topk of empty");
|
||||
return 0;
|
||||
}
|
||||
|
||||
TypeList* vector_index = new TypeList(type);
|
||||
vector_index->Append(type->Ref());
|
||||
VectorType* v = new VectorType(vector_index);
|
||||
VectorVal* t = new VectorVal(v);
|
||||
|
||||
// this does no estimation if the results is correct!
|
||||
// in any case - just to make this future-proof (and I am lazy) - this can return more than k.
|
||||
|
||||
int read = 0;
|
||||
std::list<Bucket*>::const_iterator it = buckets.end();
|
||||
it--;
|
||||
while (read < k )
|
||||
{
|
||||
//printf("Bucket %llu\n", (*it)->count);
|
||||
std::list<Element*>::iterator eit = (*it)->elements.begin();
|
||||
while ( eit != (*it)->elements.end() )
|
||||
{
|
||||
//printf("Size: %ld\n", (*it)->elements.size());
|
||||
t->Assign(read, (*eit)->value->Ref());
|
||||
read++;
|
||||
eit++;
|
||||
}
|
||||
|
||||
if ( it == buckets.begin() )
|
||||
break;
|
||||
|
||||
it--;
|
||||
}
|
||||
|
||||
Unref(v);
|
||||
return t;
|
||||
}
|
||||
|
||||
// Returns the count currently tracked for *value*. If the value is not
// tracked, an error is reported and 0 is returned.
uint64_t TopkVal::GetCount(Val* value) const
	{
	Element* e = 0;

	// Without a hash function (nothing has been added yet) GetHash() would
	// dereference a null pointer; an untyped structure tracks no elements.
	if ( hash )
		{
		HashKey* key = GetHash(value);
		e = (Element*) elementDict->Lookup(key);
		// The key is only needed for the lookup. Free it on every path;
		// it used to leak when the element was not found.
		delete key;
		}

	if ( e == 0 )
		{
		reporter->Error("GetCount for element that is not in top-k");
		return 0;
		}

	return e->parent->count;
	}
|
||||
|
||||
// Returns the epsilon (error bound) currently tracked for *value*. If the
// value is not tracked, an error is reported and 0 is returned.
uint64_t TopkVal::GetEpsilon(Val* value) const
	{
	Element* e = 0;

	// Without a hash function (nothing has been added yet) GetHash() would
	// dereference a null pointer; an untyped structure tracks no elements.
	if ( hash )
		{
		HashKey* key = GetHash(value);
		e = (Element*) elementDict->Lookup(key);
		// The key is only needed for the lookup. Free it on every path;
		// it used to leak when the element was not found.
		delete key;
		}

	if ( e == 0 )
		{
		reporter->Error("GetEpsilon for element that is not in top-k");
		return 0;
		}

	return e->epsilon;
	}
|
||||
|
||||
uint64_t TopkVal::GetSum() const
|
||||
{
|
||||
uint64_t sum = 0;
|
||||
|
||||
std::list<Bucket*>::const_iterator it = buckets.begin();
|
||||
while ( it != buckets.end() )
|
||||
{
|
||||
sum += (*it)->elements.size() * (*it)->count;
|
||||
|
||||
it++;
|
||||
}
|
||||
|
||||
if ( pruned )
|
||||
reporter->Warning("TopkVal::GetSum() was used on a pruned data structure. Result values do not represent total element count");
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Records one observation of *encountered*.
//
// The first value fixes the type that this structure tracks; values of any
// other type are rejected with an error. A known value has its counter
// incremented. An unknown value is added with count 1 while there is room;
// once the structure is full it replaces the oldest element of the
// lowest-count bucket and inherits that bucket's count as its epsilon.
void TopkVal::Encountered(Val* encountered)
	{
	if ( size == 0 )
		{
		// With no capacity, the eviction path below would dereference
		// buckets.begin() of an empty list.
		reporter->Error("Cannot add element to topk of size 0");
		return;
		}

	// Typify() requires that no type has been set yet, so decide based on
	// the type itself rather than on the element count.
	if ( type == 0 )
		Typify(encountered->Type());

	else if ( ! same_type(type, encountered->Type()) )
		{
		reporter->Error("Trying to add element to topk with differing type from other elements");
		return;
		}

	// Step 1 - get the hash and see if we already know this value.
	HashKey* key = GetHash(encountered);
	Element* e = (Element*) elementDict->Lookup(key);

	if ( e == 0 )
		{
		e = new Element();
		e->epsilon = 0;
		e->value = encountered->Ref();

		if ( numElements < size )
			{
			// There is still room: the element goes into the count-1
			// bucket, which is created if it does not exist yet.
			if ( buckets.size() == 0 || (*buckets.begin())->count > 1 )
				{
				Bucket* b = new Bucket();
				b->count = 1;
				std::list<Bucket*>::iterator pos = buckets.insert(buckets.begin(), b);
				b->bucketPos = pos;
				b->elements.insert(b->elements.end(), e);
				e->parent = b;
				}
			else
				{
				Bucket* b = *buckets.begin();
				assert(b->count == 1);
				b->elements.insert(b->elements.end(), e);
				e->parent = b;
				}

			elementDict->Insert(key, e);
			numElements++;
			delete key;

			return; // Done, the element sits in the count-1 bucket.
			}

		else
			{
			// Full: replace the element with the minimal count.
			Bucket* b = *buckets.begin(); // bucket with the smallest count

			// Evict the oldest element with the fewest hits.
			assert(b->elements.size() > 0);
			HashKey* deleteKey = GetHash((*(b->elements.begin()))->value);
			b->elements.erase(b->elements.begin());
			Element* deleteElement = (Element*) elementDict->RemoveEntry(deleteKey);
			assert(deleteElement); // there has to have been a minimal element...
			delete deleteElement;
			delete deleteKey;

			// The newcomer takes over the evicted element's bucket and
			// records that bucket's count as its possible overestimate.
			e->epsilon = b->count;
			b->elements.insert(b->elements.end(), e);
			elementDict->Insert(key, e);
			e->parent = b;

			// Fall through: the increment below still has to run.
			}
		}

	// e now refers to the tracked element, old or new.
	delete key;
	IncrementCounter(e);
	}
|
||||
|
||||
// increment by count
|
||||
void TopkVal::IncrementCounter(Element* e, unsigned int count)
|
||||
{
|
||||
Bucket* currBucket = e->parent;
|
||||
uint64 currcount = currBucket->count;
|
||||
|
||||
// well, let's test if there is a bucket for currcount++
|
||||
std::list<Bucket*>::iterator bucketIter = currBucket->bucketPos;
|
||||
|
||||
Bucket* nextBucket = 0;
|
||||
|
||||
bucketIter++;
|
||||
|
||||
while ( bucketIter != buckets.end() && (*bucketIter)->count < currcount+count )
|
||||
bucketIter++;
|
||||
|
||||
if ( bucketIter != buckets.end() && (*bucketIter)->count == currcount+count )
|
||||
nextBucket = *bucketIter;
|
||||
|
||||
if ( nextBucket == 0 )
|
||||
{
|
||||
// the bucket for the value that we want does not exist.
|
||||
// create it...
|
||||
|
||||
Bucket* b = new Bucket();
|
||||
b->count = currcount+count;
|
||||
|
||||
std::list<Bucket*>::iterator nextBucketPos = buckets.insert(bucketIter, b);
|
||||
b->bucketPos = nextBucketPos; // and give it the iterator we know now.
|
||||
|
||||
nextBucket = b;
|
||||
}
|
||||
|
||||
// ok, now we have the new bucket in nextBucket. Shift the element over...
|
||||
currBucket->elements.remove(e);
|
||||
nextBucket->elements.insert(nextBucket->elements.end(), e);
|
||||
|
||||
e->parent = nextBucket;
|
||||
|
||||
// if currBucket is empty, we have to delete it now
|
||||
if ( currBucket->elements.size() == 0 )
|
||||
{
|
||||
buckets.remove(currBucket);
|
||||
delete currBucket;
|
||||
currBucket = 0;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
170
src/probabilistic/Topk.h
Normal file
170
src/probabilistic/Topk.h
Normal file
|
@ -0,0 +1,170 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#ifndef topk_h
|
||||
#define topk_h
|
||||
|
||||
#include <list>
|
||||
#include "Val.h"
|
||||
#include "CompHash.h"
|
||||
#include "OpaqueVal.h"
|
||||
|
||||
// This class implements the top-k algorithm. Or - to be more precise - an
|
||||
// interpretation of it.
|
||||
|
||||
namespace probabilistic {
|
||||
|
||||
struct Element;
|
||||
|
||||
struct Bucket {
|
||||
uint64 count;
|
||||
std::list<Element*> elements;
|
||||
|
||||
// Iterators only get invalidated for removed elements. This one
|
||||
// points to us - so it is invalid when we are no longer there. Cute,
|
||||
// isn't it?
|
||||
std::list<Bucket*>::iterator bucketPos;
|
||||
};
|
||||
|
||||
struct Element {
|
||||
uint64 epsilon;
|
||||
Val* value;
|
||||
Bucket* parent;
|
||||
|
||||
~Element();
|
||||
};
|
||||
|
||||
declare(PDict, Element);
|
||||
|
||||
class TopkVal : public OpaqueVal {
|
||||
|
||||
public:
|
||||
/**
|
||||
* Construct a TopkVal.
|
||||
*
|
||||
* @param size specifies how many total elements are tracked
|
||||
*
|
||||
* @return A newly initialized TopkVal
|
||||
*/
|
||||
TopkVal(uint64 size);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
~TopkVal();
|
||||
|
||||
/**
|
||||
* Call this when a new value is encountered. Note that on the first
|
||||
* call, the Bro type of the value types that are counted is set. All
|
||||
* following calls to encountered have to specify the same type.
|
||||
*
|
||||
* @param value The encountered element
|
||||
*/
|
||||
void Encountered(Val* value);
|
||||
|
||||
/**
|
||||
* Get the first *k* elements of the result vector. At the moment,
|
||||
* this does not check if it is in the right order or if we can prove
|
||||
* that these are the correct top-k. Use count and epsilon for this.
|
||||
*
|
||||
* @param k Number of top-elements to return
|
||||
*
|
||||
* @returns The top-k encountered elements
|
||||
*/
|
||||
VectorVal* GetTopK(int k) const;
|
||||
|
||||
/**
|
||||
* Get the current count tracked in the top-k data structure for a
|
||||
* certain val. Returns 0 if the val is unknown (and logs the error
|
||||
* to reporter).
|
||||
*
|
||||
* @param value Bro value to get counts for
|
||||
*
|
||||
* @returns internal count for val, 0 if unknown
|
||||
*/
|
||||
uint64_t GetCount(Val* value) const;
|
||||
|
||||
/**
|
||||
* Get the current epsilon tracked in the top-k data structure for a
|
||||
* certain val.
|
||||
*
|
||||
* @param value Bro value to get epsilons for
|
||||
*
|
||||
* @returns the epsilon. Returns 0 if the val is unknown (and logs
|
||||
* the error to reporter)
|
||||
*/
|
||||
uint64_t GetEpsilon(Val* value) const;
|
||||
|
||||
/**
|
||||
* Get the size set in the constructor
|
||||
*
|
||||
* @returns size of the top-k structure
|
||||
*/
|
||||
uint64_t GetSize() const { return size; }
|
||||
|
||||
/**
|
||||
* Get the sum of all counts of all tracked elements. This is equal
|
||||
* to the number of total observations up to this moment, if no
|
||||
* elements were pruned from the data structure.
|
||||
*
|
||||
* @returns sum of all counts
|
||||
*/
|
||||
uint64_t GetSum() const;
|
||||
|
||||
/**
|
||||
* Merge another top-k data structure into this one. doPrune
|
||||
* specifies if the total count of elements is limited to size after
|
||||
* merging. Please note, that pruning will invalidate the results of
|
||||
* getSum.
|
||||
*
|
||||
* @param value TopkVal to merge into this TopkVal
|
||||
*
|
||||
* @param doPrune prune resulting TopkVal to size after merging
|
||||
*/
|
||||
void Merge(const TopkVal* value, bool doPrune=false);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Construct an empty TopkVal. Only used for deserialization
|
||||
*/
|
||||
TopkVal();
|
||||
|
||||
private:
|
||||
/**
|
||||
* Increment the counter for a specific element
|
||||
*
|
||||
* @param e element to increment counter for
|
||||
*
|
||||
* @param count increment counter by this much
|
||||
*/
|
||||
void IncrementCounter(Element* e, unsigned int count = 1);
|
||||
|
||||
/**
|
||||
* get the hashkey for a specific value
|
||||
*
|
||||
* @param v value to generate key for
|
||||
*
|
||||
* @returns HashKey for value
|
||||
*/
|
||||
HashKey* GetHash(Val* v) const; // this probably should go somewhere else.
|
||||
|
||||
/**
|
||||
* Set the type that this TopK instance tracks
|
||||
*
|
||||
* @param t type that is tracked
|
||||
*/
|
||||
void Typify(BroType* t);
|
||||
|
||||
BroType* type;
|
||||
CompositeHash* hash;
|
||||
std::list<Bucket*> buckets;
|
||||
PDict(Element)* elementDict;
|
||||
uint64 size; // how many elements are we tracking?
|
||||
uint64 numElements; // how many elements do we have at the moment
|
||||
bool pruned; // was this data structure pruned?
|
||||
|
||||
DECLARE_SERIAL(TopkVal);
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
|
@ -20,23 +20,20 @@ module GLOBAL;
|
|||
|
||||
## Creates a basic Bloom filter.
|
||||
##
|
||||
## .. note:: A Bloom filter can have a name associated with it. In the future,
|
||||
## Bloom filters with the same name will be compatible across indepedent Bro
|
||||
## instances, i.e., it will be possible to merge them. Currently, however, that is
|
||||
## not yet supported.
|
||||
##
|
||||
## fp: The desired false-positive rate.
|
||||
##
|
||||
## capacity: the maximum number of elements that guarantees a false-positive
|
||||
## rate of *fp*.
|
||||
##
|
||||
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
||||
## the filter will remain tied to the current Bro process.
|
||||
## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
|
||||
## a local seed tied to the current Bro process. Only filters with the same seed
|
||||
## can be merged with :bro:id:`bloomfilter_merge` .
|
||||
##
|
||||
## Returns: A Bloom filter handle.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
|
||||
## bloomfilter_clear bloomfilter_merge
|
||||
## .. bro:see:: bloomfilter_basic_init2 bloomfilter_counting_init bloomfilter_add
|
||||
## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
|
||||
function bloomfilter_basic_init%(fp: double, capacity: count,
|
||||
name: string &default=""%): opaque of bloomfilter
|
||||
%{
|
||||
|
@ -48,18 +45,53 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
|
|||
|
||||
size_t cells = BasicBloomFilter::M(fp, capacity);
|
||||
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
|
||||
const Hasher* h = Hasher::Create(optimal_k, name->CheckString());
|
||||
size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
|
||||
name->Len());
|
||||
const Hasher* h = new DefaultHasher(optimal_k, seed);
|
||||
|
||||
return new BloomFilterVal(new BasicBloomFilter(h, cells));
|
||||
%}
|
||||
|
||||
## Creates a basic Bloom filter. This function serves as a low-level
|
||||
## alternative to bloomfilter_basic_init where the user has full control over
|
||||
## the number of hash functions and cells in the underlying bit vector.
|
||||
##
|
||||
## k: The number of hash functions to use.
|
||||
##
|
||||
## cells: The number of cells of the underlying bit vector.
|
||||
##
|
||||
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
||||
## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
|
||||
## a local seed tied to the current Bro process. Only filters with the same seed
|
||||
## can be merged with :bro:id:`bloomfilter_merge` .
|
||||
##
|
||||
## Returns: A Bloom filter handle.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_basic_init bloomfilter_counting_init bloomfilter_add
|
||||
## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
|
||||
function bloomfilter_basic_init2%(k: count, cells: count,
|
||||
name: string &default=""%): opaque of bloomfilter
|
||||
%{
|
||||
// Zero is the only value rejected by the two guards below, so the
|
||||
// error text says "positive" rather than "non-negative".
|
||||
if ( k == 0 )
|
||||
{
|
||||
reporter->Error("number of hash functions must be positive");
|
||||
return 0;
|
||||
}
|
||||
if ( cells == 0 )
|
||||
{
|
||||
reporter->Error("number of cells must be positive");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// An empty name hands MakeSeed a null data pointer and zero length.
|
||||
size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
|
||||
name->Len());
|
||||
const Hasher* h = new DefaultHasher(k, seed);
|
||||
|
||||
return new BloomFilterVal(new BasicBloomFilter(h, cells));
|
||||
%}
|
||||
|
||||
## Creates a counting Bloom filter.
|
||||
##
|
||||
## .. note:: A Bloom filter can have a name associated with it. In the future,
|
||||
## Bloom filters with the same name will be compatible across independent Bro
|
||||
## instances, i.e., it will be possible to merge them. Currently, however, that is
|
||||
## not yet supported.
|
||||
##
|
||||
## k: The number of hash functions to use.
|
||||
##
|
||||
## cells: The number of cells of the underlying counter vector. As there's no
|
||||
|
@ -71,12 +103,14 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
|
|||
## becomes a cell of size *w* bits.
|
||||
##
|
||||
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
||||
## the filter will remain tied to the current Bro process.
|
||||
## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
|
||||
## a local seed tied to the current Bro process. Only filters with the same seed
|
||||
## can be merged with :bro:id:`bloomfilter_merge` .
|
||||
##
|
||||
## Returns: A Bloom filter handle.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_basic_init bloomfilter_add bloomfilter_lookup
|
||||
## bloomfilter_clear bloomfilter_merge
|
||||
## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2 bloomfilter_add
|
||||
## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
|
||||
function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
||||
name: string &default=""%): opaque of bloomfilter
|
||||
%{
|
||||
|
@ -86,7 +120,10 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
|||
return 0;
|
||||
}
|
||||
|
||||
const Hasher* h = Hasher::Create(k, name->CheckString());
|
||||
size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
|
||||
name->Len());
|
||||
|
||||
const Hasher* h = new DefaultHasher(k, seed);
|
||||
|
||||
uint16 width = 1;
|
||||
while ( max >>= 1 )
|
||||
|
@ -101,8 +138,9 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
|||
##
|
||||
## x: The element to add.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init loomfilter_lookup
|
||||
## bloomfilter_clear bloomfilter_merge
|
||||
## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
|
||||
## bloomfilter_counting_init bloomfilter_lookup bloomfilter_clear
|
||||
## bloomfilter_merge
|
||||
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
|
||||
%{
|
||||
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
||||
|
@ -127,8 +165,9 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
|
|||
##
|
||||
## Returns: the counter associated with *x* in *bf*.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
|
||||
## bloomfilter_add bloomfilter_clear bloomfilter_merge
|
||||
## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
|
||||
## bloomfilter_counting_init bloomfilter_add bloomfilter_clear
|
||||
## bloomfilter_merge
|
||||
function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
|
||||
%{
|
||||
const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf);
|
||||
|
@ -154,8 +193,9 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
|
|||
##
|
||||
## bf: The Bloom filter handle.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
|
||||
## bloomfilter_add bloomfilter_lookup bloomfilter_merge
|
||||
## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
|
||||
## bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
|
||||
## bloomfilter_merge
|
||||
function bloomfilter_clear%(bf: opaque of bloomfilter%): any
|
||||
%{
|
||||
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
||||
|
@ -178,15 +218,18 @@ function bloomfilter_clear%(bf: opaque of bloomfilter%): any
|
|||
##
|
||||
## Returns: The union of *bf1* and *bf2*.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
|
||||
## bloomfilter_add bloomfilter_lookup bloomfilter_clear
|
||||
## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
|
||||
## bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
|
||||
## bloomfilter_clear
|
||||
function bloomfilter_merge%(bf1: opaque of bloomfilter,
|
||||
bf2: opaque of bloomfilter%): opaque of bloomfilter
|
||||
%{
|
||||
const BloomFilterVal* bfv1 = static_cast<const BloomFilterVal*>(bf1);
|
||||
const BloomFilterVal* bfv2 = static_cast<const BloomFilterVal*>(bf2);
|
||||
|
||||
if ( ! same_type(bfv1->Type(), bfv2->Type()) )
|
||||
if ( bfv1->Type() && // any one 0 is ok here
|
||||
bfv2->Type() &&
|
||||
! same_type(bfv1->Type(), bfv2->Type()) )
|
||||
{
|
||||
reporter->Error("incompatible Bloom filter types");
|
||||
return 0;
|
||||
|
@ -194,3 +237,13 @@ function bloomfilter_merge%(bf1: opaque of bloomfilter,
|
|||
|
||||
return BloomFilterVal::Merge(bfv1, bfv2);
|
||||
%}
|
||||
|
||||
## Returns a string with a representation of a Bloom filter's internal
|
||||
## state. This is for debugging/testing purposes only.
|
||||
##
|
||||
## bf: The Bloom filter handle.
|
||||
function bloomfilter_internal_state%(bf: opaque of bloomfilter%): string
|
||||
%{
|
||||
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
||||
return new StringVal(bfv->InternalState());
|
||||
%}
|
||||
|
|
184
src/probabilistic/top-k.bif
Normal file
184
src/probabilistic/top-k.bif
Normal file
|
@ -0,0 +1,184 @@
|
|||
# ===========================================================================
|
||||
#
|
||||
# Top-K Functions
|
||||
#
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
%%{
|
||||
#include "probabilistic/Topk.h"
|
||||
%%}
|
||||
|
||||
## Creates a top-k data structure which tracks *size* elements.
|
||||
##
|
||||
## size: number of elements to track
|
||||
##
|
||||
## Returns: Opaque pointer to the data structure.
|
||||
##
|
||||
## .. bro:see:: topk_add topk_get_top topk_count topk_epsilon
|
||||
## topk_size topk_sum topk_merge topk_merge_prune
|
||||
function topk_init%(size: count%): opaque of topk
|
||||
%{
|
||||
probabilistic::TopkVal* v = new probabilistic::TopkVal(size);
|
||||
return v;
|
||||
%}
|
||||
|
||||
## Add a new observed object to the data structure.
|
||||
##
|
||||
## .. note:: The first added object sets the type of data tracked by
|
||||
## the top-k data structure. All following values have to be of the same
|
||||
## type.
|
||||
##
|
||||
## handle: the TopK handle
|
||||
##
|
||||
## value: observed value
|
||||
##
|
||||
## .. bro:see:: topk_init topk_get_top topk_count topk_epsilon
|
||||
## topk_size topk_sum topk_merge topk_merge_prune
|
||||
function topk_add%(handle: opaque of topk, value: any%): any
|
||||
%{
|
||||
assert(handle);
|
||||
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
|
||||
h->Encountered(value);
|
||||
|
||||
return 0;
|
||||
%}
|
||||
|
||||
## Get the first *k* elements of the top-k data structure.
|
||||
##
|
||||
## handle: the TopK handle
|
||||
##
|
||||
## k: number of elements to return
|
||||
##
|
||||
## Returns: vector of the first k elements
|
||||
##
|
||||
## .. bro:see:: topk_init topk_add topk_count topk_epsilon
|
||||
## topk_size topk_sum topk_merge topk_merge_prune
|
||||
function topk_get_top%(handle: opaque of topk, k: count%): any
|
||||
%{
|
||||
assert(handle);
|
||||
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
|
||||
return h->GetTopK(k);
|
||||
%}
|
||||
|
||||
## Get an overestimated count of how often value has been encountered.
|
||||
##
|
||||
## .. note:: value has to be part of the currently tracked elements, otherwise
|
||||
## 0 will be returned and an error message will be added to reporter.
|
||||
##
|
||||
## handle: the TopK handle
|
||||
##
|
||||
## value: Value to look up count for.
|
||||
##
|
||||
## Returns: Overestimated number for how often the element has been encountered
|
||||
##
|
||||
## .. bro:see:: topk_init topk_add topk_get_top topk_epsilon
|
||||
## topk_size topk_sum topk_merge topk_merge_prune
|
||||
function topk_count%(handle: opaque of topk, value: any%): count
|
||||
%{
|
||||
assert(handle);
|
||||
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
|
||||
return new Val(h->GetCount(value), TYPE_COUNT);
|
||||
%}
|
||||
|
||||
## Get the maximal overestimation for count.
|
||||
##
|
||||
## .. note:: Same restrictions as for :bro:id:`topk_count` apply.
|
||||
##
|
||||
## handle: the TopK handle
|
||||
##
|
||||
## value: Value to look up epsilon for.
|
||||
##
|
||||
## Returns: Number which represents the maximal overestimation for the count of this element.
|
||||
##
|
||||
## .. bro:see:: topk_init topk_add topk_get_top topk_count
|
||||
## topk_size topk_sum topk_merge topk_merge_prune
|
||||
function topk_epsilon%(handle: opaque of topk, value: any%): count
|
||||
%{
|
||||
assert(handle);
|
||||
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
|
||||
return new Val(h->GetEpsilon(value), TYPE_COUNT);
|
||||
%}
|
||||
|
||||
## Get the number of elements this data structure is supposed to track (given on init).
|
||||
##
|
||||
## .. note:: Note that the actual number of elements in the data structure can be lower
|
||||
## or higher (due to non-pruned merges) than this.
|
||||
##
|
||||
## handle: the TopK handle
|
||||
##
|
||||
## Returns: size given during initialization
|
||||
##
|
||||
## .. bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon
|
||||
## topk_sum topk_merge topk_merge_prune
|
||||
function topk_size%(handle: opaque of topk%): count
|
||||
%{
|
||||
assert(handle);
|
||||
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
|
||||
return new Val(h->GetSize(), TYPE_COUNT);
|
||||
%}
|
||||
|
||||
## Get the sum of all counts of all elements in the data structure.
|
||||
##
|
||||
## .. note:: This is equal to the number of all inserted objects if the data structure
|
||||
## never has been pruned. Do not use after calling topk_merge_prune (will throw a
|
||||
## warning message if used afterwards)
|
||||
##
|
||||
## handle: the TopK handle
|
||||
##
|
||||
## Returns: sum of all counts
|
||||
##
|
||||
## .. bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon
|
||||
## topk_size topk_merge topk_merge_prune
|
||||
function topk_sum%(handle: opaque of topk%): count
|
||||
%{
|
||||
assert(handle);
|
||||
probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
|
||||
return new Val(h->GetSum(), TYPE_COUNT);
|
||||
%}
|
||||
|
||||
## Merge the second topk data structure into the first.
|
||||
##
|
||||
## .. note:: This does not remove any elements, the resulting data structure can be
|
||||
## bigger than the maximum size given on initialization.
|
||||
##
|
||||
## .. bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon
|
||||
## topk_size topk_sum topk_merge_prune
|
||||
function topk_merge%(handle1: opaque of topk, handle2: opaque of topk%): any
|
||||
%{
|
||||
assert(handle1);
|
||||
assert(handle2);
|
||||
|
||||
probabilistic::TopkVal* h1 = (probabilistic::TopkVal*) handle1;
|
||||
probabilistic::TopkVal* h2 = (probabilistic::TopkVal*) handle2;
|
||||
|
||||
h1->Merge(h2);
|
||||
|
||||
return 0;
|
||||
%}
|
||||
|
||||
## Merge the second topk data structure into the first and prunes the final data
|
||||
## structure back to the size given on initialization.
|
||||
##
|
||||
## .. note:: Use with care and only when being aware of the restrictions this
|
||||
## entails. Do not call :bro:id:`topk_sum` or :bro:id:`topk_add` afterwards,
|
||||
## results will probably not be what you expect.
|
||||
##
|
||||
## handle1: the TopK handle into which the second TopK structure is merged
|
||||
##
|
||||
## handle2: the TopK handle that is merged into the first TopK structure
|
||||
##
|
||||
## .. bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon
|
||||
## topk_size topk_sum topk_merge
|
||||
function topk_merge_prune%(handle1: opaque of topk, handle2: opaque of topk%): any
|
||||
%{
|
||||
assert(handle1);
|
||||
assert(handle2);
|
||||
|
||||
probabilistic::TopkVal* h1 = (probabilistic::TopkVal*) handle1;
|
||||
probabilistic::TopkVal* h2 = (probabilistic::TopkVal*) handle2;
|
||||
|
||||
h1->Merge(h2, true);
|
||||
|
||||
return 0;
|
||||
%}
|
8
testing/btest/Baseline/bifs.bloomfilter-seed/output
Normal file
8
testing/btest/Baseline/bifs.bloomfilter-seed/output
Normal file
|
@ -0,0 +1,8 @@
|
|||
bf1, global_seed, 11979365913534242684
|
||||
bf2, global_seed, 12550100962110750449
|
||||
bf3, my_seed, 12550100962110750449
|
||||
bf4, my_seed, 945716460325754659
|
||||
bf1, global_seed, 12550100962110750449
|
||||
bf2, global_seed, 945716460325754659
|
||||
bf3, my_seed, 12550100962110750449
|
||||
bf4, my_seed, 945716460325754659
|
|
@ -12,6 +12,9 @@ error: false-positive rate must take value between 0 and 1
|
|||
1
|
||||
1
|
||||
1
|
||||
0, no fp
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
|
|
11
testing/btest/Baseline/bifs.topk/.stderr
Normal file
11
testing/btest/Baseline/bifs.topk/.stderr
Normal file
|
@ -0,0 +1,11 @@
|
|||
error: GetCount for element that is not in top-k
|
||||
error: GetEpsilon for element that is not in top-k
|
||||
error: GetCount for element that is not in top-k
|
||||
error: GetEpsilon for element that is not in top-k
|
||||
error: GetCount for element that is not in top-k
|
||||
error: GetEpsilon for element that is not in top-k
|
||||
error: GetCount for element that is not in top-k
|
||||
error: GetEpsilon for element that is not in top-k
|
||||
warning: TopkVal::GetSum() was used on a pruned data structure. Result values do not represent total element count
|
||||
error: GetCount for element that is not in top-k
|
||||
error: GetEpsilon for element that is not in top-k
|
81
testing/btest/Baseline/bifs.topk/out
Normal file
81
testing/btest/Baseline/bifs.topk/out
Normal file
|
@ -0,0 +1,81 @@
|
|||
[b, c]
|
||||
4
|
||||
0
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
[d, c]
|
||||
5
|
||||
0
|
||||
0
|
||||
2
|
||||
1
|
||||
3
|
||||
2
|
||||
[d, e]
|
||||
6
|
||||
3
|
||||
2
|
||||
3
|
||||
2
|
||||
[f, e]
|
||||
7
|
||||
4
|
||||
3
|
||||
3
|
||||
2
|
||||
[f, e]
|
||||
8
|
||||
4
|
||||
3
|
||||
4
|
||||
2
|
||||
[g, e]
|
||||
9
|
||||
0
|
||||
0
|
||||
4
|
||||
2
|
||||
5
|
||||
4
|
||||
[c, e, d]
|
||||
19
|
||||
6
|
||||
0
|
||||
5
|
||||
0
|
||||
4
|
||||
0
|
||||
[c, e]
|
||||
6
|
||||
0
|
||||
5
|
||||
0
|
||||
0
|
||||
0
|
||||
[c, e]
|
||||
22
|
||||
12
|
||||
0
|
||||
10
|
||||
0
|
||||
0
|
||||
0
|
||||
[c, e]
|
||||
19
|
||||
6
|
||||
0
|
||||
5
|
||||
0
|
||||
4
|
||||
0
|
||||
[c, e, d]
|
||||
38
|
||||
12
|
||||
0
|
||||
10
|
||||
0
|
||||
8
|
||||
0
|
|
@ -161,6 +161,7 @@ scripts/base/init-default.bro
|
|||
scripts/base/protocols/dns/main.bro
|
||||
scripts/base/protocols/ftp/__load__.bro
|
||||
scripts/base/protocols/ftp/utils-commands.bro
|
||||
scripts/base/protocols/ftp/info.bro
|
||||
scripts/base/protocols/ftp/main.bro
|
||||
scripts/base/protocols/ftp/utils.bro
|
||||
scripts/base/protocols/ftp/files.bro
|
||||
|
|
21
testing/btest/Baseline/istate.topk/out
Normal file
21
testing/btest/Baseline/istate.topk/out
Normal file
|
@ -0,0 +1,21 @@
|
|||
1
|
||||
2
|
||||
6
|
||||
4
|
||||
5
|
||||
1
|
||||
[c, e, d]
|
||||
1
|
||||
2
|
||||
6
|
||||
4
|
||||
5
|
||||
1
|
||||
[c, e, d]
|
||||
2
|
||||
4
|
||||
12
|
||||
8
|
||||
10
|
||||
2
|
||||
[c, e, d]
|
|
@ -0,0 +1,9 @@
|
|||
Top entries for key counter
|
||||
Num: 995, count: 100, epsilon: 0
|
||||
Num: 1, count: 99, epsilon: 0
|
||||
Num: 2, count: 98, epsilon: 0
|
||||
Num: 3, count: 97, epsilon: 0
|
||||
Num: 4, count: 96, epsilon: 0
|
||||
Top entries for key two
|
||||
Num: 2, count: 4, epsilon: 0
|
||||
Num: 1, count: 3, epsilon: 0
|
|
@ -0,0 +1,8 @@
|
|||
Top entries for key counter
|
||||
Num: 1, count: 99, epsilon: 0
|
||||
Num: 2, count: 98, epsilon: 0
|
||||
Num: 3, count: 97, epsilon: 0
|
||||
Num: 4, count: 96, epsilon: 0
|
||||
Num: 5, count: 95, epsilon: 0
|
||||
Top entries for key two
|
||||
Num: 1, count: 2, epsilon: 0
|
40
testing/btest/bifs/bloomfilter-seed.bro
Normal file
40
testing/btest/bifs/bloomfilter-seed.bro
Normal file
|
@ -0,0 +1,40 @@
|
|||
# @TEST-EXEC: bro -b %INPUT global_hash_seed="foo" >>output
|
||||
# @TEST-EXEC: bro -b %INPUT global_hash_seed="my_seed" >>output
|
||||
# @TEST-EXEC: btest-diff output
|
||||
|
||||
type Foo: record
|
||||
{
|
||||
a: count;
|
||||
b: string;
|
||||
};
|
||||
|
||||
function test_bloom_filter()
|
||||
{
|
||||
local bf1 = bloomfilter_basic_init(0.9, 10);
|
||||
bloomfilter_add(bf1, "foo");
|
||||
bloomfilter_add(bf1, "bar");
|
||||
|
||||
local bf2 = bloomfilter_basic_init(0.9, 10);
|
||||
bloomfilter_add(bf2, Foo($a=1, $b="xx"));
|
||||
bloomfilter_add(bf2, Foo($a=2, $b="yy"));
|
||||
|
||||
local bf3 = bloomfilter_basic_init(0.9, 10, "my_seed");
|
||||
bloomfilter_add(bf3, "foo");
|
||||
bloomfilter_add(bf3, "bar");
|
||||
|
||||
local bf4 = bloomfilter_basic_init(0.9, 10, "my_seed");
|
||||
bloomfilter_add(bf4, Foo($a=1, $b="xx"));
|
||||
bloomfilter_add(bf4, Foo($a=2, $b="yy"));
|
||||
|
||||
print "bf1, global_seed", bloomfilter_internal_state(bf1);
|
||||
print "bf2, global_seed", bloomfilter_internal_state(bf2);
|
||||
print "bf3, my_seed", bloomfilter_internal_state(bf3);
|
||||
print "bf4, my_seed", bloomfilter_internal_state(bf4);
|
||||
|
||||
|
||||
}
|
||||
|
||||
event bro_init()
|
||||
{
|
||||
test_bloom_filter();
|
||||
}
|
|
@ -15,14 +15,21 @@ function test_basic_bloom_filter()
|
|||
bloomfilter_add(bf_cnt, 0.5); # Type mismatch
|
||||
bloomfilter_add(bf_cnt, "foo"); # Type mismatch
|
||||
|
||||
# Alternative constructor.
|
||||
local bf_dbl = bloomfilter_basic_init2(4, 10);
|
||||
bloomfilter_add(bf_dbl, 4.2);
|
||||
bloomfilter_add(bf_dbl, 3.14);
|
||||
print bloomfilter_lookup(bf_dbl, 4.2);
|
||||
print bloomfilter_lookup(bf_dbl, 3.14);
|
||||
|
||||
# Basic usage with strings.
|
||||
local bf_str = bloomfilter_basic_init(0.9, 10);
|
||||
bloomfilter_add(bf_str, "foo");
|
||||
bloomfilter_add(bf_str, "bar");
|
||||
print bloomfilter_lookup(bf_str, "foo");
|
||||
print bloomfilter_lookup(bf_str, "bar");
|
||||
print bloomfilter_lookup(bf_str, "b4z"); # FP
|
||||
print bloomfilter_lookup(bf_str, "quux"); # FP
|
||||
print bloomfilter_lookup(bf_str, "b4zzz"), "no fp"; # FP
|
||||
print bloomfilter_lookup(bf_str, "quuux"); # FP
|
||||
bloomfilter_add(bf_str, 0.5); # Type mismatch
|
||||
bloomfilter_add(bf_str, 100); # Type mismatch
|
||||
|
||||
|
@ -45,6 +52,11 @@ function test_basic_bloom_filter()
|
|||
print bloomfilter_lookup(bf_merged, 84);
|
||||
print bloomfilter_lookup(bf_merged, 100);
|
||||
print bloomfilter_lookup(bf_merged, 168);
|
||||
|
||||
#empty filter tests
|
||||
local bf_empty = bloomfilter_basic_init(0.1, 1000);
|
||||
local bf_empty_merged = bloomfilter_merge(bf_merged, bf_empty);
|
||||
print bloomfilter_lookup(bf_empty_merged, 42);
|
||||
}
|
||||
|
||||
function test_counting_bloom_filter()
|
||||
|
|
154
testing/btest/bifs/topk.bro
Normal file
154
testing/btest/bifs/topk.bro
Normal file
|
@ -0,0 +1,154 @@
|
|||
# @TEST-EXEC: bro -b %INPUT > out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
# @TEST-EXEC: btest-diff .stderr
|
||||
|
||||
event bro_init()
|
||||
{
|
||||
local k1 = topk_init(2);
|
||||
|
||||
# first - peculiarity check...
|
||||
topk_add(k1, "a");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "c");
|
||||
|
||||
local s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_sum(k1);
|
||||
print topk_count(k1, "a");
|
||||
print topk_epsilon(k1, "a");
|
||||
print topk_count(k1, "b");
|
||||
print topk_epsilon(k1, "b");
|
||||
print topk_count(k1, "c");
|
||||
print topk_epsilon(k1, "c");
|
||||
|
||||
topk_add(k1, "d");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_sum(k1);
|
||||
print topk_count(k1, "b");
|
||||
print topk_epsilon(k1, "b");
|
||||
print topk_count(k1, "c");
|
||||
print topk_epsilon(k1, "c");
|
||||
print topk_count(k1, "d");
|
||||
print topk_epsilon(k1, "d");
|
||||
|
||||
topk_add(k1, "e");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_sum(k1);
|
||||
print topk_count(k1, "d");
|
||||
print topk_epsilon(k1, "d");
|
||||
print topk_count(k1, "e");
|
||||
print topk_epsilon(k1, "e");
|
||||
|
||||
topk_add(k1, "f");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_sum(k1);
|
||||
print topk_count(k1, "f");
|
||||
print topk_epsilon(k1, "f");
|
||||
print topk_count(k1, "e");
|
||||
print topk_epsilon(k1, "e");
|
||||
|
||||
topk_add(k1, "e");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_sum(k1);
|
||||
print topk_count(k1, "f");
|
||||
print topk_epsilon(k1, "f");
|
||||
print topk_count(k1, "e");
|
||||
print topk_epsilon(k1, "e");
|
||||
|
||||
topk_add(k1, "g");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_sum(k1);
|
||||
print topk_count(k1, "f");
|
||||
print topk_epsilon(k1, "f");
|
||||
print topk_count(k1, "e");
|
||||
print topk_epsilon(k1, "e");
|
||||
print topk_count(k1, "g");
|
||||
print topk_epsilon(k1, "g");
|
||||
|
||||
k1 = topk_init(100);
|
||||
topk_add(k1, "a");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "f");
|
||||
s = topk_get_top(k1, 3);
|
||||
print s;
|
||||
print topk_sum(k1);
|
||||
print topk_count(k1, "c");
|
||||
print topk_epsilon(k1, "c");
|
||||
print topk_count(k1, "e");
|
||||
print topk_epsilon(k1, "d");
|
||||
print topk_count(k1, "d");
|
||||
print topk_epsilon(k1, "d");
|
||||
|
||||
local k3 = topk_init(2);
|
||||
topk_merge_prune(k3, k1);
|
||||
|
||||
s = topk_get_top(k3, 3);
|
||||
print s;
|
||||
print topk_count(k3, "c");
|
||||
print topk_epsilon(k3, "c");
|
||||
print topk_count(k3, "e");
|
||||
print topk_epsilon(k3, "e");
|
||||
print topk_count(k3, "d");
|
||||
print topk_epsilon(k3, "d");
|
||||
|
||||
topk_merge_prune(k3, k1);
|
||||
|
||||
s = topk_get_top(k3, 3);
|
||||
print s;
|
||||
print topk_sum(k3); # this gives a warning and a wrong result.
|
||||
print topk_count(k3, "c");
|
||||
print topk_epsilon(k3, "c");
|
||||
print topk_count(k3, "e");
|
||||
print topk_epsilon(k3, "e");
|
||||
print topk_count(k3, "d");
|
||||
print topk_epsilon(k3, "d");
|
||||
|
||||
k3 = topk_init(2);
|
||||
topk_merge(k3, k1);
|
||||
print s;
|
||||
print topk_sum(k3);
|
||||
print topk_count(k3, "c");
|
||||
print topk_epsilon(k3, "c");
|
||||
print topk_count(k3, "e");
|
||||
print topk_epsilon(k3, "e");
|
||||
print topk_count(k3, "d");
|
||||
print topk_epsilon(k3, "d");
|
||||
|
||||
topk_merge(k3, k1);
|
||||
|
||||
s = topk_get_top(k3, 3);
|
||||
print s;
|
||||
print topk_sum(k3);
|
||||
print topk_count(k3, "c");
|
||||
print topk_epsilon(k3, "c");
|
||||
print topk_count(k3, "e");
|
||||
print topk_epsilon(k3, "e");
|
||||
print topk_count(k3, "d");
|
||||
print topk_epsilon(k3, "d");
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
74
testing/btest/istate/topk.bro
Normal file
74
testing/btest/istate/topk.bro
Normal file
|
@ -0,0 +1,74 @@
|
|||
# @TEST-EXEC: bro -b %INPUT runnumber=1 >out
|
||||
# @TEST-EXEC: bro -b %INPUT runnumber=2 >>out
|
||||
# @TEST-EXEC: bro -b %INPUT runnumber=3 >>out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
|
||||
global runnumber: count &redef; # differentiate runs
|
||||
|
||||
global k1: opaque of topk &persistent;
|
||||
global k2: opaque of topk &persistent;
|
||||
|
||||
event bro_init()
|
||||
{
|
||||
|
||||
k2 = topk_init(20);
|
||||
|
||||
if ( runnumber == 1 )
|
||||
{
|
||||
k1 = topk_init(100);
|
||||
|
||||
topk_add(k1, "a");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "f");
|
||||
}
|
||||
|
||||
local s = topk_get_top(k1, 3);
|
||||
print topk_count(k1, "a");
|
||||
print topk_count(k1, "b");
|
||||
print topk_count(k1, "c");
|
||||
print topk_count(k1, "d");
|
||||
print topk_count(k1, "e");
|
||||
print topk_count(k1, "f");
|
||||
|
||||
if ( runnumber == 2 )
|
||||
{
|
||||
topk_add(k1, "a");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "f");
|
||||
}
|
||||
|
||||
print s;
|
||||
|
||||
}
|
110
testing/btest/scripts/base/frameworks/sumstats/topk-cluster.bro
Normal file
110
testing/btest/scripts/base/frameworks/sumstats/topk-cluster.bro
Normal file
|
@ -0,0 +1,110 @@
|
|||
# @TEST-SERIALIZE: comm
|
||||
#
|
||||
# @TEST-EXEC: btest-bg-run manager-1 BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT
|
||||
# @TEST-EXEC: sleep 1
|
||||
# @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT
|
||||
# @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-2 bro %INPUT
|
||||
# @TEST-EXEC: btest-bg-wait 15
|
||||
|
||||
# @TEST-EXEC: btest-diff manager-1/.stdout
|
||||
#
|
||||
@TEST-START-FILE cluster-layout.bro
|
||||
redef Cluster::nodes = {
|
||||
["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")],
|
||||
["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1", $interface="eth0"],
|
||||
["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1", $interface="eth1"],
|
||||
};
|
||||
@TEST-END-FILE
|
||||
|
||||
redef Log::default_rotation_interval = 0secs;
|
||||
|
||||
|
||||
event bro_init() &priority=5
|
||||
{
|
||||
local r1: SumStats::Reducer = [$stream="test.metric",
|
||||
$apply=set(SumStats::TOPK)];
|
||||
SumStats::create([$epoch=5secs,
|
||||
$reducers=set(r1),
|
||||
$epoch_finished(data: SumStats::ResultTable) =
|
||||
{
|
||||
for ( key in data )
|
||||
{
|
||||
local r = data[key]["test.metric"];
|
||||
|
||||
local s: vector of SumStats::Observation;
|
||||
s = topk_get_top(r$topk, 5);
|
||||
|
||||
print fmt("Top entries for key %s", key$str);
|
||||
for ( element in s )
|
||||
{
|
||||
print fmt("Num: %d, count: %d, epsilon: %d", s[element]$num, topk_count(r$topk, s[element]), topk_epsilon(r$topk, s[element]));
|
||||
}
|
||||
|
||||
terminate();
|
||||
}
|
||||
}
|
||||
]);
|
||||
|
||||
|
||||
}
|
||||
|
||||
event remote_connection_closed(p: event_peer)
|
||||
{
|
||||
terminate();
|
||||
}
|
||||
|
||||
global ready_for_data: event();
|
||||
redef Cluster::manager2worker_events += /^ready_for_data$/;
|
||||
|
||||
# Feeds observations into the "test.metric" stream. What is observed
# depends on which cluster node handles the event:
#   worker-1: a triangular pattern under key "counter" (the value `a` is
#             observed once for every inner index greater than the outer
#             index), followed by two observations of 1 under key "two".
#   worker-2: four observations of 2 and one of 1 under key "two",
#             followed by one observation of 995 under key "counter" per
#             element of loop_v.
# The for-loops iterate over loop_v's indices; the stored values 1..100
# are never read.
event ready_for_data()
	{
	const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};

	if ( Cluster::node == "worker-1" )
		{
		local a: count = 0;

		for ( outer in loop_v )
			{
			++a;

			for ( inner in loop_v )
				{
				if ( inner > outer )
					SumStats::observe("test.metric", [$str="counter"], [$num=a]);
				}
			}

		SumStats::observe("test.metric", [$str="two"], [$num=1]);
		SumStats::observe("test.metric", [$str="two"], [$num=1]);
		}
	else if ( Cluster::node == "worker-2" )
		{
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=1]);

		for ( n in loop_v )
			{
			SumStats::observe("test.metric", [$str="counter"], [$num=995]);
			}
		}
	}
|
||||
|
||||
@if ( Cluster::local_node_type() == Cluster::MANAGER )
|
||||
|
||||
global peer_count = 0;
|
||||
# Counts completed peer handshakes in the global peer_count and raises
# ready_for_data exactly when the count reaches two.
event remote_connection_handshake_done(p: event_peer) &priority=-5
	{
	++peer_count;

	# Nothing to do until (or after) the second handshake.
	if ( peer_count != 2 )
		return;

	event ready_for_data();
	}
|
||||
|
||||
@endif
|
||||
|
48
testing/btest/scripts/base/frameworks/sumstats/topk.bro
Normal file
48
testing/btest/scripts/base/frameworks/sumstats/topk.bro
Normal file
|
@ -0,0 +1,48 @@
|
|||
# @TEST-EXEC: bro %INPUT
|
||||
# @TEST-EXEC: btest-diff .stdout
|
||||
|
||||
# Stand-alone top-k test. Registers a TOPK reducer on "test.metric" and a
# SumStats with a 3 second epoch whose callback prints the top five
# observations (with count and epsilon) for each key. It then feeds the
# stream: a triangular pattern under key "counter" (the value `a` is
# observed once for every inner index greater than the outer index),
# followed by two observations of 1 under key "two".
event bro_init() &priority=5
	{
	local topk_reducer: SumStats::Reducer = [$stream="test.metric",
	                                         $apply=set(SumStats::TOPK)];

	SumStats::create([$epoch=3secs,
	                  $reducers=set(topk_reducer),
	                  $epoch_finished(data: SumStats::ResultTable) =
	                  	{
	                  	for ( k in data )
	                  		{
	                  		local result = data[k]["test.metric"];

	                  		# Declared first and assigned afterwards, exactly as
	                  		# the original does, so the typed vector receives the
	                  		# value returned by topk_get_top() via assignment.
	                  		local top_entries: vector of SumStats::Observation;
	                  		top_entries = topk_get_top(result$topk, 5);

	                  		print fmt("Top entries for key %s", k$str);

	                  		for ( idx in top_entries )
	                  			{
	                  			local obs = top_entries[idx];
	                  			print fmt("Num: %d, count: %d, epsilon: %d", obs$num, topk_count(result$topk, obs), topk_epsilon(result$topk, obs));
	                  			}
	                  		}
	                  	}
	                 ]);

	# The loops below iterate over loop_v's indices; the stored values
	# 1..100 are never read.
	const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};

	local a: count = 0;

	for ( outer in loop_v )
		{
		++a;

		for ( inner in loop_v )
			{
			if ( inner > outer )
				SumStats::observe("test.metric", [$str="counter"], [$num=a]);
			}
		}

	SumStats::observe("test.metric", [$str="two"], [$num=1]);
	SumStats::observe("test.metric", [$str="two"], [$num=1]);
	}
|
Loading…
Add table
Add a link
Reference in a new issue