diff --git a/CHANGES b/CHANGES index 693dc36dac..3f11c55338 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,94 @@ +2.1-1034 | 2013-08-03 20:27:43 -0700 + + * A set of DHCP extensions. (Vlad Grigorescu) + + - Leases are logged to dhcp.log as they are seen. + - scripts/policy/protocols/dhcp/known-devices-and-hostnames.bro + - Added a DPD signature. + +2.1-1027 | 2013-08-03 01:57:37 -0400 + + * Fix a major memory issue in the SumStats framework. + +2.1-1026 | 2013-08-02 22:35:09 -0400 + + * Fix the SumStats top-k plugin and test. (Seth Hall) + + * Rework of the SumStats API to reduce high instantaneous memory + use on clusters. (Seth Hall) + + * Large update for the SumStats framework. + + - On-demand access to sumstats results through "return from" + functions named SumStats::request and SumStats::request_key. + Both functions are tested in standalone and clustered modes. + + - The $name field has returned to SumStats, which simplifies cluster + code and makes the on-demand access possible. + + - Clustered results can now only be collected for 1 minute from their + time of creation, rather than from the time of last read. + + - Thresholds use doubles instead of counts everywhere now. + + - Calculation dependency resolution now occurs at startup + instead of at observation time, which provides a minor + CPU performance improvement. A new plugin registration mechanism + was created to support this change. + + - AppStats now has a minimal doc string and is broken into hook-based + plugins. + + - AppStats and traceroute detection added to local.bro. (Seth Hall) + +2.1-1009 | 2013-08-02 17:19:08 -0700 + + * A number of exec module and raw input reader fixes. (Jon Siwek) + +2.1-1007 | 2013-08-01 15:41:54 -0700 + + * More function documentation. (Bernhard Amann) + +2.1-1004 | 2013-08-01 14:37:43 -0700 + + * Adding a probabilistic data structure for computing "top k" + elements. (Bernhard Amann) + + The corresponding functions are: + + topk_init(size: count): opaque of topk + topk_add(handle: opaque of topk, value: any) + topk_get_top(handle: opaque of topk, k: count) + topk_count(handle: opaque of topk, value: any): count + topk_epsilon(handle: opaque of topk, value: any): count + topk_size(handle: opaque of topk): count + topk_sum(handle: opaque of topk): count + topk_merge(handle1: opaque of topk, handle2: opaque of topk) + topk_merge_prune(handle1: opaque of topk, handle2: opaque of topk) + +2.1-971 | 2013-08-01 13:28:32 -0700 + + * Fix some build errors. (Jon Siwek) + + * Internal refactoring of how plugin components are tagged/managed. + (Jon Siwek) + + * Fix various documentation, mostly related to file analysis. (Jon + Siwek) + + * Changing the Bloom filter hashing so that it's independent of + CompositeHash. (Robin Sommer) + +2.1-951 | 2013-08-01 11:19:23 -0400 + + * Small fix to deal with a bug in the SSL log delay mechanism. + +2.1-948 | 2013-07-31 20:08:28 -0700 + + * Fix segfault caused by merging an empty bloom-filter with a + bloom-filter already containing values. (Bernhard Amann) + 2.1-945 | 2013-07-30 10:05:10 -0700 * Make hashers serializable. (Matthias Vallentin) diff --git a/NEWS b/NEWS index c421e7d675..3e349e7db3 100644 --- a/NEWS +++ b/NEWS @@ -113,6 +113,7 @@ New Functionality the frequency of elements.
The corresponding functions are: bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter + bloomfilter_basic_init2(k: count, cells: count, name: string &default=""): opaque of bloomfilter bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter bloomfilter_add(bf: opaque of bloomfilter, x: any) bloomfilter_lookup(bf: opaque of bloomfilter, x: any): count @@ -121,6 +122,21 @@ New Functionality See for full documentation. +- Bro now provides a probabilistic data structure for computing + "top k" elements. The corresponding functions are: + + topk_init(size: count): opaque of topk + topk_add(handle: opaque of topk, value: any) + topk_get_top(handle: opaque of topk, k: count) + topk_count(handle: opaque of topk, value: any): count + topk_epsilon(handle: opaque of topk, value: any): count + topk_size(handle: opaque of topk): count + topk_sum(handle: opaque of topk): count + topk_merge(handle1: opaque of topk, handle2: opaque of topk) + topk_merge_prune(handle1: opaque of topk, handle2: opaque of topk) + + See for full documentation. + - base/utils/exec.bro provides a module to start external processes asynchronously and retrieve their output on termination. base/utils/dir.bro uses it to monitor a directory for changes, and diff --git a/VERSION b/VERSION index 6b605113b8..5ed761d1ae 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-945 +2.1-1034 diff --git a/aux/bro-aux b/aux/bro-aux index 91d258cc8b..d9963983c0 160000 --- a/aux/bro-aux +++ b/aux/bro-aux @@ -1 +1 @@ -Subproject commit 91d258cc8b2f74cd02fc93dfe61f73ec9f0dd489 +Subproject commit d9963983c0b4d426b24836f8d154d014d5aecbba diff --git a/aux/broctl b/aux/broctl index 52fd91261f..090d4553ac 160000 --- a/aux/broctl +++ b/aux/broctl @@ -1 +1 @@ -Subproject commit 52fd91261f41fa1528f7b964837a364d7991889e +Subproject commit 090d4553ace0f9acf2d86eafab07ecfdcc534878 diff --git a/aux/btest b/aux/btest index ce366206e3..69606f8f3c 160000 --- a/aux/btest +++ b/aux/btest @@ -1 +1 @@ -Subproject commit ce366206e3407e534a786ad572c342e9f9fef26b +Subproject commit 69606f8f3cc84d694ca1da14868a5fecd4abbc96 diff --git a/doc/file-analysis.rst b/doc/file-analysis.rst index f312e06471..0a96a8efb7 100644 --- a/doc/file-analysis.rst +++ b/doc/file-analysis.rst @@ -82,9 +82,9 @@ attached, they start receiving the contents of the file as Bro extracts it from an ongoing network connection. What they do with the file contents is up to the particular file analyzer implementation, but they'll typically either report further information about the file via -events (e.g. :bro:see:`FileAnalysis::ANALYZER_MD5` will report the +events (e.g. :bro:see:`Files::ANALYZER_MD5` will report the file's MD5 checksum via :bro:see:`file_hash` once calculated) or they'll -have some side effect (e.g. :bro:see:`FileAnalysis::ANALYZER_EXTRACT` +have some side effect (e.g. :bro:see:`Files::ANALYZER_EXTRACT` will write the contents of the file out to the local file system). 
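For example, both kinds of analyzer can be attached to the same file. The following is a minimal sketch that simply combines the two ``Files::add_analyzer`` calls shown elsewhere in this document; the MD5 digest arrives asynchronously via :bro:see:`file_hash`, while the extracted copy is written out as a side effect:

.. code:: bro

    event file_new(f: fa_file)
        {
        # Event-reporting analyzer: the digest comes back later via file_hash.
        Files::add_analyzer(f, Files::ANALYZER_MD5);
        # Side-effect analyzer: writes the file out under FileExtract::prefix.
        Files::add_analyzer(f, Files::ANALYZER_EXTRACT,
                            [$extract_filename="myfile"]);
        }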
In the future there may be file analyzers that automatically attach to @@ -98,7 +98,7 @@ explicit attachment decision: { print "new file", f$id; if ( f?$mime_type && f$mime_type == "text/plain" ) - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); + Files::add_analyzer(f, Files::ANALYZER_MD5); } event file_hash(f: fa_file, kind: string, hash: string) @@ -113,26 +113,27 @@ output:: file_hash, Cx92a0ym5R8, md5, 397168fd09991a0e712254df7bc639ac Some file analyzers might have tunable parameters that need to be -specified in the call to :bro:see:`FileAnalysis::add_analyzer`: +specified in the call to :bro:see:`Files::add_analyzer`: .. code:: bro event file_new(f: fa_file) { - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename="./myfile"]); + Files::add_analyzer(f, Files::ANALYZER_EXTRACT, + [$extract_filename="myfile"]); } In this case, the file extraction analyzer doesn't generate any further -events, but does have the side effect of writing out the file contents -to the local file system at the specified location of ``./myfile``. Of -course, for a network with more than a single file being transferred, -it's probably preferable to specify a different extraction path for each -file, unlike this example. +events, but does have the effect of writing out the file contents to the +local file system at the location resulting from the concatenation of +the path specified by :bro:see:`FileExtract::prefix` and the string +``myfile``. Of course, for a network with more than a single file being +transferred, it's probably preferable to specify a different extraction +path for each file, unlike in this example. Regardless of which file analyzers end up acting on a file, general information about the file (e.g. size, time of last data transferred, -MIME type, etc.) are logged in ``file_analysis.log``. +MIME type, etc.) is logged in ``files.log``. Input Framework Integration =========================== @@ -150,7 +151,7 @@ a network interface it's monitoring. It only requires a call to event file_new(f: fa_file) { print "new file", f$id; - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); + Files::add_analyzer(f, Files::ANALYZER_MD5); } event file_state_remove(f: fa_file) diff --git a/doc/index.rst b/doc/index.rst index ad05f7bf82..aa33d8797d 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -47,6 +47,7 @@ Script Reference scripts/index scripts/builtins scripts/proto-analyzers + scripts/file-analyzers Other Bro Components -------------------- diff --git a/doc/scripts/CMakeLists.txt b/doc/scripts/CMakeLists.txt index e7e39d0b3f..fa234e74f2 100644 --- a/doc/scripts/CMakeLists.txt +++ b/doc/scripts/CMakeLists.txt @@ -124,28 +124,34 @@ endmacro(REST_TARGET) # Schedule Bro scripts for which to generate documentation. include(DocSourcesList.cmake) -# This reST target is independent of a particular Bro script...
-add_custom_command(OUTPUT proto-analyzers.rst - # delete any leftover state from previous bro runs - COMMAND "${CMAKE_COMMAND}" - ARGS -E remove_directory .state - # generate the reST documentation using bro - COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro - ARGS -b -Z base/init-bare.bro || (rm -rf .state *.log *.rst && exit 1) - # move generated doc into a new directory tree that - # defines the final structure of documents - COMMAND "${CMAKE_COMMAND}" - ARGS -E make_directory ${dstDir} - COMMAND "${CMAKE_COMMAND}" - ARGS -E copy proto-analyzers.rst ${dstDir} - # clean up the build directory - COMMAND rm - ARGS -rf .state *.log *.rst - DEPENDS bro - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - COMMENT "[Bro] Generating reST docs for proto-analyzers.rst" -) -list(APPEND ALL_REST_OUTPUTS proto-analyzers.rst) +# Macro for generating reST docs that are independent of any particular Bro +# script. +macro(INDEPENDENT_REST_TARGET reST_file) + add_custom_command(OUTPUT ${reST_file} + # delete any leftover state from previous bro runs + COMMAND "${CMAKE_COMMAND}" + ARGS -E remove_directory .state + # generate the reST documentation using bro + COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro + ARGS -b -Z base/init-bare.bro || (rm -rf .state *.log *.rst && exit 1) + # move generated doc into a new directory tree that + # defines the final structure of documents + COMMAND "${CMAKE_COMMAND}" + ARGS -E make_directory ${dstDir} + COMMAND "${CMAKE_COMMAND}" + ARGS -E copy ${reST_file} ${dstDir} + # clean up the build directory + COMMAND rm + ARGS -rf .state *.log *.rst + DEPENDS bro + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "[Bro] Generating reST docs for ${reST_file}" + ) + list(APPEND ALL_REST_OUTPUTS ${reST_file}) +endmacro(INDEPENDENT_REST_TARGET) + +independent_rest_target(proto-analyzers.rst) +independent_rest_target(file-analyzers.rst) # create temporary list of all docs to include in the master policy/index file file(WRITE ${MASTER_POLICY_INDEX} "${MASTER_POLICY_INDEX_TEXT}") diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index 2efa45ef38..bd88f5cd54 100644 --- a/doc/scripts/DocSourcesList.cmake +++ b/doc/scripts/DocSourcesList.cmake @@ -73,6 +73,7 @@ rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_UDP.events.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_ZIP.events.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/reporter.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/strings.bif.bro) +rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/top-k.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/types.bif.bro) rest_target(${psd} base/files/extract/main.bro) rest_target(${psd} base/files/hash/main.bro) @@ -129,6 +130,7 @@ rest_target(${psd} base/frameworks/sumstats/plugins/min.bro) rest_target(${psd} base/frameworks/sumstats/plugins/sample.bro) rest_target(${psd} base/frameworks/sumstats/plugins/std-dev.bro) rest_target(${psd} base/frameworks/sumstats/plugins/sum.bro) +rest_target(${psd} base/frameworks/sumstats/plugins/topk.bro) rest_target(${psd} base/frameworks/sumstats/plugins/unique.bro) rest_target(${psd} base/frameworks/sumstats/plugins/variance.bro) rest_target(${psd} base/frameworks/tunnels/main.bro) @@ -137,10 +139,14 @@ rest_target(${psd} base/protocols/conn/contents.bro) rest_target(${psd} base/protocols/conn/inactivity.bro) rest_target(${psd} 
base/protocols/conn/main.bro) rest_target(${psd} base/protocols/conn/polling.bro) +rest_target(${psd} base/protocols/dhcp/consts.bro) +rest_target(${psd} base/protocols/dhcp/main.bro) +rest_target(${psd} base/protocols/dhcp/utils.bro) rest_target(${psd} base/protocols/dns/consts.bro) rest_target(${psd} base/protocols/dns/main.bro) rest_target(${psd} base/protocols/ftp/files.bro) rest_target(${psd} base/protocols/ftp/gridftp.bro) +rest_target(${psd} base/protocols/ftp/info.bro) rest_target(${psd} base/protocols/ftp/main.bro) rest_target(${psd} base/protocols/ftp/utils-commands.bro) rest_target(${psd} base/protocols/ftp/utils.bro) @@ -203,9 +209,16 @@ rest_target(${psd} policy/frameworks/software/vulnerable.bro) rest_target(${psd} policy/integration/barnyard2/main.bro) rest_target(${psd} policy/integration/barnyard2/types.bro) rest_target(${psd} policy/integration/collective-intel/main.bro) -rest_target(${psd} policy/misc/app-metrics.bro) +rest_target(${psd} policy/misc/app-stats/main.bro) +rest_target(${psd} policy/misc/app-stats/plugins/facebook.bro) +rest_target(${psd} policy/misc/app-stats/plugins/gmail.bro) +rest_target(${psd} policy/misc/app-stats/plugins/google.bro) +rest_target(${psd} policy/misc/app-stats/plugins/netflix.bro) +rest_target(${psd} policy/misc/app-stats/plugins/pandora.bro) +rest_target(${psd} policy/misc/app-stats/plugins/youtube.bro) rest_target(${psd} policy/misc/capture-loss.bro) rest_target(${psd} policy/misc/detect-traceroute/main.bro) +rest_target(${psd} policy/misc/known-devices.bro) rest_target(${psd} policy/misc/load-balancing.bro) rest_target(${psd} policy/misc/loaded-scripts.bro) rest_target(${psd} policy/misc/profiling.bro) @@ -215,6 +228,7 @@ rest_target(${psd} policy/misc/trim-trace-file.bro) rest_target(${psd} policy/protocols/conn/known-hosts.bro) rest_target(${psd} policy/protocols/conn/known-services.bro) rest_target(${psd} policy/protocols/conn/weirds.bro) +rest_target(${psd} policy/protocols/dhcp/known-devices-and-hostnames.bro) rest_target(${psd} policy/protocols/dns/auth-addl.bro) rest_target(${psd} policy/protocols/dns/detect-external-names.bro) rest_target(${psd} policy/protocols/ftp/detect-bruteforcing.bro) diff --git a/scripts/base/frameworks/files/main.bro b/scripts/base/frameworks/files/main.bro index d0c381545b..c1883e037f 100644 --- a/scripts/base/frameworks/files/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -204,7 +204,7 @@ export { ## ## tag: Tag for the protocol analyzer having a callback being registered. ## - ## reg: A :bro:see:`ProtoRegistration` record. + ## reg: A :bro:see:`Files::ProtoRegistration` record. ## ## Returns: true if the protocol being registered was not previously registered. global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool; @@ -228,11 +228,6 @@ redef record fa_file += { info: Info &optional; }; -redef record AnalyzerArgs += { - # This is used interally for the core file analyzer api. - tag: Files::Tag &optional; -}; - # Store the callbacks for protocol analyzers that have files. global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table(); @@ -275,14 +270,12 @@ function set_timeout_interval(f: fa_file, t: interval): bool function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool { - # This is to construct the correct args for the core API. - args$tag = tag; add f$info$analyzers[Files::analyzer_name(tag)]; if ( tag in analyzer_add_callbacks ) analyzer_add_callbacks[tag](f, args); - if ( ! __add_analyzer(f$id, args) ) + if ( ! 
__add_analyzer(f$id, tag, args) ) { Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id)); return F; @@ -297,8 +290,7 @@ function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: f function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool { - args$tag = tag; - return __remove_analyzer(f$id, args); + return __remove_analyzer(f$id, tag, args); } function stop(f: fa_file): bool diff --git a/scripts/base/frameworks/packet-filter/main.bro b/scripts/base/frameworks/packet-filter/main.bro index 72b2b62f34..929b10fbe1 100644 --- a/scripts/base/frameworks/packet-filter/main.bro +++ b/scripts/base/frameworks/packet-filter/main.bro @@ -109,7 +109,7 @@ export { ## Enables the old filtering approach of "only watch common ports for ## analyzed protocols". - ## + ## ## Unless you know what you are doing, leave this set to F. const enable_auto_protocol_capture_filters = F &redef; diff --git a/scripts/base/frameworks/sumstats/cluster.bro b/scripts/base/frameworks/sumstats/cluster.bro index be0a5b5ded..9c343ad15d 100644 --- a/scripts/base/frameworks/sumstats/cluster.bro +++ b/scripts/base/frameworks/sumstats/cluster.bro @@ -10,10 +10,6 @@ module SumStats; export { - ## Allows a user to decide how large of result groups the workers should transmit - ## values for cluster stats aggregation. - const cluster_send_in_groups_of = 50 &redef; - ## The percent of the full threshold value that needs to be met on a single worker ## for that worker to send the value to its manager in order for it to request a ## global view for that value. There is no requirement that the manager requests @@ -27,45 +23,46 @@ export { ## performed. In practice this should hopefully have a minimal effect. const max_outstanding_global_views = 10 &redef; - ## Intermediate updates can cause overload situations on very large clusters. This - ## option may help reduce load and correct intermittent problems. The goal for this - ## option is also meant to be temporary. - const enable_intermediate_updates = T &redef; - ## Event sent by the manager in a cluster to initiate the collection of values for ## a sumstat. - global cluster_ss_request: event(uid: string, ssid: string); + global cluster_ss_request: event(uid: string, ss_name: string, cleanup: bool); ## Event sent by nodes that are collecting sumstats after receiving a request for ## the sumstat from the manager. - global cluster_ss_response: event(uid: string, ssid: string, data: ResultTable, done: bool); + #global cluster_ss_response: event(uid: string, ss_name: string, data: ResultTable, done: bool, cleanup: bool); ## This event is sent by the manager in a cluster to initiate the collection of ## a single key value from a sumstat. It's typically used to get intermediate ## updates before the break interval triggers to speed detection of a value ## crossing a threshold. - global cluster_key_request: event(uid: string, ssid: string, key: Key); + global cluster_get_result: event(uid: string, ss_name: string, key: Key, cleanup: bool); ## This event is sent by nodes in response to a - ## :bro:id:`SumStats::cluster_key_request` event. - global cluster_key_response: event(uid: string, ssid: string, key: Key, result: Result); + ## :bro:id:`SumStats::cluster_get_result` event. 
+ global cluster_send_result: event(uid: string, ss_name: string, key: Key, result: Result, cleanup: bool); ## This is sent by workers to indicate that they crossed the percent ## of the current threshold by the percentage defined globally in ## :bro:id:`SumStats::cluster_request_global_view_percent` - global cluster_key_intermediate_response: event(ssid: string, key: SumStats::Key); + global cluster_key_intermediate_response: event(ss_name: string, key: SumStats::Key); ## This event is scheduled internally on workers to send result chunks. - global send_data: event(uid: string, ssid: string, data: ResultTable); + global send_data: event(uid: string, ss_name: string, data: ResultTable, cleanup: bool); + + global get_a_key: event(uid: string, ss_name: string, cleanup: bool &default=F); + + global send_a_key: event(uid: string, ss_name: string, key: Key); + global send_no_key: event(uid: string, ss_name: string); ## This event is generated when a threshold is crossed. - global cluster_threshold_crossed: event(ssid: string, key: SumStats::Key, thold: Thresholding); + global cluster_threshold_crossed: event(ss_name: string, key: SumStats::Key, thold_index: count); } # Add events to the cluster framework to make this work. -redef Cluster::manager2worker_events += /SumStats::cluster_(ss_request|key_request|threshold_crossed)/; -redef Cluster::manager2worker_events += /SumStats::thresholds_reset/; -redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|key_response|key_intermediate_response)/; +redef Cluster::manager2worker_events += /SumStats::cluster_(ss_request|get_result|threshold_crossed)/; +redef Cluster::manager2worker_events += /SumStats::(thresholds_reset|get_a_key)/; +redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|send_result|key_intermediate_response)/; +redef Cluster::worker2manager_events += /SumStats::(send_a_key|send_no_key)/; @if ( Cluster::local_node_type() != Cluster::MANAGER ) # This variable is maintained to know what keys have recently sent as @@ -74,12 +71,9 @@ redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|key_resp # an intermediate result has been received. global recent_global_view_keys: table[string, Key] of count &create_expire=1min &default=0; -event bro_init() &priority=-100 - { - # The manager is the only host allowed to track these. - stats_store = table(); - reducer_store = table(); - } +# Result tables indexed on a uid that are currently being sent to the +# manager. +global sending_results: table[string] of ResultTable = table() &create_expire=1min; # This is done on all non-manager node types in the event that a sumstat is # being collected somewhere other than a worker. @@ -87,95 +81,151 @@ function data_added(ss: SumStat, key: Key, result: Result) { # If an intermediate update for this value was sent recently, don't send # it again. - if ( [ss$id, key] in recent_global_view_keys ) + if ( [ss$name, key] in recent_global_view_keys ) return; # If val is 5 and global view % is 0.1 (10%), pct_val will be 50. If that # crosses the full threshold then it's a candidate to send as an # intermediate update. 
- if ( enable_intermediate_updates && - check_thresholds(ss, key, result, cluster_request_global_view_percent) ) + if ( check_thresholds(ss, key, result, cluster_request_global_view_percent) ) { # kick off intermediate update - event SumStats::cluster_key_intermediate_response(ss$id, key); - ++recent_global_view_keys[ss$id, key]; + event SumStats::cluster_key_intermediate_response(ss$name, key); + ++recent_global_view_keys[ss$name, key]; } } -event SumStats::send_data(uid: string, ssid: string, data: ResultTable) +#event SumStats::send_data(uid: string, ss_name: string, cleanup: bool) +# { +# #print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid); +# +# local local_data: ResultTable = table(); +# local incoming_data: ResultTable = cleanup ? data : copy(data); +# +# local num_added = 0; +# for ( key in incoming_data ) +# { +# local_data[key] = incoming_data[key]; +# delete incoming_data[key]; +# +# # Only send cluster_send_in_groups_of at a time. Queue another +# # event to send the next group. +# if ( cluster_send_in_groups_of == ++num_added ) +# break; +# } +# +# local done = F; +# # If data is empty, this sumstat is done. +# if ( |incoming_data| == 0 ) +# done = T; +# +# # Note: copy is needed to compensate serialization caching issue. This should be +# # changed to something else later. +# event SumStats::cluster_ss_response(uid, ss_name, copy(local_data), done, cleanup); +# if ( ! done ) +# schedule 0.01 sec { SumStats::send_data(uid, T) }; +# } + +event SumStats::get_a_key(uid: string, ss_name: string, cleanup: bool) { - #print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid); - - local local_data: ResultTable = table(); - local num_added = 0; - for ( key in data ) + if ( uid in sending_results ) { - local_data[key] = data[key]; - delete data[key]; - - # Only send cluster_send_in_groups_of at a time. Queue another - # event to send the next group. - if ( cluster_send_in_groups_of == ++num_added ) - break; + if ( |sending_results[uid]| == 0 ) + { + event SumStats::send_no_key(uid, ss_name); + } + else + { + for ( key in sending_results[uid] ) + { + event SumStats::send_a_key(uid, ss_name, key); + # break to only send one. + break; + } + } + } + else if ( !cleanup && ss_name in result_store && |result_store[ss_name]| > 0 ) + { + if ( |result_store[ss_name]| == 0 ) + { + event SumStats::send_no_key(uid, ss_name); + } + else + { + for ( key in result_store[ss_name] ) + { + event SumStats::send_a_key(uid, ss_name, key); + # break to only send one. + break; + } + } + } + else + { + event SumStats::send_no_key(uid, ss_name); } - - local done = F; - # If data is empty, this sumstat is done. - if ( |data| == 0 ) - done = T; - - # Note: copy is needed to compensate serialization caching issue. This should be - # changed to something else later. - event SumStats::cluster_ss_response(uid, ssid, copy(local_data), done); - if ( ! done ) - schedule 0.01 sec { SumStats::send_data(uid, ssid, data) }; } -event SumStats::cluster_ss_request(uid: string, ssid: string) +event SumStats::cluster_ss_request(uid: string, ss_name: string, cleanup: bool) { #print fmt("WORKER %s: received the cluster_ss_request event for %s.", Cluster::node, id); - # Initiate sending all of the data for the requested stats. - if ( ssid in result_store ) - event SumStats::send_data(uid, ssid, result_store[ssid]); - else - event SumStats::send_data(uid, ssid, table()); + # Create a back store for the result + sending_results[uid] = (ss_name in result_store) ? 
result_store[ss_name] : table(); # Lookup the actual sumstats and reset it, the reference to the data - # currently stored will be maintained internally by the send_data event. - if ( ssid in stats_store ) - reset(stats_store[ssid]); + # currently stored will be maintained internally from the + # sending_results table. + if ( cleanup && ss_name in stats_store ) + reset(stats_store[ss_name]); } -event SumStats::cluster_key_request(uid: string, ssid: string, key: Key) +event SumStats::cluster_get_result(uid: string, ss_name: string, key: Key, cleanup: bool) { - if ( ssid in result_store && key in result_store[ssid] ) - { - #print fmt("WORKER %s: received the cluster_key_request event for %s=%s.", Cluster::node, key2str(key), data); + #print fmt("WORKER %s: received the cluster_get_result event for %s=%s.", Cluster::node, key2str(key), data); - # Note: copy is needed to compensate serialization caching issue. This should be - # changed to something else later. - event SumStats::cluster_key_response(uid, ssid, key, copy(result_store[ssid][key])); - } - else + if ( cleanup ) # data will implicitly be in sending_results (i know this isn't great) { - # We need to send an empty response if we don't have the data so that the manager - # can know that it heard back from all of the workers. - event SumStats::cluster_key_response(uid, ssid, key, table()); + if ( uid in sending_results && key in sending_results[uid] ) + { + # Note: copy is needed to compensate serialization caching issue. This should be + # changed to something else later. + event SumStats::cluster_send_result(uid, ss_name, key, copy(sending_results[uid][key]), cleanup); + delete sending_results[uid][key]; + } + else + { + # We need to send an empty response if we don't have the data so that the manager + # can know that it heard back from all of the workers. + event SumStats::cluster_send_result(uid, ss_name, key, table(), cleanup); + } + } + else + { + if ( ss_name in result_store && key in result_store[ss_name] ) + { + event SumStats::cluster_send_result(uid, ss_name, key, copy(result_store[ss_name][key]), cleanup); + } + else + { + # We need to send an empty response if we don't have the data so that the manager + # can know that it heard back from all of the workers. + event SumStats::cluster_send_result(uid, ss_name, key, table(), cleanup); + } } } -event SumStats::cluster_threshold_crossed(ssid: string, key: SumStats::Key, thold: Thresholding) +event SumStats::cluster_threshold_crossed(ss_name: string, key: SumStats::Key, thold_index: count) { - if ( ssid !in threshold_tracker ) - threshold_tracker[ssid] = table(); + if ( ss_name !in threshold_tracker ) + threshold_tracker[ss_name] = table(); - threshold_tracker[ssid][key] = thold; + threshold_tracker[ss_name][key] = thold_index; } -event SumStats::thresholds_reset(ssid: string) +event SumStats::thresholds_reset(ss_name: string) { - threshold_tracker[ssid] = table(); + delete threshold_tracker[ss_name]; } @endif @@ -186,7 +236,7 @@ event SumStats::thresholds_reset(ssid: string) # This variable is maintained by manager nodes as they collect and aggregate # results. # Index on a uid. -global stats_results: table[string] of ResultTable &read_expire=1min; +global stats_keys: table[string] of set[Key] &create_expire=1min; # This variable is maintained by manager nodes to track how many "dones" they # collected per collection unique id. 
Once the number of results for a uid @@ -194,18 +244,18 @@ global stats_results: table[string] of ResultTable &read_expire=1min; # result is written out and deleted from here. # Indexed on a uid. # TODO: add an &expire_func in case not all results are received. -global done_with: table[string] of count &read_expire=1min &default=0; +global done_with: table[string] of count &create_expire=1min &default=0; # This variable is maintained by managers to track intermediate responses as # they are getting a global view for a certain key. # Indexed on a uid. -global key_requests: table[string] of Result &read_expire=1min; +global key_requests: table[string] of Result &create_expire=1min; # This variable is maintained by managers to prevent overwhelming communication due # to too many intermediate updates. Each sumstat is tracked separately so that # one won't overwhelm and degrade other quieter sumstats. # Indexed on a sumstat id. -global outstanding_global_views: table[string] of count &default=0; +global outstanding_global_views: table[string] of count &create_expire=1min &default=0; const zero_time = double_to_time(0.0); # Managers handle logging. @@ -213,15 +263,19 @@ event SumStats::finish_epoch(ss: SumStat) { if ( network_time() > zero_time ) { - #print fmt("%.6f MANAGER: breaking %s sumstat for %s sumstat", network_time(), ss$name, ss$id); + #print fmt("%.6f MANAGER: breaking %s sumstat", network_time(), ss$name); local uid = unique_id(""); - if ( uid in stats_results ) - delete stats_results[uid]; - stats_results[uid] = table(); + if ( uid in stats_keys ) + delete stats_keys[uid]; + stats_keys[uid] = set(); # Request data from peers. - event SumStats::cluster_ss_request(uid, ss$id); + event SumStats::cluster_ss_request(uid, ss$name, T); + + done_with[uid] = 0; + #print fmt("get_key by uid: %s", uid); + event SumStats::get_a_key(uid, ss$name, T); } # Schedule the next finish_epoch event. @@ -235,51 +289,160 @@ function data_added(ss: SumStat, key: Key, result: Result) if ( check_thresholds(ss, key, result, 1.0) ) { threshold_crossed(ss, key, result); - event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]); + event SumStats::cluster_threshold_crossed(ss$name, key, threshold_tracker[ss$name][key]); } } -event SumStats::cluster_key_response(uid: string, ssid: string, key: Key, result: Result) +function handle_end_of_result_collection(uid: string, ss_name: string, key: Key, cleanup: bool) { + #print fmt("worker_count:%d :: done_with:%d", Cluster::worker_count, done_with[uid]); + local ss = stats_store[ss_name]; + local ir = key_requests[uid]; + if ( check_thresholds(ss, key, ir, 1.0) ) + { + threshold_crossed(ss, key, ir); + event SumStats::cluster_threshold_crossed(ss_name, key, threshold_tracker[ss_name][key]); + } + + if ( cleanup ) + { + # This is done here because "cleanup" implicitly means + # it's the end of an epoch. + if ( ss?$epoch_result && |ir| > 0 ) + { + local now = network_time(); + ss$epoch_result(now, key, ir); + } + + # Check that there is an outstanding view before subtracting. + # Global views only apply to non-dynamic requests. Dynamic + # requests must be serviced. 
+ if ( outstanding_global_views[ss_name] > 0 ) + --outstanding_global_views[ss_name]; + } + + delete key_requests[uid]; + delete done_with[uid]; + } + +function request_all_current_keys(uid: string, ss_name: string, cleanup: bool) + { + #print "request_all_current_keys"; + if ( uid in stats_keys && |stats_keys[uid]| > 0 ) + { + #print fmt(" -- %d remaining keys here", |stats_keys[uid]|); + for ( key in stats_keys[uid] ) + { + done_with[uid] = 0; + event SumStats::cluster_get_result(uid, ss_name, key, cleanup); + when ( uid in done_with && Cluster::worker_count == done_with[uid] ) + { + #print "done getting result"; + handle_end_of_result_collection(uid, ss_name, key, cleanup); + request_all_current_keys(uid, ss_name, cleanup); + } + delete stats_keys[uid][key]; + break; # only a single key + } + } + else + { + # Get more keys! And this breaks us out of the evented loop. + done_with[uid] = 0; + #print fmt("get_key by uid: %s", uid); + event SumStats::get_a_key(uid, ss_name, cleanup); + } + } + +event SumStats::send_no_key(uid: string, ss_name: string) + { + #print "send_no_key"; + ++done_with[uid]; + if ( Cluster::worker_count == done_with[uid] ) + { + delete done_with[uid]; + + if ( |stats_keys[uid]| > 0 ) + { + #print "we need more keys!"; + # Now that we have a key from each worker, lets + # grab all of the results. + request_all_current_keys(uid, ss_name, T); + } + else + { + #print "we're out of keys!"; + local ss = stats_store[ss_name]; + if ( ss?$epoch_finished ) + ss$epoch_finished(network_time()); + } + } + } + +event SumStats::send_a_key(uid: string, ss_name: string, key: Key) + { + #print fmt("send_a_key %s", key); + if ( uid !in stats_keys ) + { + # no clue what happened here + return; + } + + if ( key !in stats_keys[uid] ) + add stats_keys[uid][key]; + + ++done_with[uid]; + if ( Cluster::worker_count == done_with[uid] ) + { + delete done_with[uid]; + + if ( |stats_keys[uid]| > 0 ) + { + #print "we need more keys!"; + # Now that we have a key from each worker, lets + # grab all of the results. + request_all_current_keys(uid, ss_name, T); + } + else + { + #print "we're out of keys!"; + local ss = stats_store[ss_name]; + if ( ss?$epoch_finished ) + ss$epoch_finished(network_time()); + } + } + } + +event SumStats::cluster_send_result(uid: string, ss_name: string, key: Key, result: Result, cleanup: bool) + { + #print "cluster_send_result"; #print fmt("%0.6f MANAGER: receiving key data from %s - %s=%s", network_time(), get_event_peer()$descr, key2str(key), result); # We only want to try and do a value merge if there are actually measured datapoints # in the Result. - if ( uid in key_requests ) - key_requests[uid] = compose_results(key_requests[uid], result); - else + if ( uid !in key_requests || |key_requests[uid]| == 0 ) key_requests[uid] = result; + else + key_requests[uid] = compose_results(key_requests[uid], result); # Mark that a worker is done. ++done_with[uid]; - #print fmt("worker_count:%d :: done_with:%d", Cluster::worker_count, done_with[uid]); - if ( Cluster::worker_count == done_with[uid] ) - { - local ss = stats_store[ssid]; - local ir = key_requests[uid]; - if ( check_thresholds(ss, key, ir, 1.0) ) - { - threshold_crossed(ss, key, ir); - event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]); - } - - delete done_with[uid]; - delete key_requests[uid]; - # Check that there is an outstanding view before subtracting. 
- if ( outstanding_global_views[ssid] > 0 ) - --outstanding_global_views[ssid]; - } + #if ( Cluster::worker_count == done_with[uid] ) + # { + # print "done"; + # handle_end_of_result_collection(uid, ss_name, key, cleanup); + # } } # Managers handle intermediate updates here. -event SumStats::cluster_key_intermediate_response(ssid: string, key: Key) +event SumStats::cluster_key_intermediate_response(ss_name: string, key: Key) { #print fmt("MANAGER: receiving intermediate key data from %s", get_event_peer()$descr); #print fmt("MANAGER: requesting key data for %s", key2str(key)); - if ( ssid in outstanding_global_views && - |outstanding_global_views[ssid]| > max_outstanding_global_views ) + if ( ss_name in outstanding_global_views && + |outstanding_global_views[ss_name]| > max_outstanding_global_views ) { # Don't do this intermediate update. Perhaps at some point in the future # we will queue and randomly select from these ignored intermediate @@ -287,60 +450,131 @@ event SumStats::cluster_key_intermediate_response(ssid: string, key: Key) return; } - ++outstanding_global_views[ssid]; + ++outstanding_global_views[ss_name]; local uid = unique_id(""); - event SumStats::cluster_key_request(uid, ssid, key); - } - -event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable, done: bool) - { - #print fmt("MANAGER: receiving results from %s", get_event_peer()$descr); - - # Mark another worker as being "done" for this uid. - if ( done ) - ++done_with[uid]; - - local local_data = stats_results[uid]; - local ss = stats_store[ssid]; - - for ( key in data ) + done_with[uid] = 0; + event SumStats::cluster_get_result(uid, ss_name, key, F); + when ( uid in done_with && Cluster::worker_count == done_with[uid] ) { - if ( key in local_data ) - local_data[key] = compose_results(local_data[key], data[key]); - else - local_data[key] = data[key]; - - # If a stat is done being collected, thresholds for each key - # need to be checked so we're doing it here to avoid doubly - # iterating over each key. - if ( Cluster::worker_count == done_with[uid] ) - { - if ( check_thresholds(ss, key, local_data[key], 1.0) ) - { - threshold_crossed(ss, key, local_data[key]); - event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]); - } - } + handle_end_of_result_collection(uid, ss_name, key, F); + } + timeout 1.1min + { + Reporter::warning(fmt("Dynamic SumStat intermediate key request for %s (%s) took longer than 1 minute and was automatically cancelled.", ss_name, key)); } - # If the data has been collected from all peers, we are done and ready to finish. - if ( Cluster::worker_count == done_with[uid] ) - { - if ( ss?$epoch_finished ) - ss$epoch_finished(local_data); + } +#event SumStats::cluster_ss_response(uid: string, ss_name: string, data: ResultTable, done: bool, cleanup: bool) +# { +# #print fmt("MANAGER: receiving results from %s", get_event_peer()$descr); +# +# # Mark another worker as being "done" for this uid. +# if ( done ) +# ++done_with[uid]; +# +# # We had better only be getting requests for stuff that exists. 
+# if ( ss_name !in stats_store ) +# return; +# +# if ( uid !in stats_keys ) +# stats_keys[uid] = table(); +# +# local local_data = stats_keys[uid]; +# local ss = stats_store[ss_name]; +# +# for ( key in data ) +# { +# if ( key in local_data ) +# local_data[key] = compose_results(local_data[key], data[key]); +# else +# local_data[key] = data[key]; +# +# # If a stat is done being collected, thresholds for each key +# # need to be checked so we're doing it here to avoid doubly +# # iterating over each key. +# if ( Cluster::worker_count == done_with[uid] ) +# { +# if ( check_thresholds(ss, key, local_data[key], 1.0) ) +# { +# threshold_crossed(ss, key, local_data[key]); +# event SumStats::cluster_threshold_crossed(ss$name, key, threshold_tracker[ss$name][key]); +# } +# } +# } +# +# # If the data has been collected from all peers, we are done and ready to finish. +# if ( cleanup && Cluster::worker_count == done_with[uid] ) +# { +# local now = network_time(); +# if ( ss?$epoch_result ) +# { +# for ( key in local_data ) +# ss$epoch_result(now, key, local_data[key]); +# } +# +# if ( ss?$epoch_finished ) +# ss$epoch_finished(now); +# +# # Clean up +# delete stats_keys[uid]; +# delete done_with[uid]; +# reset(ss); +# } +# } + +#function request(ss_name: string): ResultTable +# { +# # This only needs to be implemented this way for cluster compatibility. +# local uid = unique_id("dyn-"); +# stats_keys[uid] = table(); +# done_with[uid] = 0; +# event SumStats::cluster_ss_request(uid, ss_name, F); +# +# return when ( uid in done_with && Cluster::worker_count == done_with[uid] ) +# { +# if ( uid in stats_keys ) +# { +# local ss_result = stats_keys[uid]; +# # Clean up +# delete stats_keys[uid]; +# delete done_with[uid]; +# reset(stats_store[ss_name]); +# return ss_result; +# } +# else +# return table(); +# } +# timeout 1.1min +# { +# Reporter::warning(fmt("Dynamic SumStat request for %s took longer than 1 minute and was automatically cancelled.", ss_name)); +# return table(); +# } +# } + +function request_key(ss_name: string, key: Key): Result + { + local uid = unique_id(""); + done_with[uid] = 0; + key_requests[uid] = table(); + + event SumStats::cluster_get_result(uid, ss_name, key, F); + return when ( uid in done_with && Cluster::worker_count == done_with[uid] ) + { + #print "done with request_key"; + local result = key_requests[uid]; # Clean up - delete stats_results[uid]; + delete key_requests[uid]; delete done_with[uid]; - # Not sure I need to reset the sumstat on the manager. - reset(ss); + + return result; + } + timeout 1.1min + { + Reporter::warning(fmt("Dynamic SumStat key request for %s (%s) took longer than 1 minute and was automatically cancelled.", ss_name, key)); + return table(); } } -event remote_connection_handshake_done(p: event_peer) &priority=5 - { - send_id(p, "SumStats::stats_store"); - send_id(p, "SumStats::reducer_store"); - } @endif diff --git a/scripts/base/frameworks/sumstats/main.bro b/scripts/base/frameworks/sumstats/main.bro index cc2aba2362..282b03da6b 100644 --- a/scripts/base/frameworks/sumstats/main.bro +++ b/scripts/base/frameworks/sumstats/main.bro @@ -74,10 +74,6 @@ export { ## Type to store results for multiple reducers. type Result: table[string] of ResultVal; - ## Type to store a table of sumstats results indexed - ## by keys. - type ResultTable: table[Key] of Result; - ## SumStats represent an aggregation of reducers along with ## mechanisms to handle various situations like the epoch ending ## or thresholds being crossed. 
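# As context for the reworked record in the next hunk: every SumStat now
# carries a $name, thresholds are double-valued, and per-key results are
# delivered through the new $epoch_result callback. What follows is a
# hypothetical sketch of the new usage, not part of this change; the stream
# name "test.metric", the SUM calculation, and all numbers are invented
# purely for illustration.
event bro_init()
    {
    local r1: SumStats::Reducer = [$stream="test.metric",
                                   $apply=set(SumStats::SUM)];
    SumStats::create([$name="test-sumstat",
                      $epoch=1min,
                      $reducers=set(r1),
                      $threshold=100.0,
                      $threshold_val(key: SumStats::Key, result: SumStats::Result) =
                          {
                          # The SUM plugin stores its result as a double.
                          return result["test.metric"]$sum;
                          },
                      $threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
                          {
                          print fmt("threshold crossed for %s", SumStats::key2str(key));
                          },
                      $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
                          {
                          # Called once per key at the end of each epoch.
                          print fmt("%s: %.0f", SumStats::key2str(key),
                                    result["test.metric"]$sum);
                          }]);
    }
# Observations are fed to the stream as before, e.g.:
#     SumStats::observe("test.metric", [$host=1.2.3.4], [$num=1]);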
@@ -87,8 +83,12 @@ export { ## is no assurance provided as to where the callbacks ## will be executed on clusters. type SumStat: record { + ## An arbitrary name for the sumstat so that it can + ## be referred to later. + name: string; + ## The interval at which this filter should be "broken" - ## and the '$epoch_finished' callback called. The + ## and the '$epoch_result' callback called. The ## results are also reset at this time so any threshold ## based detection needs to be set to a ## value that should be expected to happen within @@ -102,22 +102,28 @@ export { ## :bro:see:`SumStats::Result` structure which will be used ## for thresholding. ## This is required if a $threshold value is given. - threshold_val: function(key: SumStats::Key, result: SumStats::Result): count &optional; + threshold_val: function(key: SumStats::Key, result: SumStats::Result): double &optional; ## The threshold value for calling the ## $threshold_crossed callback. - threshold: count &optional; + threshold: double &optional; ## A series of thresholds for calling the ## $threshold_crossed callback. - threshold_series: vector of count &optional; + threshold_series: vector of double &optional; ## A callback that is called when a threshold is crossed. threshold_crossed: function(key: SumStats::Key, result: SumStats::Result) &optional; - ## A callback with the full collection of Results for - ## this SumStat. - epoch_finished: function(rt: SumStats::ResultTable) &optional; + ## A callback that receives each of the results at the + ## end of the analysis epoch. The function will be + ## called once for each key. + epoch_result: function(ts: time, key: SumStats::Key, result: SumStats::Result) &optional; + + ## A callback that will be called when a single collection + ## interval is completed. The ts value will be the time of + ## when the collection started. + epoch_finished: function(ts:time) &optional; }; ## Create a summary statistic. @@ -134,19 +140,23 @@ export { ## obs: The data point to send into the stream. global observe: function(id: string, key: SumStats::Key, obs: SumStats::Observation); - ## This record is primarily used for internal threshold tracking. - type Thresholding: record { - # Internal use only. Indicates if a simple threshold was already crossed. - is_threshold_crossed: bool &default=F; - - # Internal use only. Current key for threshold series. - threshold_series_index: count &default=0; - }; + ## Dynamically request a sumstat key. This function should be + ## used sparingly and not as a replacement for the callbacks + ## from the :bro:see:`SumStat` record. The function is only + ## available for use within "when" statements as an asynchronous + ## function. + ## + ## ss_name: SumStat name. + ## + ## key: The SumStat key being requested. + ## + ## Returns: The result for the requested sumstat key. + global request_key: function(ss_name: string, key: Key): Result; ## This event is generated when thresholds are reset for a SumStat. ## - ## ssid: SumStats ID that thresholds were reset for. - global thresholds_reset: event(ssid: string); + ## name: SumStats name that thresholds were reset for. + global thresholds_reset: event(name: string); ## Helper function to represent a :bro:type:`SumStats::Key` value as ## a simple string. @@ -157,18 +167,49 @@ export { global key2str: function(key: SumStats::Key): string; } +# Type to store a table of sumstats results indexed by keys. +type ResultTable: table[Key] of Result; + +# The function prototype for plugins to do calculations. 
+type ObserveFunc: function(r: Reducer, val: double, data: Observation, rv: ResultVal); + redef record Reducer += { - # Internal use only. Provides a reference back to the related SumStats by it's ID. - sid: string &optional; + # Internal use only. Provides a reference back to the related SumStats by its name. + ssname: string &optional; + + calc_funcs: vector of Calculation &optional; }; # Internal use only. For tracking thresholds per sumstat and key. -global threshold_tracker: table[string] of table[Key] of Thresholding &optional; +# In the case of a single threshold, 0 means the threshold isn't crossed. +# In the case of a threshold series, the number tracks the threshold offset. +global threshold_tracker: table[string] of table[Key] of count; -redef record SumStat += { - # Internal use only (mostly for cluster coherency). - id: string &optional; -}; +function increment_threshold_tracker(ss_name: string, key: Key) + { + if ( ss_name !in threshold_tracker ) + threshold_tracker[ss_name] = table(); + if ( key !in threshold_tracker[ss_name] ) + threshold_tracker[ss_name][key] = 0; + + ++threshold_tracker[ss_name][key]; + } + +function get_threshold_index(ss_name: string, key: Key): count + { + if ( ss_name !in threshold_tracker ) + return 0; + if ( key !in threshold_tracker[ss_name] ) + return 0; + + return threshold_tracker[ss_name][key]; + } + +# Prototype the hook point for plugins to initialize any result values. +global init_resultval_hook: hook(r: Reducer, rv: ResultVal); + +# Prototype the hook point for plugins to merge Results. +global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal); # Store of sumstats indexed on the sumstat id. global stats_store: table[string] of SumStat = table(); @@ -182,20 +223,20 @@ global result_store: table[string] of ResultTable = table(); # Store of threshold information. global thresholds_store: table[string, Key] of bool = table(); +# Store the calculations. +global calc_store: table[Calculation] of ObserveFunc = table(); + +# Store the dependencies for Calculations. +global calc_deps: table[Calculation] of vector of Calculation = table(); + +# Hook for registering observation calculation plugins. +global register_observe_plugins: hook(); + # This is called whenever key values are updated and the new val is given as the # `val` argument. It's only prototyped here because cluster and non-cluster have # separate implementations. global data_added: function(ss: SumStat, key: Key, result: Result); -# Prototype the hook point for plugins to do calculations. -global observe_hook: hook(r: Reducer, val: double, data: Observation, rv: ResultVal); - -# Prototype the hook point for plugins to initialize any result values. -global init_resultval_hook: hook(r: Reducer, rv: ResultVal); - -# Prototype the hook point for plugins to merge Results. -global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal); - # Event that is used to "finish" measurements and adapt the measurement # framework for clustered or non-clustered usage. 
global finish_epoch: event(ss: SumStat); @@ -210,6 +251,24 @@ function key2str(key: Key): string return fmt("sumstats_key(%s)", out); } +function register_observe_plugin(calc: Calculation, func: ObserveFunc) + { + calc_store[calc] = func; + } + +function add_observe_plugin_dependency(calc: Calculation, depends_on: Calculation) + { + if ( calc !in calc_deps ) + calc_deps[calc] = vector(); + calc_deps[calc][|calc_deps[calc]|] = depends_on; + } + +event bro_init() &priority=100000 + { + # Call all of the plugin registration hooks + hook register_observe_plugins(); + } + function init_resultval(r: Reducer): ResultVal { local rv: ResultVal = [$begin=network_time(), $end=network_time()]; @@ -234,25 +293,17 @@ function compose_results(r1: Result, r2: Result): Result { local result: Result = table(); - if ( |r1| > |r2| ) + for ( id in r1 ) { - for ( data_id in r1 ) - { - if ( data_id in r2 ) - result[data_id] = compose_resultvals(r1[data_id], r2[data_id]); - else - result[data_id] = r1[data_id]; - } + result[id] = r1[id]; } - else + + for ( id in r2 ) { - for ( data_id in r2 ) - { - if ( data_id in r1 ) - result[data_id] = compose_resultvals(r1[data_id], r2[data_id]); - else - result[data_id] = r2[data_id]; - } + if ( id in r1 ) + result[id] = compose_resultvals(r1[id], r2[id]); + else + result[id] = r2[id]; } return result; @@ -261,18 +312,43 @@ function compose_results(r1: Result, r2: Result): Result function reset(ss: SumStat) { - if ( ss$id in result_store ) - delete result_store[ss$id]; + if ( ss$name in result_store ) + delete result_store[ss$name]; - result_store[ss$id] = table(); + result_store[ss$name] = table(); - if ( ss?$threshold || ss?$threshold_series ) + if ( ss$name in threshold_tracker ) { - threshold_tracker[ss$id] = table(); - event SumStats::thresholds_reset(ss$id); + delete threshold_tracker[ss$name]; + threshold_tracker[ss$name] = table(); + event SumStats::thresholds_reset(ss$name); } } +# This could potentially recurse forever, but plugin authors +# should be making sure they aren't causing reflexive dependencies. +function add_calc_deps(calcs: vector of Calculation, c: Calculation) + { + #print fmt("Checking for deps for %s", c); + for ( i in calc_deps[c] ) + { + local skip_calc=F; + for ( j in calcs ) + { + if ( calcs[j] == calc_deps[c][i] ) + skip_calc=T; + } + if ( ! skip_calc ) + { + if ( calc_deps[c][i] in calc_deps ) + add_calc_deps(calcs, calc_deps[c][i]); + calcs[|c|] = calc_deps[c][i]; + #print fmt("add dep for %s [%s] ", c, calc_deps[c][i]); + } + } + + } + function create(ss: SumStat) { if ( (ss?$threshold || ss?$threshold_series) && ! ss?$threshold_val ) @@ -280,14 +356,34 @@ function create(ss: SumStat) Reporter::error("SumStats given a threshold with no $threshold_val function"); } - if ( ! ss?$id ) - ss$id=unique_id(""); - threshold_tracker[ss$id] = table(); - stats_store[ss$id] = ss; + stats_store[ss$name] = ss; + + if ( ss?$threshold || ss?$threshold_series ) + threshold_tracker[ss$name] = table(); for ( reducer in ss$reducers ) { - reducer$sid = ss$id; + reducer$ssname = ss$name; + reducer$calc_funcs = vector(); + for ( calc in reducer$apply ) + { + # Add in dependencies recursively. + if ( calc in calc_deps ) + add_calc_deps(reducer$calc_funcs, calc); + + # Don't add this calculation to the vector if + # it was already added by something else as a + # dependency. + local skip_calc=F; + for ( j in reducer$calc_funcs ) + { + if ( calc == reducer$calc_funcs[j] ) + skip_calc=T; + } + if ( ! 
skip_calc ) + reducer$calc_funcs[|reducer$calc_funcs|] = calc; + } + if ( reducer$stream !in reducer_store ) reducer_store[reducer$stream] = set(); add reducer_store[reducer$stream][reducer]; @@ -313,9 +409,9 @@ function observe(id: string, key: Key, obs: Observation) if ( r?$pred && ! r$pred(key, obs) ) next; - local ss = stats_store[r$sid]; + local ss = stats_store[r$ssname]; - # If there is a threshold and no epoch_finished callback + # If there is a threshold and no epoch_result callback # we don't need to continue counting since the data will # never be accessed. This was leading # to some state management issues when measuring @@ -323,18 +419,21 @@ function observe(id: string, key: Key, obs: Observation) # NOTE: this optimization could need removed in the # future if on demand access is provided to the # SumStats results. - if ( ! ss?$epoch_finished && - r$sid in threshold_tracker && - key in threshold_tracker[r$sid] && + if ( ! ss?$epoch_result && + r$ssname in threshold_tracker && ( ss?$threshold && - threshold_tracker[r$sid][key]$is_threshold_crossed ) || + key in threshold_tracker[r$ssname] && + threshold_tracker[r$ssname][key] != 0 ) || ( ss?$threshold_series && - threshold_tracker[r$sid][key]$threshold_series_index+1 == |ss$threshold_series| ) ) + key in threshold_tracker[r$ssname] && + threshold_tracker[r$ssname][key] == |ss$threshold_series| ) ) + { next; + } - if ( r$sid !in result_store ) - result_store[ss$id] = table(); - local results = result_store[r$sid]; + if ( r$ssname !in result_store ) + result_store[r$ssname] = table(); + local results = result_store[r$ssname]; if ( key !in results ) results[key] = table(); @@ -350,10 +449,13 @@ function observe(id: string, key: Key, obs: Observation) # If a string was given, fall back to 1.0 as the value. local val = 1.0; - if ( obs?$num || obs?$dbl ) - val = obs?$dbl ? obs$dbl : obs$num; + if ( obs?$num ) + val = obs$num; + else if ( obs?$dbl ) + val = obs$dbl; - hook observe_hook(r, val, obs, result_val); + for ( i in r$calc_funcs ) + calc_store[r$calc_funcs[i]](r, val, obs, result_val); data_added(ss, key, result); } } @@ -362,10 +464,12 @@ function observe(id: string, key: Key, obs: Observation) # mid-break-interval threshold crossing detection for cluster deployments. function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool { - if ( ! (ss?$threshold || ss?$threshold_series) ) + if ( ! (ss?$threshold || ss?$threshold_series || ss?$threshold_crossed) ) return F; # Add in the extra ResultVals to make threshold_vals easier to write. + # This length comparison should work because we just need to make + # sure that we have the same number of reducers and results. if ( |ss$reducers| != |result| ) { for ( reducer in ss$reducers ) @@ -378,28 +482,21 @@ function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: dou local watch = ss$threshold_val(key, result); if ( modify_pct < 1.0 && modify_pct > 0.0 ) - watch = double_to_count(floor(watch/modify_pct)); + watch = watch/modify_pct; - if ( ss$id !in threshold_tracker ) - threshold_tracker[ss$id] = table(); - local t_tracker = threshold_tracker[ss$id]; + local t_index = get_threshold_index(ss$name, key); - if ( key !in t_tracker ) - { - local ttmp: Thresholding; - t_tracker[key] = ttmp; - } - local tt = t_tracker[key]; - - if ( ss?$threshold && ! tt$is_threshold_crossed && watch >= ss$threshold ) + if ( ss?$threshold && + t_index == 0 && # Check that the threshold hasn't already been crossed. 
+ watch >= ss$threshold ) { # Value crossed the threshold. return T; } if ( ss?$threshold_series && - |ss$threshold_series| >= tt$threshold_series_index && - watch >= ss$threshold_series[tt$threshold_series_index] ) + |ss$threshold_series| > t_index && # Check if there are more thresholds. + watch >= ss$threshold_series[t_index] ) { # A threshold series was given and the value crossed the next # value in the series. @@ -415,6 +512,8 @@ function threshold_crossed(ss: SumStat, key: Key, result: Result) if ( ! ss?$threshold_crossed ) return; + increment_threshold_tracker(ss$name,key); + # Add in the extra ResultVals to make threshold_crossed callbacks easier to write. if ( |ss$reducers| != |result| ) { @@ -426,11 +525,5 @@ function threshold_crossed(ss: SumStat, key: Key, result: Result) } ss$threshold_crossed(key, result); - local tt = threshold_tracker[ss$id][key]; - tt$is_threshold_crossed = T; - - # Bump up to the next threshold series index if a threshold series is being used. - if ( ss?$threshold_series ) - ++tt$threshold_series_index; } diff --git a/scripts/base/frameworks/sumstats/non-cluster.bro b/scripts/base/frameworks/sumstats/non-cluster.bro index f27d4b5cfb..97e1817598 100644 --- a/scripts/base/frameworks/sumstats/non-cluster.bro +++ b/scripts/base/frameworks/sumstats/non-cluster.bro @@ -4,11 +4,20 @@ module SumStats; event SumStats::finish_epoch(ss: SumStat) { - if ( ss$id in result_store ) + if ( ss$name in result_store ) { - local data = result_store[ss$id]; + local now = network_time(); + + if ( ss?$epoch_result ) + { + local data = result_store[ss$name]; + # TODO: don't block here. + for ( key in data ) + ss$epoch_result(now, key, data[key]); + } + if ( ss?$epoch_finished ) - ss$epoch_finished(data); + ss$epoch_finished(now); reset(ss); } @@ -16,9 +25,32 @@ event SumStats::finish_epoch(ss: SumStat) schedule ss$epoch { SumStats::finish_epoch(ss) }; } - function data_added(ss: SumStat, key: Key, result: Result) { if ( check_thresholds(ss, key, result, 1.0) ) threshold_crossed(ss, key, result); } + +function request(ss_name: string): ResultTable + { + # This only needs to be implemented this way for cluster compatibility. + return when ( T ) + { + if ( ss_name in result_store ) + return result_store[ss_name]; + else + return table(); + } + } + +function request_key(ss_name: string, key: Key): Result + { + # This only needs to be implemented this way for cluster compatibility. 
+ return when ( T ) + { + if ( ss_name in result_store && key in result_store[ss_name] ) + return result_store[ss_name][key]; + else + return table(); + } + } \ No newline at end of file diff --git a/scripts/base/frameworks/sumstats/plugins/__load__.bro b/scripts/base/frameworks/sumstats/plugins/__load__.bro index 1033ad4549..0b57597e58 100644 --- a/scripts/base/frameworks/sumstats/plugins/__load__.bro +++ b/scripts/base/frameworks/sumstats/plugins/__load__.bro @@ -6,5 +6,6 @@ @load ./sample @load ./std-dev @load ./sum +@load ./topk @load ./unique @load ./variance diff --git a/scripts/base/frameworks/sumstats/plugins/average.bro b/scripts/base/frameworks/sumstats/plugins/average.bro index ad82a91d20..8f7f7b568f 100644 --- a/scripts/base/frameworks/sumstats/plugins/average.bro +++ b/scripts/base/frameworks/sumstats/plugins/average.bro @@ -1,4 +1,4 @@ -@load base/frameworks/sumstats/main +@load ../main module SumStats; @@ -14,17 +14,18 @@ export { }; } -hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) +hook register_observe_plugins() { - if ( AVERAGE in r$apply ) + register_observe_plugin(AVERAGE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal) { if ( ! rv?$average ) rv$average = val; else rv$average += (val - rv$average) / rv$num; - } + }); } + hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) { if ( rv1?$average && rv2?$average ) diff --git a/scripts/base/frameworks/sumstats/plugins/hll_unique.bro b/scripts/base/frameworks/sumstats/plugins/hll_unique.bro index 41889532f3..3f17571f87 100644 --- a/scripts/base/frameworks/sumstats/plugins/hll_unique.bro +++ b/scripts/base/frameworks/sumstats/plugins/hll_unique.bro @@ -30,23 +30,20 @@ redef record ResultVal += { hll_error_margin: double &optional; }; - -hook init_resultval_hook(r: Reducer, rv: ResultVal) +hook register_observe_plugins() { - if ( HLLUNIQUE in r$apply && ! rv?$card ) - rv$card = hll_cardinality_init(r$hll_error_margin); - rv$hll_error_margin = r$hll_error_margin; - rv$hllunique = 0; - } - - -hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) - { - if ( HLLUNIQUE in r$apply ) + register_observe_plugin(HLLUNIQUE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal) { + if ( ! rv?$card ) + { + rv$card = hll_cardinality_init(r$hll_error_margin); + rv$hll_error_margin = r$hll_error_margin; + rv$hllunique = 0; + } + hll_cardinality_add(rv$card, obs); - rv$hllunique = double_to_count(hll_cardinality_estimate(rv$card)); - } + rv$hllunique = double_to_count(hll_cardinality_estimate(rv$card)); + }); } hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) diff --git a/scripts/base/frameworks/sumstats/plugins/last.bro b/scripts/base/frameworks/sumstats/plugins/last.bro index daebe30cf5..24376f8a2b 100644 --- a/scripts/base/frameworks/sumstats/plugins/last.bro +++ b/scripts/base/frameworks/sumstats/plugins/last.bro @@ -33,16 +33,20 @@ function get_last(rv: ResultVal): vector of Observation return s; } -hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) +hook register_observe_plugins() { - if ( LAST in r$apply && r$num_last_elements > 0 ) + register_observe_plugin(LAST, function(r: Reducer, val: double, obs: Observation, rv: ResultVal) { - if ( ! rv?$last_elements ) - rv$last_elements = Queue::init([$max_len=r$num_last_elements]); - Queue::put(rv$last_elements, obs); - } + if ( r$num_last_elements > 0 ) + { + if ( ! 
rv?$last_elements ) + rv$last_elements = Queue::init([$max_len=r$num_last_elements]); + Queue::put(rv$last_elements, obs); + } + }); } + hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) { # Merge $samples diff --git a/scripts/base/frameworks/sumstats/plugins/max.bro b/scripts/base/frameworks/sumstats/plugins/max.bro index f9ff9258ee..d43ad9dc38 100644 --- a/scripts/base/frameworks/sumstats/plugins/max.bro +++ b/scripts/base/frameworks/sumstats/plugins/max.bro @@ -1,4 +1,4 @@ -@load base/frameworks/sumstats/main +@load ../main module SumStats; @@ -14,15 +14,15 @@ export { }; } -hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) +hook register_observe_plugins() { - if ( MAX in r$apply ) + register_observe_plugin(MAX, function(r: Reducer, val: double, obs: Observation, rv: ResultVal) { if ( ! rv?$max ) rv$max = val; else if ( val > rv$max ) rv$max = val; - } + }); } hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) diff --git a/scripts/base/frameworks/sumstats/plugins/min.bro b/scripts/base/frameworks/sumstats/plugins/min.bro index 95d492f428..014755cf32 100644 --- a/scripts/base/frameworks/sumstats/plugins/min.bro +++ b/scripts/base/frameworks/sumstats/plugins/min.bro @@ -1,4 +1,4 @@ -@load base/frameworks/sumstats/main +@load ../main module SumStats; @@ -14,17 +14,18 @@ export { }; } -hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) +hook register_observe_plugins() { - if ( MIN in r$apply ) + register_observe_plugin(MIN, function(r: Reducer, val: double, obs: Observation, rv: ResultVal) { if ( ! rv?$min ) rv$min = val; else if ( val < rv$min ) rv$min = val; - } + }); } + hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) { if ( rv1?$min && rv2?$min ) diff --git a/scripts/base/frameworks/sumstats/plugins/sample.bro b/scripts/base/frameworks/sumstats/plugins/sample.bro index 9ce85c7795..c270ef42fd 100644 --- a/scripts/base/frameworks/sumstats/plugins/sample.bro +++ b/scripts/base/frameworks/sumstats/plugins/sample.bro @@ -47,15 +47,14 @@ function sample_add_sample(obs:Observation, rv: ResultVal) if ( ra < rv$num_samples ) rv$samples[ra] = obs; } - } -hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) +hook register_observe_plugins() { - if ( SAMPLE in r$apply ) + register_observe_plugin(SAMPLE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal) { sample_add_sample(obs, rv); - } + }); } hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) @@ -75,7 +74,6 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) return; } - if ( |rv1$samples| != num_samples && |rv2$samples| < num_samples ) { if ( |rv1$samples| != rv1$sample_elements || |rv2$samples| < rv2$sample_elements ) diff --git a/scripts/base/frameworks/sumstats/plugins/std-dev.bro b/scripts/base/frameworks/sumstats/plugins/std-dev.bro index 0f32e25a68..2e5b95b212 100644 --- a/scripts/base/frameworks/sumstats/plugins/std-dev.bro +++ b/scripts/base/frameworks/sumstats/plugins/std-dev.bro @@ -1,5 +1,5 @@ -@load base/frameworks/sumstats/main @load ./variance +@load ../main module SumStats; @@ -21,11 +21,18 @@ function calc_std_dev(rv: ResultVal) rv$std_dev = sqrt(rv$variance); } -# This depends on the variance plugin which uses priority -5 -hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-10 +hook std_dev_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) { - if 
( STD_DEV in r$apply ) + calc_std_dev(rv); + } + +hook register_observe_plugins() &priority=-10 + { + register_observe_plugin(STD_DEV, function(r: Reducer, val: double, obs: Observation, rv: ResultVal) + { calc_std_dev(rv); + }); + add_observe_plugin_dependency(STD_DEV, VARIANCE); } hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-10 diff --git a/scripts/base/frameworks/sumstats/plugins/sum.bro b/scripts/base/frameworks/sumstats/plugins/sum.bro index db2246742b..074b4b72f3 100644 --- a/scripts/base/frameworks/sumstats/plugins/sum.bro +++ b/scripts/base/frameworks/sumstats/plugins/sum.bro @@ -1,4 +1,4 @@ -@load base/frameworks/sumstats/main +@load ../main module SumStats; @@ -14,19 +14,19 @@ export { sum: double &default=0.0; }; - type threshold_function: function(key: SumStats::Key, result: SumStats::Result): count; - global sum_threshold: function(data_id: string): threshold_function; + #type threshold_function: function(key: SumStats::Key, result: SumStats::Result): count; + #global sum_threshold: function(data_id: string): threshold_function; } -function sum_threshold(data_id: string): threshold_function - { - return function(key: SumStats::Key, result: SumStats::Result): count - { - print fmt("data_id: %s", data_id); - print result; - return double_to_count(result[data_id]$sum); - }; - } +#function sum_threshold(data_id: string): threshold_function +# { +# return function(key: SumStats::Key, result: SumStats::Result): count +# { +# print fmt("data_id: %s", data_id); +# print result; +# return double_to_count(result[data_id]$sum); +# }; +# } hook init_resultval_hook(r: Reducer, rv: ResultVal) { @@ -34,10 +34,12 @@ hook init_resultval_hook(r: Reducer, rv: ResultVal) rv$sum = 0; } -hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) +hook register_observe_plugins() { - if ( SUM in r$apply ) + register_observe_plugin(SUM, function(r: Reducer, val: double, obs: Observation, rv: ResultVal) + { rv$sum += val; + }); } hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) diff --git a/scripts/base/frameworks/sumstats/plugins/topk.bro b/scripts/base/frameworks/sumstats/plugins/topk.bro new file mode 100644 index 0000000000..cb90af962e --- /dev/null +++ b/scripts/base/frameworks/sumstats/plugins/topk.bro @@ -0,0 +1,52 @@ +@load base/frameworks/sumstats + +module SumStats; + +export { + redef record Reducer += { + ## number of elements to keep in the top-k list + topk_size: count &default=500; + }; + + redef enum Calculation += { + TOPK + }; + + redef record ResultVal += { + topk: opaque of topk &optional; + }; + +} + +hook register_observe_plugins() + { + register_observe_plugin(TOPK, function(r: Reducer, val: double, obs: Observation, rv: ResultVal) + { + topk_add(rv$topk, obs); + }); + } + +hook init_resultval_hook(r: Reducer, rv: ResultVal) + { + if ( TOPK in r$apply && ! 
rv?$topk ) + rv$topk = topk_init(r$topk_size); + } + +hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) + { + if ( rv1?$topk ) + { + result$topk = topk_init(topk_size(rv1$topk)); + + topk_merge(result$topk, rv1$topk); + + if ( rv2?$topk ) + topk_merge(result$topk, rv2$topk); + } + + else if ( rv2?$topk ) + { + result$topk = topk_init(topk_size(rv2$topk)); + topk_merge(result$topk, rv2$topk); + } + } diff --git a/scripts/base/frameworks/sumstats/plugins/unique.bro b/scripts/base/frameworks/sumstats/plugins/unique.bro index b8bfc6a4e2..011949ce2f 100644 --- a/scripts/base/frameworks/sumstats/plugins/unique.bro +++ b/scripts/base/frameworks/sumstats/plugins/unique.bro @@ -1,4 +1,4 @@ -@load base/frameworks/sumstats/main +@load ../main module SumStats; @@ -23,15 +23,15 @@ redef record ResultVal += { unique_vals: set[Observation] &optional; }; -hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) +hook register_observe_plugins() { - if ( UNIQUE in r$apply ) + register_observe_plugin(UNIQUE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal) { if ( ! rv?$unique_vals ) rv$unique_vals=set(); add rv$unique_vals[obs]; rv$unique = |rv$unique_vals|; - } + }); } hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) diff --git a/scripts/base/frameworks/sumstats/plugins/variance.bro b/scripts/base/frameworks/sumstats/plugins/variance.bro index 773c7d697c..12d30cc4fe 100644 --- a/scripts/base/frameworks/sumstats/plugins/variance.bro +++ b/scripts/base/frameworks/sumstats/plugins/variance.bro @@ -1,5 +1,5 @@ -@load base/frameworks/sumstats/main @load ./average +@load ../main module SumStats; @@ -28,17 +28,17 @@ function calc_variance(rv: ResultVal) rv$variance = (rv$num > 1) ? rv$var_s/(rv$num-1) : 0.0; } -# Reduced priority since this depends on the average -hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-5 +hook register_observe_plugins() &priority=-5 { - if ( VARIANCE in r$apply ) + register_observe_plugin(VARIANCE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal) { if ( rv$num > 1 ) rv$var_s += ((val - rv$prev_avg) * (val - rv$average)); calc_variance(rv); rv$prev_avg = rv$average; - } + }); + add_observe_plugin_dependency(VARIANCE, AVERAGE); } # Reduced priority since this depends on the average diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index 7ee4c627b3..73176ccb94 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -531,22 +531,19 @@ type record_field_table: table[string] of record_field; # dependent on the names remaining as they are now. ## Set of BPF capture filters to use for capturing, indexed by a user-definable -## ID (which must be unique). If Bro is *not* configured to examine -## :bro:id:`PacketFilter::all_packets`, all packets matching at least -## one of the filters in this table (and all in :bro:id:`restrict_filters`) -## will be analyzed. +## ID (which must be unique). If Bro is *not* configured with +## :bro:id:`PacketFilter::enable_auto_protocol_capture_filters`, +## all packets matching at least one of the filters in this table (and all in +## :bro:id:`restrict_filters`) will be analyzed. ## -## .. bro:see:: PacketFilter PacketFilter::all_packets +## .. 
bro:see:: PacketFilter PacketFilter::enable_auto_protocol_capture_filters ## PacketFilter::unrestricted_filter restrict_filters global capture_filters: table[string] of string &redef; ## Set of BPF filters to restrict capturing, indexed by a user-definable ID (which -## must be unique). If Bro is *not* configured to examine -## :bro:id:`PacketFilter::all_packets`, only packets matching *all* of the -## filters in this table (and any in :bro:id:`capture_filters`) will be -## analyzed. +## must be unique). ## -## .. bro:see:: PacketFilter PacketFilter::all_packets +## .. bro:see:: PacketFilter PacketFilter::enable_auto_protocol_capture_filters ## PacketFilter::unrestricted_filter capture_filters global restrict_filters: table[string] of string &redef; @@ -3041,6 +3038,11 @@ module GLOBAL; ## Number of bytes per packet to capture from live interfaces. const snaplen = 8192 &redef; +## Seed for hashes computed internally for probabilistic data structures. Using +## the same value here will make the hashes compatible between independent Bro +## instances. If left unset, Bro will use a temporary local seed. +const global_hash_seed: string = "" &redef; + # Load BiFs defined by plugins. @load base/bif/plugins diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 6e348cfffd..61376c7de4 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -39,6 +39,7 @@ @load base/frameworks/tunnels @load base/protocols/conn +@load base/protocols/dhcp @load base/protocols/dns @load base/protocols/ftp @load base/protocols/http diff --git a/scripts/base/protocols/dhcp/__load__.bro b/scripts/base/protocols/dhcp/__load__.bro new file mode 100644 index 0000000000..c04423a855 --- /dev/null +++ b/scripts/base/protocols/dhcp/__load__.bro @@ -0,0 +1,4 @@ +@load ./consts +@load ./main + +@load-sigs ./dpd.sig diff --git a/scripts/base/protocols/dhcp/consts.bro b/scripts/base/protocols/dhcp/consts.bro new file mode 100644 index 0000000000..1b2a271563 --- /dev/null +++ b/scripts/base/protocols/dhcp/consts.bro @@ -0,0 +1,20 @@ +##! Types, errors, and fields for analyzing DHCP data. A helper file +##! for DHCP analysis scripts. + +module DHCP; + +export { + + ## Types of DHCP messages. See RFC 1533. + const message_types = { + [1] = "DHCP_DISCOVER", + [2] = "DHCP_OFFER", + [3] = "DHCP_REQUEST", + [4] = "DHCP_DECLINE", + [5] = "DHCP_ACK", + [6] = "DHCP_NAK", + [7] = "DHCP_RELEASE", + [8] = "DHCP_INFORM", + } &default = function(n: count): string { return fmt("unknown-message-type-%d", n); }; + +} diff --git a/scripts/base/protocols/dhcp/dpd.sig b/scripts/base/protocols/dhcp/dpd.sig new file mode 100644 index 0000000000..010920e2d8 --- /dev/null +++ b/scripts/base/protocols/dhcp/dpd.sig @@ -0,0 +1,5 @@ +signature dhcp_cookie { + ip-proto == udp + payload /^.*\x63\x82\x53\x63/ + enable "dhcp" +} \ No newline at end of file diff --git a/scripts/base/protocols/dhcp/main.bro b/scripts/base/protocols/dhcp/main.bro new file mode 100644 index 0000000000..144e5a53e7 --- /dev/null +++ b/scripts/base/protocols/dhcp/main.bro @@ -0,0 +1,75 @@ +##! Analyzes DHCP traffic in order to log DHCP leases given to clients. +##! This script ignores large swaths of the protocol, since it is rather +##! noisy on most networks, and focuses on the end-result: assigned leases. +##! +##! If you'd like to track known DHCP devices and to log the hostname +##! 
supplied by the client, see policy/protocols/dhcp/known-devices-and-hostnames.bro + +@load ./utils.bro + +module DHCP; + +export { + redef enum Log::ID += { LOG }; + + ## The record type which contains the column fields of the DHCP log. + type Info: record { + ## The earliest time at which a DHCP message over the + ## associated connection is observed. + ts: time &log; + ## A unique identifier of the connection over which DHCP is + ## occurring. + uid: string &log; + ## The connection's 4-tuple of endpoint addresses/ports. + id: conn_id &log; + ## Client's hardware address. + mac: string &log &optional; + ## Client's actual assigned IP address. + assigned_ip: addr &log &optional; + ## IP address lease interval. + lease_time: interval &log &optional; + ## A random number chosen by the client for this transaction. + trans_id: count &log; + }; + + ## Event that can be handled to access the DHCP + ## record as it is sent on to the logging framework. + global log_dhcp: event(rec: Info); +} + +# Add the dhcp info to the connection record +redef record connection += { + dhcp: Info &optional; +}; + +# 67/udp is the server's port, 68/udp the client. +const ports = { 67/udp, 68/udp }; +redef likely_server_ports += { 67/udp }; + +event bro_init() + { + Log::create_stream(DHCP::LOG, [$columns=Info, $ev=log_dhcp]); + Analyzer::register_for_ports(Analyzer::ANALYZER_DHCP, ports); + } + +event dhcp_ack(c: connection, msg: dhcp_msg, mask: addr, router: dhcp_router_list, lease: interval, serv_addr: addr, host_name: string) + { + local info: Info; + info$ts = network_time(); + info$id = c$id; + info$uid = c$uid; + info$lease_time = lease; + info$trans_id = msg$xid; + + if ( msg$h_addr != "" ) + info$mac = msg$h_addr; + + if ( reverse_ip(msg$yiaddr) != 0.0.0.0 ) + info$assigned_ip = reverse_ip(msg$yiaddr); + else + info$assigned_ip = c$id$orig_h; + + c$dhcp = info; + + Log::write(DHCP::LOG, c$dhcp); + } diff --git a/scripts/base/protocols/dhcp/utils.bro b/scripts/base/protocols/dhcp/utils.bro new file mode 100644 index 0000000000..cb06450088 --- /dev/null +++ b/scripts/base/protocols/dhcp/utils.bro @@ -0,0 +1,21 @@ +##! Utilities specific for DHCP processing. + +@load ./main + +module DHCP; + +export { + ## Reverse the octets of an IPv4 address. + ## + ## ip: An :bro:type:`addr` IPv4 address. + ## + ## Returns: A reversed addr. + global reverse_ip: function(ip: addr): addr; +} + +function reverse_ip(ip: addr): addr + { + local octets = split(cat(ip), /\./); + return to_addr(cat(octets[4], ".", octets[3], ".", octets[2], ".", octets[1])); + } + diff --git a/scripts/base/protocols/ftp/__load__.bro b/scripts/base/protocols/ftp/__load__.bro index ebb09e702c..3ddd8a2dc2 100644 --- a/scripts/base/protocols/ftp/__load__.bro +++ b/scripts/base/protocols/ftp/__load__.bro @@ -1,4 +1,5 @@ @load ./utils-commands +@load ./info @load ./main @load ./utils @load ./files diff --git a/scripts/base/protocols/ftp/files.bro b/scripts/base/protocols/ftp/files.bro index 9ed17ab2a4..b507ca32a7 100644 --- a/scripts/base/protocols/ftp/files.bro +++ b/scripts/base/protocols/ftp/files.bro @@ -1,3 +1,4 @@ +@load ./info @load ./main @load ./utils @load base/utils/conn-ids diff --git a/scripts/base/protocols/ftp/gridftp.bro b/scripts/base/protocols/ftp/gridftp.bro index 57752b1cbd..73bd656544 100644 --- a/scripts/base/protocols/ftp/gridftp.bro +++ b/scripts/base/protocols/ftp/gridftp.bro @@ -19,6 +19,7 @@ ##! sizes are not logged, but at the benefit of saving CPU cycles that ##! otherwise go to analyzing the large (and likely benign) connections.
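The DHCP::log_dhcp event added in dhcp/main.bro above gives other scripts a hook into each lease as it is written to dhcp.log. A minimal consumer sketch (hypothetical handler, not part of this patch; field names match the DHCP::Info record above):

@load base/protocols/dhcp

# Hypothetical consumer of the new dhcp.log stream: react to each logged
# lease. Both fields used here are &optional, so test for them first.
event DHCP::log_dhcp(rec: DHCP::Info)
	{
	if ( rec?$mac && rec?$assigned_ip )
		print fmt("%s leased %s", rec$mac, rec$assigned_ip);
	}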
+@load ./info @load ./main @load base/protocols/conn @load base/protocols/ssl diff --git a/scripts/base/protocols/ftp/info.bro b/scripts/base/protocols/ftp/info.bro new file mode 100644 index 0000000000..f6fceb071e --- /dev/null +++ b/scripts/base/protocols/ftp/info.bro @@ -0,0 +1,72 @@ +##! Defines data structures for tracking and logging FTP sessions. + +module FTP; + +@load ./utils-commands + +export { + + ## This setting changes if passwords used in FTP sessions are + ## captured or not. + const default_capture_password = F &redef; + + ## The expected endpoints of an FTP data channel. + type ExpectedDataChannel: record { + ## Whether PASV mode is toggled for control channel. + passive: bool &log; + ## The host that will be initiating the data connection. + orig_h: addr &log; + ## The host that will be accepting the data connection. + resp_h: addr &log; + ## The port at which the acceptor is listening for the data connection. + resp_p: port &log; + }; + + type Info: record { + ## Time when the command was sent. + ts: time &log; + ## Unique ID for the connection. + uid: string &log; + ## The connection's 4-tuple of endpoint addresses/ports. + id: conn_id &log; + ## User name for the current FTP session. + user: string &log &default=""; + ## Password for the current FTP session if captured. + password: string &log &optional; + ## Command given by the client. + command: string &log &optional; + ## Argument for the command if one is given. + arg: string &log &optional; + + ## Libmagic "sniffed" file type if the command indicates a file transfer. + mime_type: string &log &optional; + ## Size of the file if the command indicates a file transfer. + file_size: count &log &optional; + + ## Reply code from the server in response to the command. + reply_code: count &log &optional; + ## Reply message from the server in response to the command. + reply_msg: string &log &optional; + + ## Expected FTP data channel. + data_channel: ExpectedDataChannel &log &optional; + + ## Current working directory that this session is in. By making + ## the default value '.', we can indicate that unless something + ## more concrete is discovered that the existing but unknown + ## directory is ok to use. + cwd: string &default="."; + + ## Command that is currently waiting for a response. + cmdarg: CmdArg &optional; + ## Queue for commands that have been sent but not yet responded to + ## are tracked here. + pending_commands: PendingCmds; + + ## Indicates if the session is in active or passive mode. + passive: bool &default=F; + + ## Determines if the password will be captured for this request. + capture_password: bool &default=default_capture_password; + }; +} diff --git a/scripts/base/protocols/ftp/main.bro b/scripts/base/protocols/ftp/main.bro index 7e66b63f40..254dca7d42 100644 --- a/scripts/base/protocols/ftp/main.bro +++ b/scripts/base/protocols/ftp/main.bro @@ -3,6 +3,8 @@ ##! will take on the full path that the client is at along with the requested ##! file name. +@load ./info +@load ./utils @load ./utils-commands @load base/utils/paths @load base/utils/numbers @@ -20,72 +22,9 @@ export { "EPSV" } &redef; - ## This setting changes if passwords used in FTP sessions are captured or not. - const default_capture_password = F &redef; - ## User IDs that can be considered "anonymous". const guest_ids = { "anonymous", "ftp", "ftpuser", "guest" } &redef; - ## The expected endpoints of an FTP data channel. - type ExpectedDataChannel: record { - ## Whether PASV mode is toggled for control channel. 
- passive: bool &log; - ## The host that will be initiating the data connection. - orig_h: addr &log; - ## The host that will be accepting the data connection. - resp_h: addr &log; - ## The port at which the acceptor is listening for the data connection. - resp_p: port &log; - }; - - type Info: record { - ## Time when the command was sent. - ts: time &log; - ## Unique ID for the connection. - uid: string &log; - ## The connection's 4-tuple of endpoint addresses/ports. - id: conn_id &log; - ## User name for the current FTP session. - user: string &log &default=""; - ## Password for the current FTP session if captured. - password: string &log &optional; - ## Command given by the client. - command: string &log &optional; - ## Argument for the command if one is given. - arg: string &log &optional; - - ## Libmagic "sniffed" file type if the command indicates a file transfer. - mime_type: string &log &optional; - ## Size of the file if the command indicates a file transfer. - file_size: count &log &optional; - - ## Reply code from the server in response to the command. - reply_code: count &log &optional; - ## Reply message from the server in response to the command. - reply_msg: string &log &optional; - - ## Expected FTP data channel. - data_channel: ExpectedDataChannel &log &optional; - - ## Current working directory that this session is in. By making - ## the default value '.', we can indicate that unless something - ## more concrete is discovered that the existing but unknown - ## directory is ok to use. - cwd: string &default="."; - - ## Command that is currently waiting for a response. - cmdarg: CmdArg &optional; - ## Queue for commands that have been sent but not yet responded to - ## are tracked here. - pending_commands: PendingCmds; - - ## Indicates if the session is in active or passive mode. - passive: bool &default=F; - - ## Determines if the password will be captured for this request. - capture_password: bool &default=default_capture_password; - }; - ## This record is to hold a parsed FTP reply code. For example, for the ## 201 status code, the digits would be parsed as: x->2, y->0, z=>1. type ReplyCode: record { @@ -102,8 +41,6 @@ export { global log_ftp: event(rec: Info); } -@load ./utils - # Add the state tracking information variable to the connection record redef record connection += { ftp: Info &optional; diff --git a/scripts/base/protocols/ftp/utils.bro b/scripts/base/protocols/ftp/utils.bro index 8b92a37764..313280b904 100644 --- a/scripts/base/protocols/ftp/utils.bro +++ b/scripts/base/protocols/ftp/utils.bro @@ -1,7 +1,8 @@ ##! Utilities specific for FTP processing. -@load ./main +@load ./info @load base/utils/addrs +@load base/utils/paths module FTP; diff --git a/scripts/base/protocols/ssl/main.bro b/scripts/base/protocols/ssl/main.bro index 65526182ac..0d4a8435f0 100644 --- a/scripts/base/protocols/ssl/main.bro +++ b/scripts/base/protocols/ssl/main.bro @@ -67,11 +67,8 @@ export { ## (especially with large file transfers). const disable_analyzer_after_detection = T &redef; - ## The maximum amount of time a script can delay records from being logged. - const max_log_delay = 15secs &redef; - ## Delays an SSL record for a specific token: the record will not be logged - ## as longs the token exists or until :bro:id:`SSL::max_log_delay` elapses. + ## as long as the token exists or until 15 seconds elapses.
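A sketch of how the reworked delay mechanism looks from a script's perspective (hypothetical token and lookup; delay_log and undelay_log are the functions documented above):

@load base/protocols/ssl

# Hold the ssl.log record back until an asynchronous lookup finishes,
# then release it. If the token were never removed, the record would
# still be written after the 15 second timeout described above.
event ssl_established(c: connection)
	{
	SSL::delay_log(c$ssl, "dns-enrichment");

	when ( local hname = lookup_addr(c$id$resp_h) )
		{
		# ... use hname to enrich c$ssl here ...
		SSL::undelay_log(c$ssl, "dns-enrichment");
		}
	}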
global delay_log: function(info: Info, token: string); ## Undelays an SSL record for a previously inserted token, allowing the @@ -90,7 +87,7 @@ redef record connection += { redef record Info += { # Adding a string "token" to this set will cause the SSL script # to delay logging the record until either the token has been removed or - # the record has been delayed for :bro:id:`SSL::max_log_delay`. + # the record has been delayed. delay_tokens: set[string] &optional; }; @@ -138,7 +135,7 @@ function log_record(info: Info) { log_record(info); } - timeout SSL::max_log_delay + timeout 15secs { Reporter::info(fmt("SSL delay tokens not released in time (%s tokens remaining)", |info$delay_tokens|)); diff --git a/scripts/base/utils/dir.bro b/scripts/base/utils/dir.bro index 4f3ee94945..1ade4a47f7 100644 --- a/scripts/base/utils/dir.bro +++ b/scripts/base/utils/dir.bro @@ -28,7 +28,7 @@ event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(fname: string), poll_interval: interval) { - when ( local result = Exec::run([$cmd=fmt("ls -i \"%s/\"", str_shell_escape(dir))]) ) + when ( local result = Exec::run([$cmd=fmt("ls -i -1 \"%s/\"", str_shell_escape(dir))]) ) { if ( result$exit_code != 0 ) { diff --git a/scripts/base/utils/exec.bro b/scripts/base/utils/exec.bro index 732bbcf34c..d505b424c7 100644 --- a/scripts/base/utils/exec.bro +++ b/scripts/base/utils/exec.bro @@ -163,6 +163,7 @@ function run(cmd: Command): Result Input::add_event([$name=cmd$uid, $source=fmt("%s |", cmd$cmd), $reader=Input::READER_RAW, + $mode=Input::STREAM, $fields=Exec::OneLine, $ev=Exec::line, $want_record=F, diff --git a/scripts/policy/frameworks/packet-filter/shunt.bro b/scripts/policy/frameworks/packet-filter/shunt.bro index b87369ee62..85ec189a17 100644 --- a/scripts/policy/frameworks/packet-filter/shunt.bro +++ b/scripts/policy/frameworks/packet-filter/shunt.bro @@ -34,8 +34,8 @@ export { global current_shunted_host_pairs: function(): set[conn_id]; redef enum Notice::Type += { - ## Indicative that :bro:id:`max_bpf_shunts` connections are already - ## being shunted with BPF filters and no more are allowed. + ## Indicative that :bro:id:`PacketFilter::max_bpf_shunts` connections + ## are already being shunted with BPF filters and no more are allowed. No_More_Conn_Shunts_Available, ## Limitations in BPF make shunting some connections with BPF impossible. diff --git a/scripts/policy/misc/app-metrics.bro b/scripts/policy/misc/app-metrics.bro deleted file mode 100644 index 3df38ad8ad..0000000000 --- a/scripts/policy/misc/app-metrics.bro +++ /dev/null @@ -1,109 +0,0 @@ -@load base/protocols/http -@load base/protocols/ssl -@load base/frameworks/sumstats - -module AppStats; - -export { - redef enum Log::ID += { LOG }; - - type Info: record { - ## Timestamp when the log line was finished and written. - ts: time &log; - ## Time interval that the log line covers. - ts_delta: interval &log; - ## The name of the "app", like "facebook" or "netflix". - app: string &log; - ## The number of unique local hosts using the app. - uniq_hosts: count &log; - ## The number of hits to the app in total. - hits: count &log; - ## The total number of bytes received by users of the app. - bytes: count &log; - }; - - ## The frequency of logging the stats collected by this script. 
- const break_interval = 15mins &redef; -} - -redef record connection += { - resp_hostname: string &optional; -}; - -event bro_init() &priority=3 - { - Log::create_stream(AppStats::LOG, [$columns=Info]); - - local r1: SumStats::Reducer = [$stream="apps.bytes", $apply=set(SumStats::SUM)]; - local r2: SumStats::Reducer = [$stream="apps.hits", $apply=set(SumStats::UNIQUE)]; - SumStats::create([$epoch=break_interval, - $reducers=set(r1, r2), - $epoch_finished(data: SumStats::ResultTable) = - { - local l: Info; - l$ts = network_time(); - l$ts_delta = break_interval; - for ( key in data ) - { - local result = data[key]; - l$app = key$str; - l$bytes = double_to_count(floor(result["apps.bytes"]$sum)); - l$hits = result["apps.hits"]$num; - l$uniq_hosts = result["apps.hits"]$unique; - Log::write(LOG, l); - } - }]); - } - -function add_sumstats(id: conn_id, hostname: string, size: count) - { - if ( /\.youtube\.com$/ in hostname && size > 512*1024 ) - { - SumStats::observe("apps.bytes", [$str="youtube"], [$num=size]); - SumStats::observe("apps.hits", [$str="youtube"], [$str=cat(id$orig_h)]); - } - else if ( /(\.facebook\.com|\.fbcdn\.net)$/ in hostname && size > 20 ) - { - SumStats::observe("apps.bytes", [$str="facebook"], [$num=size]); - SumStats::observe("apps.hits", [$str="facebook"], [$str=cat(id$orig_h)]); - } - else if ( /\.google\.com$/ in hostname && size > 20 ) - { - SumStats::observe("apps.bytes", [$str="google"], [$num=size]); - SumStats::observe("apps.hits", [$str="google"], [$str=cat(id$orig_h)]); - } - else if ( /\.nflximg\.com$/ in hostname && size > 200*1024 ) - { - SumStats::observe("apps.bytes", [$str="netflix"], [$num=size]); - SumStats::observe("apps.hits", [$str="netflix"], [$str=cat(id$orig_h)]); - } - else if ( /\.(pandora|p-cdn)\.com$/ in hostname && size > 512*1024 ) - { - SumStats::observe("apps.bytes", [$str="pandora"], [$num=size]); - SumStats::observe("apps.hits", [$str="pandora"], [$str=cat(id$orig_h)]); - } - else if ( /\.gmail\.com$/ in hostname && size > 20 ) - { - SumStats::observe("apps.bytes", [$str="gmail"], [$num=size]); - SumStats::observe("apps.hits", [$str="gmail"], [$str=cat(id$orig_h)]); - } -} - - -event ssl_established(c: connection) - { - if ( c?$ssl && c$ssl?$server_name ) - c$resp_hostname = c$ssl$server_name; - } - -event connection_finished(c: connection) - { - if ( c?$resp_hostname ) - add_sumstats(c$id, c$resp_hostname, c$resp$size); - } - -event HTTP::log_http(rec: HTTP::Info) - { - if( rec?$host ) - add_sumstats(rec$id, rec$host, rec$response_body_len); - } diff --git a/scripts/policy/misc/app-stats/__load__.bro b/scripts/policy/misc/app-stats/__load__.bro new file mode 100644 index 0000000000..c468d055ee --- /dev/null +++ b/scripts/policy/misc/app-stats/__load__.bro @@ -0,0 +1,2 @@ +@load ./main +@load ./plugins \ No newline at end of file diff --git a/scripts/policy/misc/app-stats/main.bro b/scripts/policy/misc/app-stats/main.bro new file mode 100644 index 0000000000..24c9ac2ade --- /dev/null +++ b/scripts/policy/misc/app-stats/main.bro @@ -0,0 +1,77 @@ +#! AppStats collects information about web applications in use +#! on the network. + +@load base/protocols/http +@load base/protocols/ssl +@load base/frameworks/sumstats + +module AppStats; + +export { + redef enum Log::ID += { LOG }; + + type Info: record { + ## Timestamp when the log line was finished and written. + ts: time &log; + ## Time interval that the log line covers. + ts_delta: interval &log; + ## The name of the "app", like "facebook" or "netflix". 
+ app: string &log; + ## The number of unique local hosts using the app. + uniq_hosts: count &log; + ## The number of hits to the app in total. + hits: count &log; + ## The total number of bytes received by users of the app. + bytes: count &log; + }; + + ## The frequency of logging the stats collected by this script. + const break_interval = 15mins &redef; +} + +redef record connection += { + resp_hostname: string &optional; +}; + +global add_sumstats: hook(id: conn_id, hostname: string, size: count); + + +event bro_init() &priority=3 + { + Log::create_stream(AppStats::LOG, [$columns=Info]); + + local r1: SumStats::Reducer = [$stream="apps.bytes", $apply=set(SumStats::SUM)]; + local r2: SumStats::Reducer = [$stream="apps.hits", $apply=set(SumStats::UNIQUE)]; + SumStats::create([$name="app-metrics", + $epoch=break_interval, + $reducers=set(r1, r2), + $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) = + { + local l: Info; + l$ts = network_time(); + l$ts_delta = break_interval; + l$app = key$str; + l$bytes = double_to_count(floor(result["apps.bytes"]$sum)); + l$hits = result["apps.hits"]$num; + l$uniq_hosts = result["apps.hits"]$unique; + Log::write(LOG, l); + }]); + } + +event ssl_established(c: connection) + { + if ( c?$ssl && c$ssl?$server_name ) + c$resp_hostname = c$ssl$server_name; + } + +event connection_finished(c: connection) + { + if ( c?$resp_hostname ) + hook add_sumstats(c$id, c$resp_hostname, c$resp$size); + } + +event HTTP::log_http(rec: HTTP::Info) + { + if( rec?$host ) + hook add_sumstats(rec$id, rec$host, rec$response_body_len); + } diff --git a/scripts/policy/misc/app-stats/plugins/__load__.bro b/scripts/policy/misc/app-stats/plugins/__load__.bro new file mode 100644 index 0000000000..7a3ea2da81 --- /dev/null +++ b/scripts/policy/misc/app-stats/plugins/__load__.bro @@ -0,0 +1,6 @@ +@load ./facebook +@load ./gmail +@load ./google +@load ./netflix +@load ./pandora +@load ./youtube \ No newline at end of file diff --git a/scripts/policy/misc/app-stats/plugins/facebook.bro b/scripts/policy/misc/app-stats/plugins/facebook.bro new file mode 100644 index 0000000000..edcb02b72a --- /dev/null +++ b/scripts/policy/misc/app-stats/plugins/facebook.bro @@ -0,0 +1,12 @@ +@load ../main + +module AppStats; + +hook add_sumstats(id: conn_id, hostname: string, size: count) + { + if ( /\.(facebook\.com|fbcdn\.net)$/ in hostname && size > 20 ) + { + SumStats::observe("apps.bytes", [$str="facebook"], [$num=size]); + SumStats::observe("apps.hits", [$str="facebook"], [$str=cat(id$orig_h)]); + } + } \ No newline at end of file diff --git a/scripts/policy/misc/app-stats/plugins/gmail.bro b/scripts/policy/misc/app-stats/plugins/gmail.bro new file mode 100644 index 0000000000..1642fb7651 --- /dev/null +++ b/scripts/policy/misc/app-stats/plugins/gmail.bro @@ -0,0 +1,12 @@ +@load ../main + +module AppStats; + +hook add_sumstats(id: conn_id, hostname: string, size: count) + { + if ( /\.gmail\.com$/ in hostname && size > 20 ) + { + SumStats::observe("apps.bytes", [$str="gmail"], [$num=size]); + SumStats::observe("apps.hits", [$str="gmail"], [$str=cat(id$orig_h)]); + } + } \ No newline at end of file diff --git a/scripts/policy/misc/app-stats/plugins/google.bro b/scripts/policy/misc/app-stats/plugins/google.bro new file mode 100644 index 0000000000..e1da3a9068 --- /dev/null +++ b/scripts/policy/misc/app-stats/plugins/google.bro @@ -0,0 +1,12 @@ +@load ../main + +module AppStats; + +hook add_sumstats(id: conn_id, hostname: string, size: count) + { + if ( /\.google\.com$/ in hostname && 
size > 20 ) + { + SumStats::observe("apps.bytes", [$str="google"], [$num=size]); + SumStats::observe("apps.hits", [$str="google"], [$str=cat(id$orig_h)]); + } + } \ No newline at end of file diff --git a/scripts/policy/misc/app-stats/plugins/netflix.bro b/scripts/policy/misc/app-stats/plugins/netflix.bro new file mode 100644 index 0000000000..5d429f0caf --- /dev/null +++ b/scripts/policy/misc/app-stats/plugins/netflix.bro @@ -0,0 +1,12 @@ +@load ../main + +module AppStats; + +hook add_sumstats(id: conn_id, hostname: string, size: count) + { + if ( /\.nflximg\.com$/ in hostname && size > 200*1024 ) + { + SumStats::observe("apps.bytes", [$str="netflix"], [$num=size]); + SumStats::observe("apps.hits", [$str="netflix"], [$str=cat(id$orig_h)]); + } + } \ No newline at end of file diff --git a/scripts/policy/misc/app-stats/plugins/pandora.bro b/scripts/policy/misc/app-stats/plugins/pandora.bro new file mode 100644 index 0000000000..6cfbfab72d --- /dev/null +++ b/scripts/policy/misc/app-stats/plugins/pandora.bro @@ -0,0 +1,12 @@ +@load ../main + +module AppStats; + +hook add_sumstats(id: conn_id, hostname: string, size: count) + { + if ( /\.(pandora|p-cdn)\.com$/ in hostname && size > 512*1024 ) + { + SumStats::observe("apps.bytes", [$str="pandora"], [$num=size]); + SumStats::observe("apps.hits", [$str="pandora"], [$str=cat(id$orig_h)]); + } + } \ No newline at end of file diff --git a/scripts/policy/misc/app-stats/plugins/youtube.bro b/scripts/policy/misc/app-stats/plugins/youtube.bro new file mode 100644 index 0000000000..af872cfdac --- /dev/null +++ b/scripts/policy/misc/app-stats/plugins/youtube.bro @@ -0,0 +1,12 @@ +@load ../main + +module AppStats; + +hook add_sumstats(id: conn_id, hostname: string, size: count) + { + if ( /\.youtube\.com$/ in hostname && size > 512*1024 ) + { + SumStats::observe("apps.bytes", [$str="youtube"], [$num=size]); + SumStats::observe("apps.hits", [$str="youtube"], [$str=cat(id$orig_h)]); + } + } \ No newline at end of file diff --git a/scripts/policy/misc/detect-traceroute/main.bro b/scripts/policy/misc/detect-traceroute/main.bro index 3ed315746f..6b472f2948 100644 --- a/scripts/policy/misc/detect-traceroute/main.bro +++ b/scripts/policy/misc/detect-traceroute/main.bro @@ -29,7 +29,7 @@ export { ## Defines the threshold for ICMP Time Exceeded messages for a src-dst pair. ## This threshold only comes into play after a host is found to be ## sending low ttl packets. - const icmp_time_exceeded_threshold = 3 &redef; + const icmp_time_exceeded_threshold: double = 3 &redef; ## Interval at which to watch for the ## :bro:id:`Traceroute::icmp_time_exceeded_threshold` variable to be @@ -57,16 +57,17 @@ event bro_init() &priority=5 local r1: SumStats::Reducer = [$stream="traceroute.time_exceeded", $apply=set(SumStats::UNIQUE)]; local r2: SumStats::Reducer = [$stream="traceroute.low_ttl_packet", $apply=set(SumStats::SUM)]; - SumStats::create([$epoch=icmp_time_exceeded_interval, + SumStats::create([$name="traceroute-detection", + $epoch=icmp_time_exceeded_interval, $reducers=set(r1, r2), $threshold_val(key: SumStats::Key, result: SumStats::Result) = { # Give a threshold value of zero depending on if the host # sends a low ttl packet. 
if ( require_low_ttl_packets && result["traceroute.low_ttl_packet"]$sum == 0 ) - return 0; + return 0.0; else - return result["traceroute.time_exceeded"]$unique; + return result["traceroute.time_exceeded"]$unique+0; }, $threshold=icmp_time_exceeded_threshold, $threshold_crossed(key: SumStats::Key, result: SumStats::Result) = diff --git a/scripts/policy/misc/known-devices.bro b/scripts/policy/misc/known-devices.bro new file mode 100644 index 0000000000..f4776a990b --- /dev/null +++ b/scripts/policy/misc/known-devices.bro @@ -0,0 +1,41 @@ +##! This script provides infrastructure for logging devices for which Bro has been +##! able to determine the MAC address, and it logs them once per day (by default). +##! The log that is output provides an easy way to determine a count of the devices +##! in use on a network per day. +##! +##! .. note:: +##! +##! This script will not generate any logs on its own; it needs to be +##! supplied with information from elsewhere, such as +##! :doc:`policy/protocols/dhcp/known-devices-and-hostnames`. + +module Known; + +export { + ## The known-devices logging stream identifier. + redef enum Log::ID += { DEVICES_LOG }; + + ## The record type which contains the column fields of the known-devices log. + type DevicesInfo: record { + ## The timestamp at which the device was detected. + ts: time &log; + ## The MAC address that was detected. + mac: string &log; + }; + + ## The set of all known MAC addresses. It can be accessed from other + ## scripts to add, and check for, addresses seen in use. + ## + ## We maintain each entry for 24 hours by default so that the existence of + ## individual addresses is logged each day. + global known_devices: set[string] &create_expire=1day &synchronized &redef; + + ## An event that can be handled to access the :bro:type:`Known::DevicesInfo` + ## record as it is sent on to the logging framework. + global log_known_devices: event(rec: DevicesInfo); +} + +event bro_init() + { + Log::create_stream(Known::DEVICES_LOG, [$columns=DevicesInfo, $ev=log_known_devices]); + } diff --git a/scripts/policy/misc/load-balancing.bro b/scripts/policy/misc/load-balancing.bro index fe07dd64da..889d18119a 100644 --- a/scripts/policy/misc/load-balancing.bro +++ b/scripts/policy/misc/load-balancing.bro @@ -12,12 +12,12 @@ export { ## Apply BPF filters to each worker in a way that causes them to ## automatically flow balance traffic between them. AUTO_BPF, - ## Load balance traffic across the workers by making each one apply - ## a restrict filter to only listen to a single MAC address. This - ## is a somewhat common deployment option for sites doing network - ## based load balancing with MAC address rewriting and passing the - ## traffic to a single interface. Multiple MAC addresses will show - ## up on the same interface and need filtered to a single address. + # Load balance traffic across the workers by making each one apply + # a restrict filter to only listen to a single MAC address. This + # is a somewhat common deployment option for sites doing network + # based load balancing with MAC address rewriting and passing the + # traffic to a single interface. Multiple MAC addresses will show + # up on the same interface and need filtered to a single address.
#MAC_ADDR_BPF, }; diff --git a/scripts/policy/misc/scan.bro b/scripts/policy/misc/scan.bro index 31caf527b7..909ccac02b 100644 --- a/scripts/policy/misc/scan.bro +++ b/scripts/policy/misc/scan.bro @@ -40,15 +40,11 @@ export { ## The threshold of a unique number of hosts a scanning host has to have failed ## connections with on a single port. - const addr_scan_threshold = 25 &redef; + const addr_scan_threshold = 25.0 &redef; ## The threshold of a number of unique ports a scanning host has to have failed ## connections with on a single victim host. - const port_scan_threshold = 15 &redef; - - ## Custom thresholds based on service for address scan. This is primarily - ## useful for setting reduced thresholds for specific ports. - const addr_scan_custom_thresholds: table[port] of count &redef; + const port_scan_threshold = 15.0 &redef; global Scan::addr_scan_policy: hook(scanner: addr, victim: addr, scanned_port: port); global Scan::port_scan_policy: hook(scanner: addr, victim: addr, scanned_port: port); @@ -57,11 +53,12 @@ export { event bro_init() &priority=5 { local r1: SumStats::Reducer = [$stream="scan.addr.fail", $apply=set(SumStats::UNIQUE)]; - SumStats::create([$epoch=addr_scan_interval, + SumStats::create([$name="addr-scan", + $epoch=addr_scan_interval, $reducers=set(r1), $threshold_val(key: SumStats::Key, result: SumStats::Result) = { - return double_to_count(result["scan.addr.fail"]$unique); + return result["scan.addr.fail"]$unique+0.0; }, #$threshold_func=check_addr_scan_threshold, $threshold=addr_scan_threshold, @@ -81,11 +78,12 @@ event bro_init() &priority=5 # Note: port scans are tracked similar to: table[src_ip, dst_ip] of set(port); local r2: SumStats::Reducer = [$stream="scan.port.fail", $apply=set(SumStats::UNIQUE)]; - SumStats::create([$epoch=port_scan_interval, + SumStats::create([$name="port-scan", + $epoch=port_scan_interval, $reducers=set(r2), $threshold_val(key: SumStats::Key, result: SumStats::Result) = { - return double_to_count(result["scan.port.fail"]$unique); + return result["scan.port.fail"]$unique+0.0; }, $threshold=port_scan_threshold, $threshold_crossed(key: SumStats::Key, result: SumStats::Result) = diff --git a/scripts/policy/protocols/dhcp/known-devices-and-hostnames.bro b/scripts/policy/protocols/dhcp/known-devices-and-hostnames.bro new file mode 100644 index 0000000000..519429981c --- /dev/null +++ b/scripts/policy/protocols/dhcp/known-devices-and-hostnames.bro @@ -0,0 +1,37 @@ +##! Tracks MAC addresses with hostnames seen in DHCP traffic. They are logged into +##! ``devices.log``.
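The Known::known_devices set and DEVICES_LOG stream introduced in misc/known-devices.bro above are source-agnostic; the DHCP script that follows is just one feeder. A minimal sketch of another feeder (hypothetical helper, assuming the caller supplies a non-empty MAC):

@load policy/misc/known-devices

# Hypothetical feeder: any script that learns a MAC address can record
# it exactly the way the DHCP event handlers below do.
function record_device(mac: string)
	{
	if ( mac in Known::known_devices )
		return;

	add Known::known_devices[mac];
	Log::write(Known::DEVICES_LOG, [$ts=network_time(), $mac=mac]);
	}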
+ +@load policy/misc/known-devices + +module Known; + +export { + redef record DevicesInfo += { + ## The value of the DHCP host name option, if seen + dhcp_host_name: string &log &optional; + }; +} + +event dhcp_request(c: connection, msg: dhcp_msg, req_addr: addr, serv_addr: addr, host_name: string) + { + if ( msg$h_addr == "" ) + return; + + if ( msg$h_addr !in known_devices ) + { + add known_devices[msg$h_addr]; + Log::write(Known::DEVICES_LOG, [$ts=network_time(), $mac=msg$h_addr, $dhcp_host_name=host_name]); + } + } + +event dhcp_inform(c: connection, msg: dhcp_msg, host_name: string) + { + if ( msg$h_addr == "" ) + return; + + if ( msg$h_addr !in known_devices ) + { + add known_devices[msg$h_addr]; + Log::write(Known::DEVICES_LOG, [$ts=network_time(), $mac=msg$h_addr, $dhcp_host_name=host_name]); + } + } diff --git a/scripts/policy/protocols/ftp/detect-bruteforcing.bro b/scripts/policy/protocols/ftp/detect-bruteforcing.bro index 21c9c403c7..36dfafb53a 100644 --- a/scripts/policy/protocols/ftp/detect-bruteforcing.bro +++ b/scripts/policy/protocols/ftp/detect-bruteforcing.bro @@ -17,7 +17,7 @@ export { ## How many rejected usernames or passwords are required before being ## considered to be bruteforcing. - const bruteforce_threshold = 20 &redef; + const bruteforce_threshold: double = 20 &redef; ## The time period in which the threshold needs to be crossed before ## being reset. @@ -28,11 +28,12 @@ export { event bro_init() { local r1: SumStats::Reducer = [$stream="ftp.failed_auth", $apply=set(SumStats::UNIQUE)]; - SumStats::create([$epoch=bruteforce_measurement_interval, + SumStats::create([$name="ftp-detect-bruteforcing", + $epoch=bruteforce_measurement_interval, $reducers=set(r1), $threshold_val(key: SumStats::Key, result: SumStats::Result) = { - return result["ftp.failed_auth"]$num; + return result["ftp.failed_auth"]$num+0.0; }, $threshold=bruteforce_threshold, $threshold_crossed(key: SumStats::Key, result: SumStats::Result) = diff --git a/scripts/policy/protocols/http/detect-sqli.bro b/scripts/policy/protocols/http/detect-sqli.bro index 8671bbd165..79d8d6f2f9 100644 --- a/scripts/policy/protocols/http/detect-sqli.bro +++ b/scripts/policy/protocols/http/detect-sqli.bro @@ -28,7 +28,7 @@ export { ## Defines the threshold that determines if an SQL injection attack ## is ongoing based on the number of requests that appear to be SQL ## injection attacks. - const sqli_requests_threshold = 50 &redef; + const sqli_requests_threshold: double = 50.0 &redef; ## Interval at which to watch for the ## :bro:id:`HTTP::sqli_requests_threshold` variable to be crossed. @@ -64,11 +64,12 @@ event bro_init() &priority=3 # determine when it looks like an actual attack and how to respond when # thresholds are crossed. 
local r1: SumStats::Reducer = [$stream="http.sqli.attacker", $apply=set(SumStats::SUM, SumStats::SAMPLE), $num_samples=collect_SQLi_samples]; - SumStats::create([$epoch=sqli_requests_interval, + SumStats::create([$name="detect-sqli-attackers", + $epoch=sqli_requests_interval, $reducers=set(r1), $threshold_val(key: SumStats::Key, result: SumStats::Result) = { - return double_to_count(result["http.sqli.attacker"]$sum); + return result["http.sqli.attacker"]$sum; }, $threshold=sqli_requests_threshold, $threshold_crossed(key: SumStats::Key, result: SumStats::Result) = @@ -82,11 +83,12 @@ event bro_init() &priority=3 }]); local r2: SumStats::Reducer = [$stream="http.sqli.victim", $apply=set(SumStats::SUM, SumStats::SAMPLE), $num_samples=collect_SQLi_samples]; - SumStats::create([$epoch=sqli_requests_interval, + SumStats::create([$name="detect-sqli-victims", + $epoch=sqli_requests_interval, $reducers=set(r2), $threshold_val(key: SumStats::Key, result: SumStats::Result) = { - return double_to_count(result["http.sqli.victim"]$sum); + return result["http.sqli.victim"]$sum; }, $threshold=sqli_requests_threshold, $threshold_crossed(key: SumStats::Key, result: SumStats::Result) = diff --git a/scripts/policy/protocols/ssh/detect-bruteforcing.bro b/scripts/policy/protocols/ssh/detect-bruteforcing.bro index ada418e61f..7988ecb0ad 100644 --- a/scripts/policy/protocols/ssh/detect-bruteforcing.bro +++ b/scripts/policy/protocols/ssh/detect-bruteforcing.bro @@ -27,7 +27,7 @@ export { ## The number of failed SSH connections before a host is designated as ## guessing passwords. - const password_guesses_limit = 30 &redef; + const password_guesses_limit: double = 30 &redef; ## The amount of time to remember presumed non-successful logins to build ## model of a password guesser. @@ -42,20 +42,29 @@ export { event bro_init() { - local r1: SumStats::Reducer = [$stream="ssh.login.failure", $apply=set(SumStats::SUM)]; - SumStats::create([$epoch=guessing_timeout, + local r1: SumStats::Reducer = [$stream="ssh.login.failure", $apply=set(SumStats::SUM, SumStats::SAMPLE), $num_samples=5]; + SumStats::create([$name="detect-ssh-bruteforcing", + $epoch=guessing_timeout, $reducers=set(r1), $threshold_val(key: SumStats::Key, result: SumStats::Result) = { - return double_to_count(result["ssh.login.failure"]$sum); + return result["ssh.login.failure"]$sum; }, $threshold=password_guesses_limit, $threshold_crossed(key: SumStats::Key, result: SumStats::Result) = { local r = result["ssh.login.failure"]; + local sub_msg = fmt("Sampled servers: "); + local samples = r$samples; + for ( i in samples ) + { + if ( samples[i]?$str ) + sub_msg = fmt("%s%s %s", sub_msg, i==0 ? "":",", samples[i]$str); + } # Generate the notice. NOTICE([$note=Password_Guessing, $msg=fmt("%s appears to be guessing SSH passwords (seen in %d connections).", key$host, r$num), + $sub=sub_msg, $src=key$host, $identifier=cat(key$host)]); }]); @@ -78,5 +87,5 @@ event SSH::heuristic_failed_login(c: connection) # be ignored. if ( ! 
(id$orig_h in ignore_guessers && id$resp_h in ignore_guessers[id$orig_h]) ) - SumStats::observe("ssh.login.failure", [$host=id$orig_h], [$num=1]); + SumStats::observe("ssh.login.failure", [$host=id$orig_h], [$str=cat(id$resp_h)]); } diff --git a/scripts/policy/tuning/defaults/packet-fragments.bro b/scripts/policy/tuning/defaults/packet-fragments.bro index 24b18d5917..f95c826547 100644 --- a/scripts/policy/tuning/defaults/packet-fragments.bro +++ b/scripts/policy/tuning/defaults/packet-fragments.bro @@ -1,10 +1,10 @@ -## Capture TCP fragments, but not UDP (or ICMP), since those are a lot more -## common due to high-volume, fragmenting protocols such as NFS :-(. +# Capture TCP fragments, but not UDP (or ICMP), since those are a lot more +# common due to high-volume, fragmenting protocols such as NFS :-(. -## This normally isn't used because of the default open packet filter -## but we set it anyway in case the user is using a packet filter. -## Note: This was removed because the default model now is to have a wide -## open packet filter. +# This normally isn't used because of the default open packet filter +# but we set it anyway in case the user is using a packet filter. +# Note: This was removed because the default model now is to have a wide +# open packet filter. #redef capture_filters += { ["frag"] = "(ip[6:2] & 0x3fff != 0) and tcp" }; ## Shorten the fragment timeout from never expiring to expiring fragments after diff --git a/scripts/site/local.bro b/scripts/site/local.bro index e4b3a44e7a..5b4af4d87e 100644 --- a/scripts/site/local.bro +++ b/scripts/site/local.bro @@ -11,6 +11,13 @@ # Load the scan detection script. @load misc/scan +# Log some information about web applications being used by users +# on your network. +@load misc/app-stats + +# Detect traceroute being run on the network. +@load misc/detect-traceroute + # Generate notices when vulnerable versions of software are discovered. # The default is to only monitor software found in the address space defined # as "local". 
Refer to the software framework's documentation for more diff --git a/scripts/test-all-policy.bro b/scripts/test-all-policy.bro index dcf50b538e..7d582bf82f 100644 --- a/scripts/test-all-policy.bro +++ b/scripts/test-all-policy.bro @@ -35,10 +35,19 @@ @load integration/barnyard2/types.bro @load integration/collective-intel/__load__.bro @load integration/collective-intel/main.bro -@load misc/app-metrics.bro +@load misc/app-stats/__load__.bro +@load misc/app-stats/main.bro +@load misc/app-stats/plugins/__load__.bro +@load misc/app-stats/plugins/facebook.bro +@load misc/app-stats/plugins/gmail.bro +@load misc/app-stats/plugins/google.bro +@load misc/app-stats/plugins/netflix.bro +@load misc/app-stats/plugins/pandora.bro +@load misc/app-stats/plugins/youtube.bro @load misc/capture-loss.bro @load misc/detect-traceroute/__load__.bro @load misc/detect-traceroute/main.bro +@load misc/known-devices.bro @load misc/load-balancing.bro @load misc/loaded-scripts.bro @load misc/profiling.bro @@ -48,6 +57,7 @@ @load protocols/conn/known-hosts.bro @load protocols/conn/known-services.bro @load protocols/conn/weirds.bro +@load protocols/dhcp/known-devices-and-hostnames.bro @load protocols/dns/auth-addl.bro @load protocols/dns/detect-external-names.bro @load protocols/ftp/detect-bruteforcing.bro diff --git a/src/BroDoc.cc b/src/BroDoc.cc index c04cd92eca..55dc8ce558 100644 --- a/src/BroDoc.cc +++ b/src/BroDoc.cc @@ -11,6 +11,7 @@ #include "plugin/Manager.h" #include "analyzer/Manager.h" #include "analyzer/Component.h" +#include "file_analysis/Manager.h" BroDoc::BroDoc(const std::string& rel, const std::string& abs) { @@ -479,6 +480,17 @@ static void WriteAnalyzerComponent(FILE* f, const analyzer::Component* c) fprintf(f, ":bro:enum:`Analyzer::%s`\n\n", tag.c_str()); } +static void WriteAnalyzerComponent(FILE* f, const file_analysis::Component* c) + { + EnumType* atag = file_mgr->GetTagEnumType(); + string tag = fmt("ANALYZER_%s", c->CanonicalName()); + + if ( atag->Lookup("Files", tag.c_str()) < 0 ) + reporter->InternalError("missing analyzer tag for %s", tag.c_str()); + + fprintf(f, ":bro:enum:`Files::%s`\n\n", tag.c_str()); + } + static void WritePluginComponents(FILE* f, const plugin::Plugin* p) { plugin::Plugin::component_list components = p->Components(); @@ -494,6 +506,10 @@ static void WritePluginComponents(FILE* f, const plugin::Plugin* p) WriteAnalyzerComponent(f, dynamic_cast<const analyzer::Component*>(*it)); break; + case plugin::component::FILE_ANALYZER: + WriteAnalyzerComponent(f, + dynamic_cast<const file_analysis::Component*>(*it)); + break; case plugin::component::READER: reporter->InternalError("docs for READER component unimplemented"); case plugin::component::WRITER: @@ -537,30 +553,35 @@ static void WritePluginBifItems(FILE* f, const plugin::Plugin* p, } } -static void WriteAnalyzerTagDefn(FILE* f, EnumType* e) +static void WriteAnalyzerTagDefn(FILE* f, EnumType* e, const string& module) { + string tag_id = module + "::Tag"; e = new CommentedEnumType(e); - e->SetTypeID(copy_string("Analyzer::Tag")); + e->SetTypeID(copy_string(tag_id.c_str())); - ID* dummy_id = new ID(copy_string("Analyzer::Tag"), SCOPE_GLOBAL, true); + ID* dummy_id = new ID(copy_string(tag_id.c_str()), SCOPE_GLOBAL, true); dummy_id->SetType(e); dummy_id->MakeType(); list<string>* r = new list<string>(); - r->push_back("Unique identifiers for protocol analyzers."); + r->push_back("Unique identifiers for analyzers."); BroDocObj bdo(dummy_id, r, true); bdo.WriteReST(f); } -static bool IsAnalyzerPlugin(const plugin::Plugin* p) +static bool ComponentsMatch(const plugin::Plugin* p,
plugin::component::Type t, + bool match_empty = false) { plugin::Plugin::component_list components = p->Components(); plugin::Plugin::component_list::const_iterator it; + if ( components.empty() ) + return match_empty; + for ( it = components.begin(); it != components.end(); ++it ) - if ( (*it)->Type() != plugin::component::ANALYZER ) + if ( (*it)->Type() != t ) return false; return true; @@ -573,14 +594,44 @@ void CreateProtoAnalyzerDoc(const char* filename) fprintf(f, "Protocol Analyzer Reference\n"); fprintf(f, "===========================\n\n"); - WriteAnalyzerTagDefn(f, analyzer_mgr->GetTagEnumType()); + WriteAnalyzerTagDefn(f, analyzer_mgr->GetTagEnumType(), "Analyzer"); plugin::Manager::plugin_list plugins = plugin_mgr->Plugins(); plugin::Manager::plugin_list::const_iterator it; for ( it = plugins.begin(); it != plugins.end(); ++it ) { - if ( ! IsAnalyzerPlugin(*it) ) + if ( ! ComponentsMatch(*it, plugin::component::ANALYZER, true) ) + continue; + + WritePluginSectionHeading(f, *it); + WritePluginComponents(f, *it); + WritePluginBifItems(f, *it, plugin::BifItem::CONSTANT, + "Options/Constants"); + WritePluginBifItems(f, *it, plugin::BifItem::GLOBAL, "Globals"); + WritePluginBifItems(f, *it, plugin::BifItem::TYPE, "Types"); + WritePluginBifItems(f, *it, plugin::BifItem::EVENT, "Events"); + WritePluginBifItems(f, *it, plugin::BifItem::FUNCTION, "Functions"); + } + + fclose(f); + } + +void CreateFileAnalyzerDoc(const char* filename) + { + FILE* f = fopen(filename, "w"); + + fprintf(f, "File Analyzer Reference\n"); + fprintf(f, "=======================\n\n"); + + WriteAnalyzerTagDefn(f, file_mgr->GetTagEnumType(), "Files"); + + plugin::Manager::plugin_list plugins = plugin_mgr->Plugins(); + plugin::Manager::plugin_list::const_iterator it; + + for ( it = plugins.begin(); it != plugins.end(); ++it ) + { + if ( ! ComponentsMatch(*it, plugin::component::FILE_ANALYZER) ) continue; WritePluginSectionHeading(f, *it); diff --git a/src/BroDoc.h b/src/BroDoc.h index 9f92f821f8..081df698d9 100644 --- a/src/BroDoc.h +++ b/src/BroDoc.h @@ -413,4 +413,10 @@ private: */ void CreateProtoAnalyzerDoc(const char* filename); +/** + * Writes out plugin index documentation for all file analyzer plugins. + * @param filename the name of the file to write. 
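+ * For illustration, a hypothetical call would be CreateFileAnalyzerDoc("file-analyzers.rst"); the actual output name is chosen by the doc-generation driver.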
+ */ +void CreateFileAnalyzerDoc(const char* filename); + #endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4a65ddd4d3..e64dcbb9f6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -319,6 +319,7 @@ set(bro_SRCS StateAccess.cc Stats.cc Stmt.cc + Tag.cc Timer.cc Traverse.cc Trigger.cc @@ -362,6 +363,8 @@ set(bro_SRCS 3rdparty/sqlite3.c plugin/Component.cc + plugin/ComponentManager.h + plugin/TaggedComponent.h plugin/Manager.cc plugin/Plugin.cc plugin/Macros.h diff --git a/src/DebugLogger.cc b/src/DebugLogger.cc index 380f21aa5f..dc557c4a0a 100644 --- a/src/DebugLogger.cc +++ b/src/DebugLogger.cc @@ -16,7 +16,8 @@ DebugLogger::Stream DebugLogger::streams[NUM_DBGS] = { { "notifiers", 0, false }, { "main-loop", 0, false }, { "dpd", 0, false }, { "tm", 0, false }, { "logging", 0, false }, {"input", 0, false }, - { "threading", 0, false }, { "file_analysis", 0, false } + { "threading", 0, false }, { "file_analysis", 0, false }, + { "plugins", 0, false} }; DebugLogger::DebugLogger(const char* filename) diff --git a/src/DebugLogger.h b/src/DebugLogger.h index e293b326a8..c5744642f5 100644 --- a/src/DebugLogger.h +++ b/src/DebugLogger.h @@ -27,6 +27,7 @@ enum DebugStream { DBG_INPUT, // Input streams DBG_THREADING, // Threading system DBG_FILE_ANALYSIS, // File analysis + DBG_PLUGINS, NUM_DBGS // Has to be last }; diff --git a/src/NetVar.cc b/src/NetVar.cc index c4c08882eb..28483b183b 100644 --- a/src/NetVar.cc +++ b/src/NetVar.cc @@ -238,11 +238,14 @@ TableType* record_field_table; StringVal* cmd_line_bpf_filter; +StringVal* global_hash_seed; + OpaqueType* md5_type; OpaqueType* sha1_type; OpaqueType* sha256_type; OpaqueType* entropy_type; OpaqueType* cardinality_type; +OpaqueType* topk_type; OpaqueType* bloomfilter_type; #include "const.bif.netvar_def" @@ -305,11 +308,14 @@ void init_general_global_var() cmd_line_bpf_filter = internal_val("cmd_line_bpf_filter")->AsStringVal(); + global_hash_seed = opt_internal_string("global_hash_seed"); + md5_type = new OpaqueType("md5"); sha1_type = new OpaqueType("sha1"); sha256_type = new OpaqueType("sha256"); entropy_type = new OpaqueType("entropy"); cardinality_type = new OpaqueType("cardinality"); + topk_type = new OpaqueType("topk"); bloomfilter_type = new OpaqueType("bloomfilter"); } diff --git a/src/NetVar.h b/src/NetVar.h index eaffb7e280..c404044b2b 100644 --- a/src/NetVar.h +++ b/src/NetVar.h @@ -242,12 +242,15 @@ extern TableType* record_field_table; extern StringVal* cmd_line_bpf_filter; +extern StringVal* global_hash_seed; + class OpaqueType; extern OpaqueType* md5_type; extern OpaqueType* sha1_type; extern OpaqueType* sha256_type; extern OpaqueType* entropy_type; extern OpaqueType* cardinality_type; +extern OpaqueType* topk_type; extern OpaqueType* bloomfilter_type; // Initializes globals that don't pertain to network/event analysis. 
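The global_hash_seed string registered above seeds Bro's internal hashing for probabilistic data structures. A minimal usage sketch (the seed value itself is a made-up example); giving every node in a cluster the same seed keeps hash-based structures comparable across nodes, e.g. when merging Bloom filters:

    redef global_hash_seed = "same seed on every node";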
diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 211426a434..5c4d819e47 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -569,14 +569,14 @@ BroType* BloomFilterVal::Type() const void BloomFilterVal::Add(const Val* val) { HashKey* key = hash->ComputeHash(val, 1); - bloom_filter->Add(key->Hash()); + bloom_filter->Add(key); delete key; } size_t BloomFilterVal::Count(const Val* val) const { HashKey* key = hash->ComputeHash(val, 1); - size_t cnt = bloom_filter->Count(key->Hash()); + size_t cnt = bloom_filter->Count(key); delete key; return cnt; } @@ -591,10 +591,17 @@ bool BloomFilterVal::Empty() const return bloom_filter->Empty(); } +string BloomFilterVal::InternalState() const + { + return bloom_filter->InternalState(); + } + BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, const BloomFilterVal* y) { - if ( ! same_type(x->Type(), y->Type()) ) + if ( x->Type() && // any one 0 is ok here + y->Type() && + ! same_type(x->Type(), y->Type()) ) { reporter->Error("cannot merge Bloom filters with different types"); return 0; @@ -616,7 +623,7 @@ BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, BloomFilterVal* merged = new BloomFilterVal(copy); - if ( ! merged->Typify(x->Type()) ) + if ( x->Type() && ! merged->Typify(x->Type()) ) { reporter->Error("failed to set type on merged Bloom filter"); return 0; diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index a7c51657c4..5c19eb6067 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -127,6 +127,7 @@ public: size_t Count(const Val* val) const; void Clear(); bool Empty() const; + string InternalState() const; static BloomFilterVal* Merge(const BloomFilterVal* x, const BloomFilterVal* y); diff --git a/src/RuleAction.cc b/src/RuleAction.cc index a13392ee40..ec57c96bd2 100644 --- a/src/RuleAction.cc +++ b/src/RuleAction.cc @@ -40,7 +40,7 @@ RuleActionAnalyzer::RuleActionAnalyzer(const char* arg_analyzer) string str(arg_analyzer); string::size_type pos = str.find(':'); string arg = str.substr(0, pos); - analyzer = analyzer_mgr->GetAnalyzerTag(arg.c_str()); + analyzer = analyzer_mgr->GetComponentTag(arg.c_str()); if ( ! analyzer ) reporter->Warning("unknown analyzer '%s' specified in rule", arg.c_str()); @@ -48,7 +48,7 @@ RuleActionAnalyzer::RuleActionAnalyzer(const char* arg_analyzer) if ( pos != string::npos ) { arg = str.substr(pos + 1); - child_analyzer = analyzer_mgr->GetAnalyzerTag(arg.c_str()); + child_analyzer = analyzer_mgr->GetComponentTag(arg.c_str()); if ( ! child_analyzer ) reporter->Warning("unknown analyzer '%s' specified in rule", arg.c_str()); @@ -60,11 +60,11 @@ RuleActionAnalyzer::RuleActionAnalyzer(const char* arg_analyzer) void RuleActionAnalyzer::PrintDebug() { if ( ! 
child_analyzer ) - fprintf(stderr, "|%s|\n", analyzer_mgr->GetAnalyzerName(analyzer)); + fprintf(stderr, "|%s|\n", analyzer_mgr->GetComponentName(analyzer)); else fprintf(stderr, "|%s:%s|\n", - analyzer_mgr->GetAnalyzerName(analyzer), - analyzer_mgr->GetAnalyzerName(child_analyzer)); + analyzer_mgr->GetComponentName(analyzer), + analyzer_mgr->GetComponentName(child_analyzer)); } diff --git a/src/SerialTypes.h b/src/SerialTypes.h index b008763bbf..69927afb74 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -108,8 +108,9 @@ SERIAL_VAL(MD5_VAL, 16) SERIAL_VAL(SHA1_VAL, 17) SERIAL_VAL(SHA256_VAL, 18) SERIAL_VAL(ENTROPY_VAL, 19) -SERIAL_VAL(BLOOMFILTER_VAL, 20) -SERIAL_VAL(CARDINALITY_VAL, 21) +SERIAL_VAL(TOPK_VAL, 20) +SERIAL_VAL(BLOOMFILTER_VAL, 21) +SERIAL_VAL(CARDINALITY_VAL, 22) #define SERIAL_EXPR(name, val) SERIAL_CONST(name, val, EXPR) SERIAL_EXPR(EXPR, 1) diff --git a/src/Tag.cc b/src/Tag.cc new file mode 100644 index 0000000000..178edaa71e --- /dev/null +++ b/src/Tag.cc @@ -0,0 +1,82 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "Tag.h" +#include "Val.h" + +Tag::Tag(EnumType* etype, type_t arg_type, subtype_t arg_subtype) + { + assert(arg_type > 0); + + type = arg_type; + subtype = arg_subtype; + int64_t i = (int64)(type) | ((int64)subtype << 31); + Ref(etype); + val = new EnumVal(i, etype); + } + +Tag::Tag(EnumVal* arg_val) + { + assert(arg_val); + + val = arg_val; + Ref(val); + + int64 i = val->InternalInt(); + type = i & 0xffffffff; + subtype = (i >> 31) & 0xffffffff; + } + +Tag::Tag(const Tag& other) + { + type = other.type; + subtype = other.subtype; + val = other.val; + + if ( val ) + Ref(val); + } + +Tag::Tag() + { + type = 0; + subtype = 0; + val = 0; + } + +Tag::~Tag() + { + Unref(val); + val = 0; + } + +Tag& Tag::operator=(const Tag& other) + { + if ( this != &other ) + { + type = other.type; + subtype = other.subtype; + val = other.val; + + if ( val ) + Ref(val); + } + + return *this; + } + +EnumVal* Tag::AsEnumVal(EnumType* etype) const + { + if ( ! val ) + { + assert(type == 0 && subtype == 0); + Ref(etype); + val = new EnumVal(0, etype); + } + + return val; + } + +std::string Tag::AsString() const + { + return fmt("%" PRIu32 "/%" PRIu32, type, subtype); + } diff --git a/src/Tag.h b/src/Tag.h new file mode 100644 index 0000000000..2c76f253a5 --- /dev/null +++ b/src/Tag.h @@ -0,0 +1,138 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#ifndef TAG_H +#define TAG_H + +#include "config.h" +#include "util.h" +#include "Type.h" + +class EnumVal; + +/** + * Class to identify an analyzer type. + * + * Each analyzer type gets a tag consisting of a main type and subtype. The + * former is an identifier that's unique across all analyzer classes. The latter is + * passed through to the analyzer instances for their use, yet not further + * interpreted by the analyzer infrastructure; it allows an analyzer to + * branch out into a set of sub-analyzers internally. Jointly, main type and + * subtype form an analyzer "tag". Each unique tag corresponds to a single + * "analyzer" from the user's perspective. At the script layer, these tags + * are mapped into enums of type \c Analyzer::Tag or Files::Tag. Internally, + * the analyzer::Manager and file_analysis::Manager maintain the mapping of tag + * to analyzer (and it also assigns them their main types), and + * analyzer::Component and file_analysis::Component create new tag. 
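+ * For illustration (hypothetical values): a tag with main type 5 and subtype 2 is packed into the script-layer enum value as (int64)5 | ((int64)2 << 31), and AsString() renders it as "5/2".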
+ * + * The Tag class supports all operations necessary to act as an index in a + * \c std::map. + */ +class Tag { +public: + /** + * Type for the analyzer's main type. + */ + typedef uint32 type_t; + + /** + * Type for the analyzer's subtype. + */ + typedef uint32 subtype_t; + + /** + * Returns the tag's main type. + */ + type_t Type() const { return type; } + + /** + * Returns the tag's subtype. + */ + subtype_t Subtype() const { return subtype; } + + /** + * Returns the numerical values for main and subtype inside a string + * suitable for printing. This is primarily for debugging. + */ + std::string AsString() const; + +protected: + /* + * Copy constructor. + */ + Tag(const Tag& other); + + /** + * Default constructor. This initializes the tag with an error value + * that will make \c operator \c bool return false. + */ + Tag(); + + /** + * Destructor. + */ + ~Tag(); + + /** + * Assignment operator. + */ + Tag& operator=(const Tag& other); + + /** + * Compares two tags for equality. + */ + bool operator==(const Tag& other) const + { + return type == other.type && subtype == other.subtype; + } + + /** + * Compares two tags for inequality. + */ + bool operator!=(const Tag& other) const + { + return type != other.type || subtype != other.subtype; + } + + /** + * Compares two tags for less-than relationship. + */ + bool operator<(const Tag& other) const + { + return type != other.type ? type < other.type : (subtype < other.subtype); + } + + /** + * Returns the script-layer enum that corresponds to this tag. + * The returned value does not have its ref-count increased. + * + * @param etype the script-layer enum type associated with the tag. + */ + EnumVal* AsEnumVal(EnumType* etype) const; + + /** + * Constructor. + * + * @param etype the script-layer enum type associated with the tag. + * + * @param type The main type. Note that the manager class manages the + * the value space internally, so noone else should assign main types. + * + * @param subtype The sub type, which is left to an analyzer for + * interpretation. By default it's set to zero. + */ + Tag(EnumType* etype, type_t type, subtype_t subtype = 0); + + /** + * Constructor. + * + * @param val An enum value of script type \c Analyzer::Tag. + */ + Tag(EnumVal* val); + +private: + type_t type; // Main type. + subtype_t subtype; // Subtype. + mutable EnumVal* val; // Script-layer value. +}; + +#endif diff --git a/src/analyzer/Analyzer.cc b/src/analyzer/Analyzer.cc index ecd3c9f686..b8b739f3cb 100644 --- a/src/analyzer/Analyzer.cc +++ b/src/analyzer/Analyzer.cc @@ -70,12 +70,12 @@ void AnalyzerTimer::Init(Analyzer* arg_analyzer, analyzer_timer_func arg_timer, Ref(analyzer->Conn()); } -analyzer::ID Analyzer::id_counter = 0;; +analyzer::ID Analyzer::id_counter = 0; const char* Analyzer::GetAnalyzerName() const { assert(tag); - return analyzer_mgr->GetAnalyzerName(tag); + return analyzer_mgr->GetComponentName(tag); } void Analyzer::SetAnalyzerTag(const Tag& arg_tag) @@ -87,7 +87,7 @@ void Analyzer::SetAnalyzerTag(const Tag& arg_tag) bool Analyzer::IsAnalyzer(const char* name) { assert(tag); - return strcmp(analyzer_mgr->GetAnalyzerName(tag), name) == 0; + return strcmp(analyzer_mgr->GetComponentName(tag), name) == 0; } // Used in debugging output. @@ -98,7 +98,7 @@ static string fmt_analyzer(Analyzer* a) Analyzer::Analyzer(const char* name, Connection* conn) { - Tag tag = analyzer_mgr->GetAnalyzerTag(name); + Tag tag = analyzer_mgr->GetComponentTag(name); if ( ! 
tag ) reporter->InternalError("unknown analyzer name %s; mismatch with tag analyzer::Component?", name); @@ -494,7 +494,7 @@ Analyzer* Analyzer::FindChild(Tag arg_tag) Analyzer* Analyzer::FindChild(const char* name) { - Tag tag = analyzer_mgr->GetAnalyzerTag(name); + Tag tag = analyzer_mgr->GetComponentTag(name); return tag ? FindChild(tag) : 0; } diff --git a/src/analyzer/Component.cc b/src/analyzer/Component.cc index cbb0f40c20..66ab2213bb 100644 --- a/src/analyzer/Component.cc +++ b/src/analyzer/Component.cc @@ -8,29 +8,26 @@ using namespace analyzer; -Tag::type_t Component::type_counter = 0; - Component::Component(const char* arg_name, factory_callback arg_factory, Tag::subtype_t arg_subtype, bool arg_enabled, bool arg_partial) - : plugin::Component(plugin::component::ANALYZER) + : plugin::Component(plugin::component::ANALYZER), + plugin::TaggedComponent<analyzer::Tag>(arg_subtype) { name = copy_string(arg_name); canon_name = canonify_name(arg_name); factory = arg_factory; enabled = arg_enabled; partial = arg_partial; - - tag = analyzer::Tag(++type_counter, arg_subtype); } Component::Component(const Component& other) - : plugin::Component(Type()) + : plugin::Component(Type()), + plugin::TaggedComponent<analyzer::Tag>(other) { name = copy_string(other.name); canon_name = copy_string(other.canon_name); factory = other.factory; enabled = other.enabled; partial = other.partial; - tag = other.tag; } Component::~Component() @@ -39,11 +36,6 @@ Component::~Component() delete [] canon_name; } -analyzer::Tag Component::Tag() const - { - return tag; - } - void Component::Describe(ODesc* d) const { plugin::Component::Describe(d); @@ -63,13 +55,14 @@ void Component::Describe(ODesc* d) const Component& Component::operator=(const Component& other) { + plugin::TaggedComponent<analyzer::Tag>::operator=(other); + if ( &other != this ) { name = copy_string(other.name); factory = other.factory; enabled = other.enabled; partial = other.partial; - tag = other.tag; } return *this; diff --git a/src/analyzer/Component.h b/src/analyzer/Component.h index 9e12ed347e..9bc8b357d7 100644 --- a/src/analyzer/Component.h +++ b/src/analyzer/Component.h @@ -5,6 +5,7 @@ #include "Tag.h" #include "plugin/Component.h" +#include "plugin/TaggedComponent.h" #include "../config.h" #include "../util.h" @@ -21,7 +22,8 @@ class Analyzer; * A plugin can provide a specific protocol analyzer by registering this * analyzer component, describing the analyzer. */ -class Component : public plugin::Component { +class Component : public plugin::Component, + public plugin::TaggedComponent<analyzer::Tag> { public: typedef Analyzer* (*factory_callback)(Connection* conn); @@ -100,13 +102,6 @@ public: */ bool Enabled() const { return enabled; } - /** - * Returns the analyzer's tag. Note that this is automatically - * generated for each new Components, and hence unique across all of - * them. - */ - analyzer::Tag Tag() const; - /** * Enables or disables this analyzer. * @@ -128,11 +123,7 @@ private: const char* canon_name; // The analyzer's canonical name. factory_callback factory; // The analyzer's factory callback. bool partial; // True if the analyzer supports partial connections. - analyzer::Tag tag; // The automatically assigned analyzer tag. bool enabled; // True if the analyzer is enabled. - - // Global counter used to generate unique tags.
- static analyzer::Tag::type_t type_counter; }; } diff --git a/src/analyzer/Manager.cc b/src/analyzer/Manager.cc index 8b290e2341..2359e4ec98 100644 --- a/src/analyzer/Manager.cc +++ b/src/analyzer/Manager.cc @@ -60,10 +60,8 @@ bool Manager::ConnIndex::operator<(const ConnIndex& other) const } Manager::Manager() + : plugin::ComponentManager<analyzer::Tag, analyzer::Component>("Analyzer") { - tag_enum_type = new EnumType("Analyzer::Tag"); - ::ID* id = install_ID("Tag", "Analyzer", true, true); - add_type(id, tag_enum_type, 0, 0); } Manager::~Manager() @@ -91,14 +89,14 @@ void Manager::InitPreScript() std::list<Component*> analyzers = plugin_mgr->Components<Component>(); for ( std::list<Component*>::const_iterator i = analyzers.begin(); i != analyzers.end(); i++ ) - RegisterAnalyzerComponent(*i); + RegisterComponent(*i, "ANALYZER_"); // Cache these tags. - analyzer_backdoor = GetAnalyzerTag("BACKDOOR"); - analyzer_connsize = GetAnalyzerTag("CONNSIZE"); - analyzer_interconn = GetAnalyzerTag("INTERCONN"); - analyzer_stepping = GetAnalyzerTag("STEPPINGSTONE"); - analyzer_tcpstats = GetAnalyzerTag("TCPSTATS"); + analyzer_backdoor = GetComponentTag("BACKDOOR"); + analyzer_connsize = GetComponentTag("CONNSIZE"); + analyzer_interconn = GetComponentTag("INTERCONN"); + analyzer_stepping = GetComponentTag("STEPPINGSTONE"); + analyzer_tcpstats = GetComponentTag("TCPSTATS"); } void Manager::InitPostScript() @@ -109,8 +107,9 @@ void Manager::DumpDebug() { #ifdef DEBUG DBG_LOG(DBG_ANALYZER, "Available analyzers after bro_init():"); - for ( analyzer_map_by_name::const_iterator i = analyzers_by_name.begin(); i != analyzers_by_name.end(); i++ ) - DBG_LOG(DBG_ANALYZER, " %s (%s)", i->second->Name(), IsEnabled(i->second->Tag()) ? "enabled" : "disabled"); + list<Component*> all_analyzers = GetComponents(); + for ( list<Component*>::const_iterator i = all_analyzers.begin(); i != all_analyzers.end(); ++i ) + DBG_LOG(DBG_ANALYZER, " %s (%s)", (*i)->Name(), IsEnabled((*i)->Tag()) ?
"enabled" : "disabled"); DBG_LOG(DBG_ANALYZER, ""); DBG_LOG(DBG_ANALYZER, "Analyzers by port:"); @@ -120,7 +119,7 @@ void Manager::DumpDebug() string s; for ( tag_set::const_iterator j = i->second->begin(); j != i->second->end(); j++ ) - s += string(GetAnalyzerName(*j)) + " "; + s += string(GetComponentName(*j)) + " "; DBG_LOG(DBG_ANALYZER, " %d/tcp: %s", i->first, s.c_str()); } @@ -130,7 +129,7 @@ void Manager::DumpDebug() string s; for ( tag_set::const_iterator j = i->second->begin(); j != i->second->end(); j++ ) - s += string(GetAnalyzerName(*j)) + " "; + s += string(GetComponentName(*j)) + " "; DBG_LOG(DBG_ANALYZER, " %d/udp: %s", i->first, s.c_str()); } @@ -142,25 +141,6 @@ void Manager::Done() { } -void Manager::RegisterAnalyzerComponent(Component* component) - { - const char* cname = component->CanonicalName(); - - if ( Lookup(cname) ) - reporter->FatalError("Analyzer %s defined more than once", cname); - - DBG_LOG(DBG_ANALYZER, "Registering analyzer %s (tag %s)", - component->Name(), component->Tag().AsString().c_str()); - - analyzers_by_name.insert(std::make_pair(cname, component)); - analyzers_by_tag.insert(std::make_pair(component->Tag(), component)); - analyzers_by_val.insert(std::make_pair(component->Tag().AsEnumVal()->InternalInt(), component)); - - // Install enum "Analyzer::ANALYZER_*" - string id = fmt("ANALYZER_%s", cname); - tag_enum_type->AddName("Analyzer", id.c_str(), component->Tag().AsEnumVal()->InternalInt(), true); - } - bool Manager::EnableAnalyzer(Tag tag) { Component* p = Lookup(tag); @@ -217,8 +197,9 @@ void Manager::DisableAllAnalyzers() { DBG_LOG(DBG_ANALYZER, "Disabling all analyzers"); - for ( analyzer_map_by_tag::const_iterator i = analyzers_by_tag.begin(); i != analyzers_by_tag.end(); i++ ) - i->second->SetEnabled(false); + list all_analyzers = GetComponents(); + for ( list::const_iterator i = all_analyzers.begin(); i != all_analyzers.end(); ++i ) + (*i)->SetEnabled(false); } bool Manager::IsEnabled(Tag tag) @@ -270,7 +251,7 @@ bool Manager::RegisterAnalyzerForPort(Tag tag, TransportProto proto, uint32 port tag_set* l = LookupPort(proto, port, true); #ifdef DEBUG - const char* name = GetAnalyzerName(tag); + const char* name = GetComponentName(tag); DBG_LOG(DBG_ANALYZER, "Registering analyzer %s for port %" PRIu32 "/%d", name, port, proto); #endif @@ -283,7 +264,7 @@ bool Manager::UnregisterAnalyzerForPort(Tag tag, TransportProto proto, uint32 po tag_set* l = LookupPort(proto, port, true); #ifdef DEBUG - const char* name = GetAnalyzerName(tag); + const char* name = GetComponentName(tag); DBG_LOG(DBG_ANALYZER, "Unregistering analyzer %s for port %" PRIu32 "/%d", name, port, proto); #endif @@ -302,7 +283,7 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, Connection* conn) return 0; if ( ! c->Factory() ) - reporter->InternalError("analyzer %s cannot be instantiated dynamically", GetAnalyzerName(tag)); + reporter->InternalError("analyzer %s cannot be instantiated dynamically", GetComponentName(tag)); Analyzer* a = c->Factory()(conn); @@ -316,59 +297,10 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, Connection* conn) Analyzer* Manager::InstantiateAnalyzer(const char* name, Connection* conn) { - Tag tag = GetAnalyzerTag(name); + Tag tag = GetComponentTag(name); return tag ? InstantiateAnalyzer(tag, conn) : 0; } -const char* Manager::GetAnalyzerName(Tag tag) - { - static const char* error = ""; - - if ( ! tag ) - return error; - - Component* c = Lookup(tag); - - if ( ! 
c ) - reporter->InternalError("request for name of unknown analyzer tag %s", tag.AsString().c_str()); - - return c->CanonicalName(); - } - -const char* Manager::GetAnalyzerName(Val* val) - { - return GetAnalyzerName(Tag(val->AsEnumVal())); - } - -Tag Manager::GetAnalyzerTag(const char* name) - { - Component* c = Lookup(name); - return c ? c->Tag() : Tag(); - } - -EnumType* Manager::GetTagEnumType() - { - return tag_enum_type; - } - -Component* Manager::Lookup(const char* name) - { - analyzer_map_by_name::const_iterator i = analyzers_by_name.find(to_upper(name)); - return i != analyzers_by_name.end() ? i->second : 0; - } - -Component* Manager::Lookup(const Tag& tag) - { - analyzer_map_by_tag::const_iterator i = analyzers_by_tag.find(tag); - return i != analyzers_by_tag.end() ? i->second : 0; - } - -Component* Manager::Lookup(EnumVal* val) - { - analyzer_map_by_val::const_iterator i = analyzers_by_val.find(val->InternalInt()); - return i != analyzers_by_val.end() ? i->second : 0; - } - Manager::tag_set* Manager::LookupPort(TransportProto proto, uint32 port, bool add_if_not_found) { analyzer_map_by_port* m = 0; @@ -461,7 +393,7 @@ bool Manager::BuildInitialAnalyzerTree(Connection* conn) root->AddChildAnalyzer(analyzer, false); DBG_ANALYZER_ARGS(conn, "activated %s analyzer as scheduled", - analyzer_mgr->GetAnalyzerName(*i)); + analyzer_mgr->GetComponentName(*i)); } } @@ -487,7 +419,7 @@ bool Manager::BuildInitialAnalyzerTree(Connection* conn) root->AddChildAnalyzer(analyzer, false); DBG_ANALYZER_ARGS(conn, "activated %s analyzer due to port %d", - analyzer_mgr->GetAnalyzerName(*j), resp_port); + analyzer_mgr->GetComponentName(*j), resp_port); } } } @@ -613,7 +545,7 @@ void Manager::ExpireScheduledAnalyzers() conns.erase(i); DBG_LOG(DBG_ANALYZER, "Expiring expected analyzer %s for connection %s", - analyzer_mgr->GetAnalyzerName(a->analyzer), + analyzer_mgr->GetComponentName(a->analyzer), fmt_conn_id(a->conn.orig, 0, a->conn.resp, a->conn.resp_p)); delete a; @@ -655,7 +587,7 @@ void Manager::ScheduleAnalyzer(const IPAddr& orig, const IPAddr& resp, TransportProto proto, const char* analyzer, double timeout) { - Tag tag = GetAnalyzerTag(analyzer); + Tag tag = GetComponentTag(analyzer); if ( tag != Tag() ) ScheduleAnalyzer(orig, resp, resp_p, proto, tag, timeout); diff --git a/src/analyzer/Manager.h b/src/analyzer/Manager.h index efae629971..d151709eda 100644 --- a/src/analyzer/Manager.h +++ b/src/analyzer/Manager.h @@ -26,6 +26,7 @@ #include "Analyzer.h" #include "Component.h" #include "Tag.h" +#include "plugin/ComponentManager.h" #include "../Dict.h" #include "../net_util.h" @@ -49,7 +50,7 @@ namespace analyzer { * classes. This allows to external analyzer code to potentially use a * different C++ standard library. */ -class Manager { +class Manager : public plugin::ComponentManager { public: /** * Constructor. @@ -231,42 +232,6 @@ public: */ Analyzer* InstantiateAnalyzer(const char* name, Connection* c); - /** - * Translates an analyzer tag into corresponding analyzer name. - * - * @param tag The analyzer tag. - * - * @return The name, or an empty string if the tag is invalid. - */ - const char* GetAnalyzerName(Tag tag); - - /** - * Translates an script-level analyzer tag into corresponding - * analyzer name. - * - * @param val The analyzer tag as an script-level enum value of type - * \c Analyzer::Tag. - * - * @return The name, or an empty string if the tag is invalid. - */ - const char* GetAnalyzerName(Val* val); - - /** - * Translates an analyzer name into the corresponding tag. 
- * - * @param name The name. - * - * @return The tag. If the name does not correspond to a valid - * analyzer, the returned tag will evaluate to false. - */ - Tag GetAnalyzerTag(const char* name); - - /** - * Returns the enum type that corresponds to the script-level type \c - * Analyzer::Tag. - */ - EnumType* GetTagEnumType(); - /** * Given the first packet of a connection, builds its initial * analyzer tree. @@ -350,18 +315,8 @@ public: private: typedef set tag_set; - typedef map analyzer_map_by_name; - typedef map analyzer_map_by_tag; - typedef map analyzer_map_by_val; typedef map analyzer_map_by_port; - void RegisterAnalyzerComponent(Component* component); // Takes ownership. - - Component* Lookup(const string& name); - Component* Lookup(const char* name); - Component* Lookup(const Tag& tag); - Component* Lookup(EnumVal* val); - tag_set* LookupPort(PortVal* val, bool add_if_not_found); tag_set* LookupPort(TransportProto proto, uint32 port, bool add_if_not_found); @@ -370,9 +325,6 @@ private: analyzer_map_by_port analyzers_by_port_tcp; analyzer_map_by_port analyzers_by_port_udp; - analyzer_map_by_name analyzers_by_name; - analyzer_map_by_tag analyzers_by_tag; - analyzer_map_by_val analyzers_by_val; Tag analyzer_backdoor; Tag analyzer_connsize; @@ -380,8 +332,6 @@ private: Tag analyzer_stepping; Tag analyzer_tcpstats; - EnumType* tag_enum_type; - //// Data structures to track analyzed scheduled for future connections. // The index for a scheduled connection. diff --git a/src/analyzer/Tag.cc b/src/analyzer/Tag.cc index 2f04ff17da..3ab41daf78 100644 --- a/src/analyzer/Tag.cc +++ b/src/analyzer/Tag.cc @@ -3,90 +3,20 @@ #include "Tag.h" #include "Manager.h" -#include "../NetVar.h" +analyzer::Tag analyzer::Tag::Error; -using namespace analyzer; - -Tag Tag::Error; - -Tag::Tag(type_t arg_type, subtype_t arg_subtype) +analyzer::Tag::Tag(type_t type, subtype_t subtype) + : ::Tag(analyzer_mgr->GetTagEnumType(), type, subtype) { - assert(arg_type > 0); - - type = arg_type; - subtype = arg_subtype; - int64_t i = (int64)(type) | ((int64)subtype << 31); - - EnumType* etype = analyzer_mgr->GetTagEnumType(); - Ref(etype); - val = new EnumVal(i, etype); } -Tag::Tag(EnumVal* arg_val) +analyzer::Tag& analyzer::Tag::operator=(const analyzer::Tag& other) { - assert(arg_val); - - val = arg_val; - Ref(val); - - int64 i = val->InternalInt(); - type = i & 0xffffffff; - subtype = (i >> 31) & 0xffffffff; - } - -Tag::Tag(const Tag& other) - { - type = other.type; - subtype = other.subtype; - val = other.val; - - if ( val ) - Ref(val); - } - -Tag::Tag() - { - type = 0; - subtype = 0; - val = 0; - } - -Tag::~Tag() - { - Unref(val); - val = 0; - } - -Tag& Tag::operator=(const Tag& other) - { - if ( this != &other ) - { - type = other.type; - subtype = other.subtype; - val = other.val; - - if ( val ) - Ref(val); - } - + ::Tag::operator=(other); return *this; } -EnumVal* Tag::AsEnumVal() const +EnumVal* analyzer::Tag::AsEnumVal() const { - if ( ! 
val ) - { - assert(analyzer_mgr); - assert(type == 0 && subtype == 0); - EnumType* etype = analyzer_mgr->GetTagEnumType(); - Ref(etype); - val = new EnumVal(0, etype); - } - - return val; - } - -std::string Tag::AsString() const - { - return fmt("%" PRIu32 "/%" PRIu32, type, subtype); + return ::Tag::AsEnumVal(analyzer_mgr->GetTagEnumType()); } diff --git a/src/analyzer/Tag.h b/src/analyzer/Tag.h index edb0ade8a7..d01c8902ee 100644 --- a/src/analyzer/Tag.h +++ b/src/analyzer/Tag.h @@ -5,90 +5,46 @@ #include "config.h" #include "util.h" +#include "../Tag.h" +#include "plugin/TaggedComponent.h" +#include "plugin/ComponentManager.h" class EnumVal; -namespace file_analysis { -class Manager; -class Component; -} - namespace analyzer { class Manager; class Component; /** - * Class to identify an analyzer type. + * Class to identify a protocol analyzer type. * - * Each analyzer type gets a tag consisting of a main type and subtype. The - * former is an identifier that's unique all analyzer classes. The latter is - * passed through to the analyzer instances for their use, yet not further - * interpreted by the analyzer infrastructure; it allows an analyzer to - * branch out into a set of sub-analyzers internally. Jointly, main type and - * subtype form an analyzer "tag". Each unique tag corresponds to a single - * "analyzer" from the user's perspective. At the script layer, these tags - * are mapped into enums of type \c Analyzer::Tag. Internally, the - * analyzer::Manager maintains the mapping of tag to analyzer (and it also - * assigns them their main types), and analyzer::Component creates new - * tags. - * - * The Tag class supports all operations necessary to act as an index in a - * \c std::map. + * The script-layer analogue is Analyzer::Tag. */ -class Tag { +class Tag : public ::Tag { public: - /** - * Type for the analyzer's main type. - */ - typedef uint32 type_t; - - /** - * Type for the analyzer's subtype. - */ - typedef uint32 subtype_t; - /* * Copy constructor. */ - Tag(const Tag& other); + Tag(const Tag& other) : ::Tag(other) {} /** * Default constructor. This initializes the tag with an error value * that will make \c operator \c bool return false. */ - Tag(); + Tag() : ::Tag() {} /** * Destructor. */ - ~Tag(); - - /** - * Returns the tag's main type. - */ - type_t Type() const { return type; } - - /** - * Returns the tag's subtype. - */ - subtype_t Subtype() const { return subtype; } - - /** - * Returns the \c Analyzer::Tag enum that corresponds to this tag. - * The returned value is \a does not have its ref-count increased. - */ - EnumVal* AsEnumVal() const; - - /** - * Returns the numerical values for main and subtype inside a string - * suitable for printing. This is primarily for debugging. - */ - std::string AsString() const; + ~Tag() {} /** * Returns false if the tag represents an error value rather than a * legal analyzer type. + * TODO: make this conversion operator "explicit" (C++11) or use a + * "safe bool" idiom (not necessary if "explicit" is available), + * otherwise this may allow nonsense/undesired comparison operations. 
*/ operator bool() const { return *this != Tag(); } @@ -102,7 +58,7 @@ public: */ bool operator==(const Tag& other) const { - return type == other.type && subtype == other.subtype; + return ::Tag::operator==(other); } /** @@ -110,7 +66,7 @@ public: */ bool operator!=(const Tag& other) const { - return type != other.type || subtype != other.subtype; + return ::Tag::operator!=(other); } /** @@ -118,23 +74,30 @@ public: */ bool operator<(const Tag& other) const { - return type != other.type ? type < other.type : (subtype < other.subtype); + return ::Tag::operator<(other); } + /** + * Returns the \c Analyzer::Tag enum that corresponds to this tag. + * The returned value does not have its ref-count increased. + * + * @param etype the script-layer enum type associated with the tag. + */ + EnumVal* AsEnumVal() const; + static Tag Error; protected: friend class analyzer::Manager; - friend class analyzer::Component; - friend class file_analysis::Manager; - friend class file_analysis::Component; + friend class plugin::ComponentManager; + friend class plugin::TaggedComponent; /** * Constructor. * * @param type The main type. Note that the \a analyzer::Manager * manages the value space internally, so noone else should assign - * any main tyoes. + * any main types. * * @param subtype The sub type, which is left to an analyzer for * interpretation. By default it's set to zero. @@ -144,14 +107,9 @@ protected: /** * Constructor. * - * @param val An enuam value of script type \c Analyzer::Tag. + * @param val An enum value of script type \c Analyzer::Tag. */ - Tag(EnumVal* val); - -private: - type_t type; // Main type. - subtype_t subtype; // Subtype. - mutable EnumVal* val; // Analyzer::Tag value. + Tag(EnumVal* val) : ::Tag(val) {} }; } diff --git a/src/analyzer/analyzer.bif b/src/analyzer/analyzer.bif index 4d70816075..ebf8083624 100644 --- a/src/analyzer/analyzer.bif +++ b/src/analyzer/analyzer.bif @@ -41,11 +41,11 @@ function Analyzer::__schedule_analyzer%(orig: addr, resp: addr, resp_p: port, function __name%(atype: Analyzer::Tag%) : string %{ - return new StringVal(analyzer_mgr->GetAnalyzerName(atype)); + return new StringVal(analyzer_mgr->GetComponentName(atype)); %} function __tag%(name: string%) : Analyzer::Tag %{ - analyzer::Tag t = analyzer_mgr->GetAnalyzerTag(name->CheckString()); + analyzer::Tag t = analyzer_mgr->GetComponentTag(name->CheckString()); return t.AsEnumVal()->Ref(); %} diff --git a/src/analyzer/protocol/dhcp/DHCP.cc b/src/analyzer/protocol/dhcp/DHCP.cc index 8d05aef37d..1fa8759fbf 100644 --- a/src/analyzer/protocol/dhcp/DHCP.cc +++ b/src/analyzer/protocol/dhcp/DHCP.cc @@ -1,4 +1,3 @@ - #include "DHCP.h" #include "events.bif.h" diff --git a/src/analyzer/protocol/dhcp/dhcp-analyzer.pac b/src/analyzer/protocol/dhcp/dhcp-analyzer.pac index 5267075445..f58a2d9b5e 100644 --- a/src/analyzer/protocol/dhcp/dhcp-analyzer.pac +++ b/src/analyzer/protocol/dhcp/dhcp-analyzer.pac @@ -8,12 +8,10 @@ flow DHCP_Flow(is_orig: bool) { %member{ BroVal dhcp_msg_val_; - BroAnalyzer interp; %} %init{ dhcp_msg_val_ = 0; - interp = connection->bro_analyzer(); %} %cleanup{ @@ -45,7 +43,7 @@ flow DHCP_Flow(is_orig: bool) { } if ( type == 0 ) - interp->Weird("DHCP_no_type_option"); + connection()->bro_analyzer()->ProtocolViolation("no DHCP message type option"); return type; %} @@ -56,54 +54,63 @@ flow DHCP_Flow(is_orig: bool) { // Requested IP address to the server. ::uint32 req_addr = 0, serv_addr = 0; + StringVal* host_name = 0; - for ( ptr = options->begin(); - ptr != options->end() && ! 
(*ptr)->last(); ++ptr ) + for ( ptr = options->begin(); ptr != options->end() && ! (*ptr)->last(); ++ptr ) { - switch ( (*ptr)->code() ) { - case REQ_IP_OPTION: - req_addr = htonl((*ptr)->info()->req_addr()); - break; + switch ( (*ptr)->code() ) + { + case REQ_IP_OPTION: + req_addr = htonl((*ptr)->info()->req_addr()); + break; - case SERV_ID_OPTION: - serv_addr = htonl((*ptr)->info()->serv_addr()); - break; - } + case SERV_ID_OPTION: + serv_addr = htonl((*ptr)->info()->serv_addr()); + break; + + case HOST_NAME_OPTION: + host_name = new StringVal((*ptr)->info()->host_name().length(), + (const char*) (*ptr)->info()->host_name().begin()); + break; + } } + if ( host_name == 0 ) + host_name = new StringVal(""); + switch ( type ) - { - case DHCPDISCOVER: - BifEvent::generate_dhcp_discover(connection()->bro_analyzer(), - connection()->bro_analyzer()->Conn(), - dhcp_msg_val_->Ref(), new AddrVal(req_addr)); - break; + { + case DHCPDISCOVER: + BifEvent::generate_dhcp_discover(connection()->bro_analyzer(), + connection()->bro_analyzer()->Conn(), + dhcp_msg_val_->Ref(), new AddrVal(req_addr), host_name); + break; - case DHCPREQUEST: - BifEvent::generate_dhcp_request(connection()->bro_analyzer(), - connection()->bro_analyzer()->Conn(), - dhcp_msg_val_->Ref(), new AddrVal(req_addr), - new AddrVal(serv_addr)); - break; + case DHCPREQUEST: + BifEvent::generate_dhcp_request(connection()->bro_analyzer(), + connection()->bro_analyzer()->Conn(), + dhcp_msg_val_->Ref(), new AddrVal(req_addr), + new AddrVal(serv_addr), host_name); + break; - case DHCPDECLINE: - BifEvent::generate_dhcp_decline(connection()->bro_analyzer(), - connection()->bro_analyzer()->Conn(), - dhcp_msg_val_->Ref()); - break; + case DHCPDECLINE: + BifEvent::generate_dhcp_decline(connection()->bro_analyzer(), + connection()->bro_analyzer()->Conn(), + dhcp_msg_val_->Ref(), host_name); + break; - case DHCPRELEASE: - BifEvent::generate_dhcp_release(connection()->bro_analyzer(), - connection()->bro_analyzer()->Conn(), - dhcp_msg_val_->Ref()); - break; + case DHCPRELEASE: + BifEvent::generate_dhcp_release(connection()->bro_analyzer(), + connection()->bro_analyzer()->Conn(), + dhcp_msg_val_->Ref(), host_name); + break; - case DHCPINFORM: - BifEvent::generate_dhcp_inform(connection()->bro_analyzer(), - connection()->bro_analyzer()->Conn(), - dhcp_msg_val_->Ref()); - break; - } + case DHCPINFORM: + BifEvent::generate_dhcp_inform(connection()->bro_analyzer(), + connection()->bro_analyzer()->Conn(), + dhcp_msg_val_->Ref(), host_name); + break; + } return true; %} @@ -118,72 +125,83 @@ flow DHCP_Flow(is_orig: bool) { ::uint32 subnet_mask = 0, serv_addr = 0; uint32 lease = 0; + StringVal* host_name = 0; for ( ptr = options->begin(); ptr != options->end() && ! (*ptr)->last(); ++ptr ) { - switch ( (*ptr)->code() ) { - case SUBNET_OPTION: - subnet_mask = htonl((*ptr)->info()->mask()); - break; - - case ROUTER_OPTION: - // Let's hope there aren't multiple - // such options. 
- Unref(router_list); - router_list = new TableVal(dhcp_router_list); - + switch ( (*ptr)->code() ) { - int num_routers = - (*ptr)->info()->router_list()->size(); + case SUBNET_OPTION: + subnet_mask = htonl((*ptr)->info()->mask()); + break; - for ( int i = 0; i < num_routers; ++i ) - { - vector* rlist = - (*ptr)->info()->router_list(); - uint32 raddr = (*rlist)[i]; - ::uint32 tmp_addr; - tmp_addr = htonl(raddr); - // index starting from 1 - Val* index = new Val(i + 1, TYPE_COUNT); - router_list->Assign(index, new AddrVal(tmp_addr)); - Unref(index); - } + case ROUTER_OPTION: + // Let's hope there aren't multiple + // such options. + Unref(router_list); + router_list = new TableVal(dhcp_router_list); + + { + int num_routers = (*ptr)->info()->router_list()->size(); + + for ( int i = 0; i < num_routers; ++i ) + { + vector* rlist = (*ptr)->info()->router_list(); + + uint32 raddr = (*rlist)[i]; + ::uint32 tmp_addr; + tmp_addr = htonl(raddr); + + // index starting from 1 + Val* index = new Val(i + 1, TYPE_COUNT); + router_list->Assign(index, new AddrVal(tmp_addr)); + Unref(index); + } + } + break; + + case LEASE_OPTION: + lease = (*ptr)->info()->lease(); + break; + + case SERV_ID_OPTION: + serv_addr = htonl((*ptr)->info()->serv_addr()); + break; + + case HOST_NAME_OPTION: + host_name = new StringVal((*ptr)->info()->host_name().length(), + (const char*) (*ptr)->info()->host_name().begin()); + break; } - break; - - case LEASE_OPTION: - lease = (*ptr)->info()->lease(); - break; - - case SERV_ID_OPTION: - serv_addr = htonl((*ptr)->info()->serv_addr()); - break; - } } - switch ( type ) { - case DHCPOFFER: - BifEvent::generate_dhcp_offer(connection()->bro_analyzer(), - connection()->bro_analyzer()->Conn(), - dhcp_msg_val_->Ref(), new AddrVal(subnet_mask), - router_list, lease, new AddrVal(serv_addr)); - break; + if ( host_name == 0 ) + host_name = new StringVal(""); - case DHCPACK: - BifEvent::generate_dhcp_ack(connection()->bro_analyzer(), - connection()->bro_analyzer()->Conn(), - dhcp_msg_val_->Ref(), new AddrVal(subnet_mask), - router_list, lease, new AddrVal(serv_addr)); - break; + switch ( type ) + { + case DHCPOFFER: + BifEvent::generate_dhcp_offer(connection()->bro_analyzer(), + connection()->bro_analyzer()->Conn(), + dhcp_msg_val_->Ref(), new AddrVal(subnet_mask), + router_list, lease, new AddrVal(serv_addr), host_name); + break; - case DHCPNAK: - BifEvent::generate_dhcp_nak(connection()->bro_analyzer(), - connection()->bro_analyzer()->Conn(), - dhcp_msg_val_->Ref()); - break; + case DHCPACK: + BifEvent::generate_dhcp_ack(connection()->bro_analyzer(), + connection()->bro_analyzer()->Conn(), + dhcp_msg_val_->Ref(), new AddrVal(subnet_mask), + router_list, lease, new AddrVal(serv_addr), host_name); + break; - } + case DHCPNAK: + BifEvent::generate_dhcp_nak(connection()->bro_analyzer(), + connection()->bro_analyzer()->Conn(), + dhcp_msg_val_->Ref(), host_name); + break; + + } return true; @@ -195,7 +213,10 @@ flow DHCP_Flow(is_orig: bool) { // DHCP or BOOTP. If not, we are unable to interpret // the message options. if ( ${msg.cookie} != 0x63825363 ) + { + connection()->bro_analyzer()->ProtocolViolation(fmt("bad cookie (%d)", ${msg.cookie})); return false; + } Unref(dhcp_msg_val_); RecordVal* r = new RecordVal(dhcp_msg); @@ -203,40 +224,44 @@ flow DHCP_Flow(is_orig: bool) { r->Assign(0, new Val(${msg.op}, TYPE_COUNT)); r->Assign(1, new Val(${msg.type}, TYPE_COUNT)); r->Assign(2, new Val(${msg.xid}, TYPE_COUNT)); - - // We want only 6 bytes for Ethernet address. 
- r->Assign(3, new StringVal(6, (const char*) ${msg.chaddr}.begin())); - + r->Assign(3, new StringVal(fmt_mac(${msg.chaddr}.data(), ${msg.chaddr}.length()))); r->Assign(4, new AddrVal(${msg.ciaddr})); r->Assign(5, new AddrVal(${msg.yiaddr})); dhcp_msg_val_ = r; - switch ( ${msg.op} ) { - case BOOTREQUEST: // presumablye from client to server - if ( ${msg.type} == DHCPDISCOVER || - ${msg.type} == DHCPREQUEST || - ${msg.type} == DHCPDECLINE || - ${msg.type} == DHCPRELEASE || - ${msg.type} == DHCPINFORM ) - parse_request(${msg.options}, ${msg.type}); - else - interp->Weird("DHCP_wrong_msg_type"); - break; + switch ( ${msg.op} ) + { + case BOOTREQUEST: // presumably from client to server + if ( ${msg.type} == DHCPDISCOVER || + ${msg.type} == DHCPREQUEST || + ${msg.type} == DHCPDECLINE || + ${msg.type} == DHCPRELEASE || + ${msg.type} == DHCPINFORM ) + parse_request(${msg.options}, ${msg.type}); + else + connection()->bro_analyzer()->ProtocolViolation(fmt("unknown DHCP message type option for BOOTREQUEST (%d)", + ${msg.type})); + break; - case BOOTREPLY: // presumably from server to client - if ( ${msg.type} == DHCPOFFER || - ${msg.type} == DHCPACK || ${msg.type} == DHCPNAK ) - parse_reply(${msg.options}, ${msg.type}); - else - interp->Weird("DHCP_wrong_msg_type"); - break; + case BOOTREPLY: // presumably from server to client + if ( ${msg.type} == DHCPOFFER || + ${msg.type} == DHCPACK || + ${msg.type} == DHCPNAK ) + parse_reply(${msg.options}, ${msg.type}); + else + connection()->bro_analyzer()->ProtocolViolation(fmt("unknown DHCP message type option for BOOTREPLY (%d)", + ${msg.type})); - default: - interp->Weird("DHCP_wrong_op_type"); - break; - } + break; + default: + connection()->bro_analyzer()->ProtocolViolation(fmt("unknown DHCP message op code (%d). Known codes: 1=BOOTREQUEST, 2=BOOTREPLY", + ${msg.op})); + break; + } + + connection()->bro_analyzer()->ProtocolConfirmation(); return true; %} }; diff --git a/src/analyzer/protocol/dhcp/dhcp-protocol.pac b/src/analyzer/protocol/dhcp/dhcp-protocol.pac index d77780b1b3..cf8cf69b26 100644 --- a/src/analyzer/protocol/dhcp/dhcp-protocol.pac +++ b/src/analyzer/protocol/dhcp/dhcp-protocol.pac @@ -10,13 +10,14 @@ enum OP_type { # The option types are by no means complete. # Anyone can add a new option type in RFC 1533 to be parsed here. enum OPTION_type { - SUBNET_OPTION = 1, - ROUTER_OPTION = 3, - REQ_IP_OPTION = 50, - LEASE_OPTION = 51, - MSG_TYPE_OPTION = 53, - SERV_ID_OPTION = 54, # Server address, actually :) - END_OPTION = 255, + SUBNET_OPTION = 1, + ROUTER_OPTION = 3, + HOST_NAME_OPTION = 12, + REQ_IP_OPTION = 50, + LEASE_OPTION = 51, + MSG_TYPE_OPTION = 53, + SERV_ID_OPTION = 54, # Server address, actually :) + END_OPTION = 255, }; # Refer to RFC 1533 for message types (with option = 53). 
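With the host name option (code 12) now parsed, every DHCP event gains a host_name argument at the script layer (see the events.bif changes below). A minimal handler sketch (the printed message is purely illustrative):

    event dhcp_request(c: connection, msg: dhcp_msg, req_addr: addr, serv_addr: addr, host_name: string)
        {
        if ( host_name != "" )
            print fmt("DHCP request from %s claims host name %s", c$id$orig_h, host_name);
        }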
@@ -34,21 +35,22 @@ enum DHCP_message_type { type Option_Info(code: uint8) = record { length : uint8; value : case code of { - SUBNET_OPTION -> mask : uint32; - ROUTER_OPTION -> router_list: uint32[length/4]; - REQ_IP_OPTION -> req_addr : uint32; - LEASE_OPTION -> lease : uint32; - MSG_TYPE_OPTION -> msg_type : uint8; - SERV_ID_OPTION -> serv_addr: uint32; - default -> other: bytestring &length = length; + SUBNET_OPTION -> mask : uint32; + ROUTER_OPTION -> router_list : uint32[length/4]; + REQ_IP_OPTION -> req_addr : uint32; + LEASE_OPTION -> lease : uint32; + MSG_TYPE_OPTION -> msg_type : uint8; + SERV_ID_OPTION -> serv_addr : uint32; + HOST_NAME_OPTION-> host_name : bytestring &length = length; + default -> other : bytestring &length = length; }; }; type DHCP_Option = record { code : uint8; data : case code of { - 0, 255 -> none : empty; - default -> info : Option_Info(code); + 0, 255 -> none : empty; + default -> info : Option_Info(code); }; } &let { last: bool = (code == 255); # Mark the end of a list of options diff --git a/src/analyzer/protocol/dhcp/dhcp.pac b/src/analyzer/protocol/dhcp/dhcp.pac index c4a684badc..706be31e10 100644 --- a/src/analyzer/protocol/dhcp/dhcp.pac +++ b/src/analyzer/protocol/dhcp/dhcp.pac @@ -1,3 +1,4 @@ +%include binpac.pac %include bro.pac %extern{ diff --git a/src/analyzer/protocol/dhcp/events.bif b/src/analyzer/protocol/dhcp/events.bif index 741504185e..1f1e84ef0c 100644 --- a/src/analyzer/protocol/dhcp/events.bif +++ b/src/analyzer/protocol/dhcp/events.bif @@ -1,8 +1,5 @@ -## Generated for DHCP messages of type *discover*. -## -## See `Wikipedia -## `__ for -## more information about the DHCP protocol. +## Generated for DHCP messages of type *DHCPDISCOVER* (client broadcast to locate +## available servers). ## ## c: The connection record describing the underlying UDP flow. ## @@ -10,33 +7,23 @@ ## ## req_addr: The specific address requested by the client. ## -## .. bro:see:: dns_AAAA_reply dns_A_reply dns_CNAME_reply dns_EDNS_addl -## dns_HINFO_reply dns_MX_reply dns_NS_reply dns_PTR_reply dns_SOA_reply -## dns_SRV_reply dns_TSIG_addl dns_TXT_reply dns_WKS_reply dns_end -## dns_full_request dns_mapping_altered dns_mapping_lost_name dns_mapping_new_name -## dns_mapping_unverified dns_mapping_valid dns_message dns_query_reply -## dns_rejected dns_request non_dns_request dns_max_queries dns_session_timeout -## dns_skip_addl dns_skip_all_addl dns_skip_all_auth dns_skip_auth +## host_name: The value of the host name option, if specified by the client. +## +## .. bro:see:: dhcp_discover dhcp_offer dhcp_request dhcp_decline dhcp_ack dhcp_nak +## dhcp_release dhcp_inform ## ## .. note:: Bro does not support broadcast packets (as used by the DHCP ## protocol). It treats broadcast addresses just like any other and ## associates packets into transport-level flows in the same way as usual. ## -## .. todo:: Bro's current default configuration does not activate the protocol -## analyzer that generates this event; the corresponding script has not yet -## been ported to Bro 2.x. To still enable this event, one needs to -## register a port for it or add a DPD payload signature. -event dhcp_discover%(c: connection, msg: dhcp_msg, req_addr: addr%); +event dhcp_discover%(c: connection, msg: dhcp_msg, req_addr: addr, host_name: string%); -## Generated for DHCP messages of type *offer*. -## -## See `Wikipedia -## `__ for -## more information about the DHCP protocol. 
+## Generated for DHCP messages of type *DHCPOFFER* (server to client in response to +## DHCPDISCOVER with offer of configuration parameters). ## ## c: The connection record describing the underlying UDP flow. ## -## msg: TODO. +## msg: The parsed type-independent part of the DHCP message. ## ## mask: The subnet mask specified by the message. ## @@ -46,28 +33,21 @@ event dhcp_discover%(c: connection, msg: dhcp_msg, req_addr: addr%); ## ## serv_addr: The server address specified by the message. ## -## .. bro:see:: dns_AAAA_reply dns_A_reply dns_CNAME_reply dns_EDNS_addl -## dns_HINFO_reply dns_MX_reply dns_NS_reply dns_PTR_reply dns_SOA_reply -## dns_SRV_reply dns_TSIG_addl dns_TXT_reply dns_WKS_reply dns_end -## dns_full_request dns_mapping_altered dns_mapping_lost_name dns_mapping_new_name -## dns_mapping_unverified dns_mapping_valid dns_message dns_query_reply -## dns_rejected dns_request non_dns_request +## host_name: The value of the host name option, if specified by the client. +## +## .. bro:see:: dhcp_discover dhcp_request dhcp_decline dhcp_ack dhcp_nak +## dhcp_release dhcp_inform ## ## .. note:: Bro does not support broadcast packets (as used by the DHCP ## protocol). It treats broadcast addresses just like any other and ## associates packets into transport-level flows in the same way as usual. ## -## .. todo:: Bro's current default configuration does not activate the protocol -## analyzer that generates this event; the corresponding script has not yet -## been ported to Bro 2.x. To still enable this event, one needs to -## register a port for it or add a DPD payload signature. -event dhcp_offer%(c: connection, msg: dhcp_msg, mask: addr, router: dhcp_router_list, lease: interval, serv_addr: addr%); +event dhcp_offer%(c: connection, msg: dhcp_msg, mask: addr, router: dhcp_router_list, lease: interval, serv_addr: addr, host_name: string%); -## Generated for DHCP messages of type *request*. -## -## See `Wikipedia -## `__ for -## more information about the DHCP protocol. +## Generated for DHCP messages of type *DHCPREQUEST* (Client message to servers either +## (a) requesting offered parameters from one server and implicitly declining offers +## from all others, (b) confirming correctness of previously allocated address after, +## e.g., system reboot, or (c) extending the lease on a particular network address.) ## ## c: The connection record describing the underlying UDP flow. ## @@ -77,55 +57,37 @@ event dhcp_offer%(c: connection, msg: dhcp_msg, mask: addr, router: dhcp_router_ ## ## serv_addr: The server address specified by the message. ## -## .. bro:see:: dns_AAAA_reply dns_A_reply dns_CNAME_reply dns_EDNS_addl -## dns_HINFO_reply dns_MX_reply dns_NS_reply dns_PTR_reply dns_SOA_reply -## dns_SRV_reply dns_TSIG_addl dns_TXT_reply dns_WKS_reply dns_end -## dns_full_request dns_mapping_altered dns_mapping_lost_name dns_mapping_new_name -## dns_mapping_unverified dns_mapping_valid dns_message dns_query_reply -## dns_rejected dns_request non_dns_request +## host_name: The value of the host name option, if specified by the client. +## +## .. bro:see:: dhcp_discover dhcp_offer dhcp_decline dhcp_ack dhcp_nak +## dhcp_release dhcp_inform ## ## .. note:: Bro does not support broadcast packets (as used by the DHCP ## protocol). It treats broadcast addresses just like any other and ## associates packets into transport-level flows in the same way as usual. ## -## .. 
todo:: Bro's current default configuration does not activate the protocol -## analyzer that generates this event; the corresponding script has not yet -## been ported to Bro 2.x. To still enable this event, one needs to -## register a port for it or add a DPD payload signature. -event dhcp_request%(c: connection, msg: dhcp_msg, req_addr: addr, serv_addr: addr%); +event dhcp_request%(c: connection, msg: dhcp_msg, req_addr: addr, serv_addr: addr, host_name: string%); -## Generated for DHCP messages of type *decline*. -## -## See `Wikipedia -## `__ for -## more information about the DHCP protocol. +## Generated for DHCP messages of type *DHCPDECLINE* (Client to server indicating +## network address is already in use). ## ## c: The connection record describing the underlying UDP flow. ## ## msg: The parsed type-independent part of the DHCP message. ## -## .. bro:see:: dns_AAAA_reply dns_A_reply dns_CNAME_reply dns_EDNS_addl -## dns_HINFO_reply dns_MX_reply dns_NS_reply dns_PTR_reply dns_SOA_reply -## dns_SRV_reply dns_TSIG_addl dns_TXT_reply dns_WKS_reply dns_end -## dns_full_request dns_mapping_altered dns_mapping_lost_name dns_mapping_new_name -## dns_mapping_unverified dns_mapping_valid dns_message dns_query_reply -## dns_rejected dns_request non_dns_request +## host_name: The value of the host name option, if specified by the client. +## +## .. bro:see:: dhcp_discover dhcp_offer dhcp_request dhcp_ack dhcp_nak +## dhcp_release dhcp_inform ## ## .. note:: Bro does not support broadcast packets (as used by the DHCP ## protocol). It treats broadcast addresses just like any other and ## associates packets into transport-level flows in the same way as usual. ## -## .. todo:: Bro's current default configuration does not activate the protocol -## analyzer that generates this event; the corresponding script has not yet -## been ported to Bro 2.x. To still enable this event, one needs to -## register a port for it or add a DPD payload signature. -event dhcp_decline%(c: connection, msg: dhcp_msg%); +event dhcp_decline%(c: connection, msg: dhcp_msg, host_name: string%); -## Generated for DHCP messages of type *acknowledgment*. -## -## See `Wikipedia -## `__ for -## more information about the DHCP protocol. +## Generated for DHCP messages of type *DHCPACK* (Server to client with configuration +## parameters, including committed network address). ## ## c: The connection record describing the underlying UDP flow. ## @@ -139,101 +101,62 @@ event dhcp_decline%(c: connection, msg: dhcp_msg%); ## ## serv_addr: The server address specified by the message. ## -## .. bro:see:: dns_AAAA_reply dns_A_reply dns_CNAME_reply dns_EDNS_addl -## dns_HINFO_reply dns_MX_reply dns_NS_reply dns_PTR_reply dns_SOA_reply -## dns_SRV_reply dns_TSIG_addl dns_TXT_reply dns_WKS_reply dns_end -## dns_full_request dns_mapping_altered dns_mapping_lost_name dns_mapping_new_name -## dns_mapping_unverified dns_mapping_valid dns_message dns_query_reply -## dns_rejected dns_request non_dns_request +## host_name: The value of the host name option, if specified by the client. ## -## .. note:: Bro does not support broadcast packets (as used by the DHCP -## protocol). It treats broadcast addresses just like any other and -## associates packets into transport-level flows in the same way as usual. +## .. bro:see:: dhcp_discover dhcp_offer dhcp_request dhcp_decline dhcp_nak +## dhcp_release dhcp_inform ## -## .. 
todo:: Bro's current default configuration does not activate the protocol -## analyzer that generates this event; the corresponding script has not yet -## been ported to Bro 2.x. To still enable this event, one needs to -## register a port for it or add a DPD payload signature. -event dhcp_ack%(c: connection, msg: dhcp_msg, mask: addr, router: dhcp_router_list, lease: interval, serv_addr: addr%); +event dhcp_ack%(c: connection, msg: dhcp_msg, mask: addr, router: dhcp_router_list, lease: interval, serv_addr: addr, host_name: string%); -## Generated for DHCP messages of type *negative acknowledgment*. -## -## See `Wikipedia -## `__ for -## more information about the DHCP protocol. +## Generated for DHCP messages of type *DHCPNAK* (Server to client indicating client's +## notion of network address is incorrect (e.g., client has moved to new subnet) or +## client's lease has expired). ## ## c: The connection record describing the underlying UDP flow. ## ## msg: The parsed type-independent part of the DHCP message. ## -## .. bro:see:: dns_AAAA_reply dns_A_reply dns_CNAME_reply dns_EDNS_addl -## dns_HINFO_reply dns_MX_reply dns_NS_reply dns_PTR_reply dns_SOA_reply -## dns_SRV_reply dns_TSIG_addl dns_TXT_reply dns_WKS_reply dns_end -## dns_full_request dns_mapping_altered dns_mapping_lost_name dns_mapping_new_name -## dns_mapping_unverified dns_mapping_valid dns_message dns_query_reply -## dns_rejected dns_request non_dns_request +## host_name: The value of the host name option, if specified by the client. +## +## .. bro:see:: dhcp_discover dhcp_offer dhcp_request dhcp_decline dhcp_ack dhcp_release +## dhcp_inform ## ## .. note:: Bro does not support broadcast packets (as used by the DHCP ## protocol). It treats broadcast addresses just like any other and ## associates packets into transport-level flows in the same way as usual. ## -## .. todo:: Bro's current default configuration does not activate the protocol -## analyzer that generates this event; the corresponding script has not yet -## been ported to Bro 2.x. To still enable this event, one needs to -## register a port for it or add a DPD payload signature. -event dhcp_nak%(c: connection, msg: dhcp_msg%); +event dhcp_nak%(c: connection, msg: dhcp_msg, host_name: string%); -## Generated for DHCP messages of type *release*. -## -## See `Wikipedia -## `__ for -## more information about the DHCP protocol. +## Generated for DHCP messages of type *DHCPRELEASE* (Client to server relinquishing +## network address and cancelling remaining lease). ## ## c: The connection record describing the underlying UDP flow. ## ## msg: The parsed type-independent part of the DHCP message. ## -## .. bro:see:: dns_AAAA_reply dns_A_reply dns_CNAME_reply dns_EDNS_addl -## dns_HINFO_reply dns_MX_reply dns_NS_reply dns_PTR_reply dns_SOA_reply -## dns_SRV_reply dns_TSIG_addl dns_TXT_reply dns_WKS_reply dns_end -## dns_full_request dns_mapping_altered dns_mapping_lost_name dns_mapping_new_name -## dns_mapping_unverified dns_mapping_valid dns_message dns_query_reply -## dns_rejected dns_request non_dns_request +## host_name: The value of the host name option, if specified by the client. ## -## .. note:: Bro does not support broadcast packets (as used by the DHCP -## protocol). It treats broadcast addresses just like any other and -## associates packets into transport-level flows in the same way as usual. +## .. bro:see:: dhcp_discover dhcp_offer dhcp_request dhcp_decline dhcp_ack dhcp_nak +## dhcp_inform ## -## .. 
todo:: Bro's current default configuration does not activate the protocol -## analyzer that generates this event; the corresponding script has not yet -## been ported to Bro 2.x. To still enable this event, one needs to -## register a port for it or add a DPD payload signature. -event dhcp_release%(c: connection, msg: dhcp_msg%); +event dhcp_release%(c: connection, msg: dhcp_msg, host_name: string%); -## Generated for DHCP messages of type *inform*. -## -## See `Wikipedia -## <http://en.wikipedia.org/wiki/Dynamic_Host_Configuration_Protocol>`__ for -## more information about the DHCP protocol. +## Generated for DHCP messages of type *DHCPINFORM* (Client to server, asking only for +## local configuration parameters; client already has externally configured network +## address). ## ## c: The connection record describing the underlying UDP flow. ## ## msg: The parsed type-independent part of the DHCP message. ## -## .. bro:see:: dns_AAAA_reply dns_A_reply dns_CNAME_reply dns_EDNS_addl -## dns_HINFO_reply dns_MX_reply dns_NS_reply dns_PTR_reply dns_SOA_reply -## dns_SRV_reply dns_TSIG_addl dns_TXT_reply dns_WKS_reply dns_end -## dns_full_request dns_mapping_altered dns_mapping_lost_name dns_mapping_new_name -## dns_mapping_unverified dns_mapping_valid dns_message dns_query_reply -## dns_rejected dns_request non_dns_request +## host_name: The value of the host name option, if specified by the client. +## +## .. bro:see:: dhcp_discover dhcp_offer dhcp_request dhcp_decline dhcp_ack dhcp_nak +## dhcp_release ## ## .. note:: Bro does not support broadcast packets (as used by the DHCP ## protocol). It treats broadcast addresses just like any other and ## associates packets into transport-level flows in the same way as usual. ## -## .. todo:: Bro's current default configuration does not activate the protocol -## analyzer that generates this event; the corresponding script has not yet -## been ported to Bro 2.x. To still enable this event, one needs to -## register a port for it or add a DPD payload signature. -event dhcp_inform%(c: connection, msg: dhcp_msg%); +event dhcp_inform%(c: connection, msg: dhcp_msg, host_name: string%); diff --git a/src/file_analysis/Analyzer.cc b/src/file_analysis/Analyzer.cc new file mode 100644 index 0000000000..e0b5011aa8 --- /dev/null +++ b/src/file_analysis/Analyzer.cc @@ -0,0 +1,11 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "Analyzer.h" +#include "Manager.h" + +file_analysis::Analyzer::~Analyzer() + { + DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %s", + file_mgr->GetComponentName(tag)); + Unref(args); + } diff --git a/src/file_analysis/Analyzer.h b/src/file_analysis/Analyzer.h index 0a5aa9e25c..e20e2802cf 100644 --- a/src/file_analysis/Analyzer.h +++ b/src/file_analysis/Analyzer.h @@ -5,14 +5,12 @@ #include "Val.h" #include "NetVar.h" -#include "analyzer/Tag.h" +#include "Tag.h" #include "file_analysis/file_analysis.bif.h" namespace file_analysis { -typedef int FA_Tag; - class File; /** @@ -25,11 +23,7 @@ public: * Destructor. Nothing special about it. Virtual since we definitely expect * to delete instances of derived classes via pointers to this class. */ - virtual ~Analyzer() - { - DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %d", tag); - Unref(args); - } + virtual ~Analyzer(); /** * Subclasses may override this method to receive file data non-sequentially. @@ -76,7 +70,7 @@ public: /** * @return the analyzer type enum value.
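 * (Editor's note, an illustrative sketch rather than part of the patch:
 * with tags now being file_analysis::Tag values, callers resolve a
 * printable name through the manager instead of formatting an integer,
 * e.g.
 *
 *     // "a" is a file_analysis::Analyzer*; GetComponentName() is the
 *     // ComponentManager lookup introduced later in this patch.
 *     DBG_LOG(DBG_FILE_ANALYSIS, "analyzer %s",
 *             file_mgr->GetComponentName(a->Tag()));
 * )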
*/ - FA_Tag Tag() const { return tag; } + file_analysis::Tag Tag() const { return tag; } /** * @return the AnalyzerArgs associated with the analyzer. @@ -88,18 +82,6 @@ public: */ File* GetFile() const { return file; } - /** - * Retrieves an analyzer tag field from full analyzer argument record. - * @param args an \c AnalyzerArgs (script-layer type) value. - * @return the analyzer tag equivalent of the 'tag' field from the - * \c AnalyzerArgs value \a args. - */ - static FA_Tag ArgsTag(const RecordVal* args) - { - using BifType::Record::Files::AnalyzerArgs; - return args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum(); - } - protected: /** @@ -108,15 +90,15 @@ protected: * tunable options, if any, related to a particular analyzer type. * @param arg_file the file to which the analyzer is being attached. */ - Analyzer(RecordVal* arg_args, File* arg_file) - : tag(file_analysis::Analyzer::ArgsTag(arg_args)), + Analyzer(file_analysis::Tag arg_tag, RecordVal* arg_args, File* arg_file) + : tag(arg_tag), args(arg_args->Ref()->AsRecordVal()), file(arg_file) {} private: - FA_Tag tag; /**< The particular analyzer type of the analyzer instance. */ + file_analysis::Tag tag; /**< The particular type of the analyzer instance. */ RecordVal* args; /**< \c AnalyzerArgs val gives tunable analyzer params. */ File* file; /**< The file to which the analyzer is attached. */ }; diff --git a/src/file_analysis/AnalyzerSet.cc b/src/file_analysis/AnalyzerSet.cc index c710d8b085..f7abc01dc2 100644 --- a/src/file_analysis/AnalyzerSet.cc +++ b/src/file_analysis/AnalyzerSet.cc @@ -15,6 +15,7 @@ static void analyzer_del_func(void* v) AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file) { TypeList* t = new TypeList(); + t->Append(file_mgr->GetTagEnumType()->Ref()); t->Append(BifType::Record::Files::AnalyzerArgs->Ref()); analyzer_hash = new CompositeHash(t); Unref(t); @@ -34,20 +35,20 @@ AnalyzerSet::~AnalyzerSet() delete analyzer_hash; } -bool AnalyzerSet::Add(RecordVal* args) +bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args) { - HashKey* key = GetKey(args); + HashKey* key = GetKey(tag, args); if ( analyzer_map.Lookup(key) ) { - DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d skipped for file id" - " %s: already exists", file_analysis::Analyzer::ArgsTag(args), + DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s skipped for file id" - " %s: already exists", file_mgr->GetComponentName(tag), file->GetID().c_str()); delete key; return true; } - file_analysis::Analyzer* a = InstantiateAnalyzer(args); + file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args); if ( ! a ) { @@ -60,10 +61,10 @@ bool AnalyzerSet::Add(RecordVal* args) return true; } -bool AnalyzerSet::QueueAdd(RecordVal* args) +bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args) { - HashKey* key = GetKey(args); - file_analysis::Analyzer* a = InstantiateAnalyzer(args); + HashKey* key = GetKey(tag, args); + file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args); if ( !
a ) { @@ -80,8 +81,9 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set) { if ( set->analyzer_map.Lookup(key) ) { - DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d skipped for file id" - " %s: already exists", a->Tag(), a->GetFile()->GetID().c_str()); + DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s skipped for file id" + " %s: already exists", file_mgr->GetComponentName(a->Tag()), + a->GetFile()->GetID().c_str()); Abort(); return true; @@ -91,12 +93,12 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set) return true; } -bool AnalyzerSet::Remove(const RecordVal* args) +bool AnalyzerSet::Remove(file_analysis::Tag tag, RecordVal* args) { - return Remove(file_analysis::Analyzer::ArgsTag(args), GetKey(args)); + return Remove(tag, GetKey(tag, args)); } -bool AnalyzerSet::Remove(FA_Tag tag, HashKey* key) +bool AnalyzerSet::Remove(file_analysis::Tag tag, HashKey* key) { file_analysis::Analyzer* a = (file_analysis::Analyzer*) analyzer_map.Remove(key); @@ -105,22 +107,22 @@ bool AnalyzerSet::Remove(FA_Tag tag, HashKey* key) if ( ! a ) { - DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %d for file id %s", - tag, file->GetID().c_str()); + DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %s for file id %s", + file_mgr->GetComponentName(tag), file->GetID().c_str()); return false; } - DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %d for file id %s", a->Tag(), + DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %s for file id %s", + file_mgr->GetComponentName(tag), file->GetID().c_str()); delete a; return true; } -bool AnalyzerSet::QueueRemove(const RecordVal* args) +bool AnalyzerSet::QueueRemove(file_analysis::Tag tag, RecordVal* args) { - HashKey* key = GetKey(args); - FA_Tag tag = file_analysis::Analyzer::ArgsTag(args); + HashKey* key = GetKey(tag, args); mod_queue.push(new RemoveMod(tag, key)); @@ -132,24 +134,28 @@ bool AnalyzerSet::RemoveMod::Perform(AnalyzerSet* set) return set->Remove(tag, key); } -HashKey* AnalyzerSet::GetKey(const RecordVal* args) const +HashKey* AnalyzerSet::GetKey(file_analysis::Tag t, RecordVal* args) const { - HashKey* key = analyzer_hash->ComputeHash(args, 1); + ListVal* lv = new ListVal(TYPE_ANY); + lv->Append(t.AsEnumVal()->Ref()); + lv->Append(args->Ref()); + HashKey* key = analyzer_hash->ComputeHash(lv, 1); + Unref(lv); if ( ! key ) reporter->InternalError("AnalyzerArgs type mismatch"); return key; } -file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const +file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(Tag tag, + RecordVal* args) const { - FA_Tag tag = file_analysis::Analyzer::ArgsTag(args); file_analysis::Analyzer* a = file_mgr->InstantiateAnalyzer(tag, args, file); if ( ! 
a ) { reporter->Error("Failed file analyzer %s instantiation for file id %s", - file_mgr->GetAnalyzerName(tag), file->GetID().c_str()); + file_mgr->GetComponentName(tag), file->GetID().c_str()); return 0; } @@ -158,8 +164,8 @@ file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key) { - DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d for file id %s", a->Tag(), - file->GetID().c_str()); + DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s for file id %s", + file_mgr->GetComponentName(a->Tag()), file->GetID().c_str()); analyzer_map.Insert(key, a); delete key; } diff --git a/src/file_analysis/AnalyzerSet.h b/src/file_analysis/AnalyzerSet.h index 6f14149e30..42a54f4943 100644 --- a/src/file_analysis/AnalyzerSet.h +++ b/src/file_analysis/AnalyzerSet.h @@ -9,6 +9,7 @@ #include "Dict.h" #include "CompHash.h" #include "Val.h" +#include "Tag.h" namespace file_analysis { @@ -38,31 +39,35 @@ public: /** * Attach an analyzer to #file immediately. + * @param tag the analyzer tag of the file analyzer to add. * @param args an \c AnalyzerArgs value which specifies an analyzer. * @return true if analyzer was instantiated/attached, else false. */ - bool Add(RecordVal* args); + bool Add(file_analysis::Tag tag, RecordVal* args); /** * Queue the attachment of an analyzer to #file. + * @param tag the analyzer tag of the file analyzer to add. * @param args an \c AnalyzerArgs value which specifies an analyzer. * @return true if analyzer was able to be instantiated, else false. */ - bool QueueAdd(RecordVal* args); /** * Remove an analyzer from #file immediately. + * @param tag the analyzer tag of the file analyzer to remove. * @param args an \c AnalyzerArgs value which specifies an analyzer. * @return false if analyzer didn't exist and so wasn't removed, else true. */ - bool Remove(const RecordVal* args); + bool Remove(file_analysis::Tag tag, RecordVal* args); /** * Queue the removal of an analyzer from #file. + * @param tag the analyzer tag of the file analyzer to remove. * @param args an \c AnalyzerArgs value which specifies an analyzer. * @return true if analyzer exists at time of call, else false. */ - bool QueueRemove(const RecordVal* args); + bool QueueRemove(file_analysis::Tag tag, RecordVal* args); /** * Perform all queued modifications to the current analyzer set. @@ -91,17 +96,20 @@ protected: /** * Get a hash key which represents an analyzer instance. + * @param tag the file analyzer tag. * @param args an \c AnalyzerArgs value which specifies an analyzer. * @return the hash key calculated from \a args */ - HashKey* GetKey(const RecordVal* args) const; + HashKey* GetKey(file_analysis::Tag tag, RecordVal* args) const; /** * Create an instance of a file analyzer. + * @param tag the tag of a file analyzer. * @param args an \c AnalyzerArgs value which specifies an analyzer. * @return a new file analyzer instance. */ - file_analysis::Analyzer* InstantiateAnalyzer(RecordVal* args) const; + file_analysis::Analyzer* InstantiateAnalyzer(file_analysis::Tag tag, + RecordVal* args) const; /** * Insert an analyzer instance into the set. @@ -116,7 +124,7 @@ protected: * just used for debugging messages. * @param key the hash key which represents the analyzer's \c AnalyzerArgs. */ - bool Remove(FA_Tag tag, HashKey* key); + bool Remove(file_analysis::Tag tag, HashKey* key); private: @@ -175,14 +183,14 @@ private: * @param arg_a an analyzer instance to add to an analyzer set.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs. */ - RemoveMod(FA_Tag arg_tag, HashKey* arg_key) + RemoveMod(file_analysis::Tag arg_tag, HashKey* arg_key) : Modification(), tag(arg_tag), key(arg_key) {} virtual ~RemoveMod() {} virtual bool Perform(AnalyzerSet* set); virtual void Abort() { delete key; } protected: - FA_Tag tag; + file_analysis::Tag tag; HashKey* key; }; diff --git a/src/file_analysis/CMakeLists.txt b/src/file_analysis/CMakeLists.txt index f22c293cc4..846fc4bf15 100644 --- a/src/file_analysis/CMakeLists.txt +++ b/src/file_analysis/CMakeLists.txt @@ -11,9 +11,10 @@ set(file_analysis_SRCS Manager.cc File.cc FileTimer.cc - Analyzer.h + Analyzer.cc AnalyzerSet.cc Component.cc + Tag.cc ) bif_target(file_analysis.bif) diff --git a/src/file_analysis/Component.cc b/src/file_analysis/Component.cc index 99531e40f5..9c47f2c75e 100644 --- a/src/file_analysis/Component.cc +++ b/src/file_analysis/Component.cc @@ -8,26 +8,22 @@ using namespace file_analysis; -analyzer::Tag::type_t Component::type_counter = 0; - -Component::Component(const char* arg_name, factory_callback arg_factory, - analyzer::Tag::subtype_t arg_subtype) - : plugin::Component(plugin::component::FILE_ANALYZER) +Component::Component(const char* arg_name, factory_callback arg_factory) + : plugin::Component(plugin::component::FILE_ANALYZER), + plugin::TaggedComponent<file_analysis::Tag>() { name = copy_string(arg_name); canon_name = canonify_name(arg_name); factory = arg_factory; - - tag = analyzer::Tag(++type_counter, arg_subtype); } Component::Component(const Component& other) - : plugin::Component(Type()) + : plugin::Component(Type()), + plugin::TaggedComponent<file_analysis::Tag>(other) { name = copy_string(other.name); canon_name = copy_string(other.canon_name); factory = other.factory; - tag = other.tag; } Component::~Component() @@ -36,11 +32,6 @@ Component::~Component() delete [] canon_name; } -analyzer::Tag Component::Tag() const - { - return tag; - } - void Component::Describe(ODesc* d) const { plugin::Component::Describe(d); @@ -58,11 +49,12 @@ void Component::Describe(ODesc* d) const Component& Component::operator=(const Component& other) { + plugin::TaggedComponent<file_analysis::Tag>::operator=(other); + if ( &other != this ) { name = copy_string(other.name); factory = other.factory; - tag = other.tag; } return *this; diff --git a/src/file_analysis/Component.h b/src/file_analysis/Component.h index 3cdc69efdf..4cf2dced60 100644 --- a/src/file_analysis/Component.h +++ b/src/file_analysis/Component.h @@ -3,8 +3,9 @@ #ifndef FILE_ANALYZER_PLUGIN_COMPONENT_H #define FILE_ANALYZER_PLUGIN_COMPONENT_H -#include "analyzer/Tag.h" +#include "Tag.h" #include "plugin/Component.h" +#include "plugin/TaggedComponent.h" #include "Val.h" @@ -22,7 +23,8 @@ class Analyzer; * A plugin can provide a specific file analyzer by registering this * analyzer component, describing the analyzer. */ -class Component : public plugin::Component { +class Component : public plugin::Component, + public plugin::TaggedComponent<file_analysis::Tag> { public: typedef Analyzer* (*factory_callback)(RecordVal* args, File* file); @@ -38,15 +40,8 @@ public: * from file_analysis::Analyzer. This is typically a static \c * Instantiate() method inside the class that just allocates and * returns a new instance. - * - * @param subtype A subtype associated with this component that - * further distinguishes it. The subtype will be integrated into - * the analyzer::Tag that the manager associates with this analyzer, - * and analyzer instances can accordingly access it via analyzer::Tag(). - * If not used, leave at zero.
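 * (Editor's sketch of a registration under the new two-argument
 * constructor; the static Instantiate() factory is an assumption in
 * line with the docs above:
 *
 *     new file_analysis::Component("DATA_EVENT", DataEvent::Instantiate);
 *
 * The tag is now assigned by the plugin::TaggedComponent base rather
 * than by this constructor.)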
*/ - Component(const char* name, factory_callback factory, - analyzer::Tag::subtype_t subtype = 0); + Component(const char* name, factory_callback factory); /** * Copy constructor. @@ -79,13 +74,6 @@ public: */ factory_callback Factory() const { return factory; } - /** - * Returns the analyzer's tag. Note that this is automatically - * generated for each new Components, and hence unique across all of - * them. - */ - analyzer::Tag Tag() const; - /** * Generates a human-readable description of the component's main * parameters. This goes into the output of \c "bro -NN". @@ -98,10 +86,6 @@ private: const char* name; // The analyzer's name. const char* canon_name; // The analyzer's canonical name. factory_callback factory; // The analyzer's factory callback. - analyzer::Tag tag; // The automatically assigned analyzer tag. - - // Global counter used to generate unique tags. - static analyzer::Tag::type_t type_counter; }; } diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 9e44e327e3..1197cd06f6 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -88,7 +88,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag, if ( conn ) { // add source, connection, is_orig fields - SetSource(analyzer_mgr->GetAnalyzerName(tag)); + SetSource(analyzer_mgr->GetComponentName(tag)); val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL)); UpdateConnectionFields(conn, is_orig); } @@ -231,14 +231,14 @@ void File::ScheduleInactivityTimer() const timer_mgr->Add(new FileTimer(network_time, id, GetTimeoutInterval())); } -bool File::AddAnalyzer(RecordVal* args) +bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args) { - return done ? false : analyzers.QueueAdd(args); + return done ? false : analyzers.QueueAdd(tag, args); } -bool File::RemoveAnalyzer(const RecordVal* args) +bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args) { - return done ? false : analyzers.QueueRemove(args); + return done ? false : analyzers.QueueRemove(tag, args); } bool File::BufferBOF(const u_char* data, uint64 len) @@ -321,7 +321,7 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset) while ( (a = analyzers.NextEntry(c)) ) { if ( ! a->DeliverChunk(data, len, offset) ) - analyzers.QueueRemove(a->Args()); + analyzers.QueueRemove(a->Tag(), a->Args()); } analyzers.DrainModifications(); @@ -356,7 +356,7 @@ void File::DataIn(const u_char* data, uint64 len) { if ( ! a->DeliverStream(data, len) ) { - analyzers.QueueRemove(a->Args()); + analyzers.QueueRemove(a->Tag(), a->Args()); continue; } @@ -364,7 +364,7 @@ void File::DataIn(const u_char* data, uint64 len) LookupFieldDefaultCount(missing_bytes_idx); if ( ! a->DeliverChunk(data, len, offset) ) - analyzers.QueueRemove(a->Args()); + analyzers.QueueRemove(a->Tag(), a->Args()); } analyzers.DrainModifications(); @@ -389,7 +389,7 @@ void File::EndOfFile() while ( (a = analyzers.NextEntry(c)) ) { if ( ! a->EndOfFile() ) - analyzers.QueueRemove(a->Args()); + analyzers.QueueRemove(a->Tag(), a->Args()); } FileEvent(file_state_remove); @@ -411,7 +411,7 @@ void File::Gap(uint64 offset, uint64 len) while ( (a = analyzers.NextEntry(c)) ) { if ( ! 
a->Undelivered(offset, len) ) - analyzers.QueueRemove(a->Args()); + analyzers.QueueRemove(a->Tag(), a->Args()); } if ( FileEventAvailable(file_gap) ) diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index 794734d24b..12c1e061a8 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -10,6 +10,7 @@ #include "Conn.h" #include "Val.h" +#include "Tag.h" #include "AnalyzerSet.h" #include "BroString.h" @@ -94,17 +95,19 @@ public: /** * Queues attaching an analyzer. Only one analyzer per type can be attached * at a time unless the arguments differ. + * @param tag the analyzer tag of the file analyzer to add. * @param args an \c AnalyzerArgs value representing a file analyzer. * @return false if analyzer can't be instantiated, else true. */ - bool AddAnalyzer(RecordVal* args); + bool AddAnalyzer(file_analysis::Tag tag, RecordVal* args); /** * Queues removal of an analyzer. + * @param tag the analyzer tag of the file analyzer to remove. * @param args an \c AnalyzerArgs value representing a file analyzer. * @return true if analyzer was active at time of call, else false. */ - bool RemoveAnalyzer(const RecordVal* args); + bool RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args); /** * Pass in non-sequential data and deliver to attached analyzers. diff --git a/src/file_analysis/FileTimer.cc b/src/file_analysis/FileTimer.cc index 575857fd15..6b1d70f136 100644 --- a/src/file_analysis/FileTimer.cc +++ b/src/file_analysis/FileTimer.cc @@ -14,7 +14,7 @@ FileTimer::FileTimer(double t, const string& id, double interval) void FileTimer::Dispatch(double t, int is_expire) { - File* file = file_mgr->Lookup(file_id); + File* file = file_mgr->LookupFile(file_id); if ( ! file ) return; diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 7a92e92109..5975133356 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -18,10 +18,9 @@ TableVal* Manager::disabled = 0; string Manager::salt; Manager::Manager() + : plugin::ComponentManager<file_analysis::Tag, file_analysis::Component>("Files") { - tag_enum_type = new EnumType("Files::Tag"); - ::ID* id = install_ID("Tag", "Files", true, true); - add_type(id, tag_enum_type, 0, 0); } Manager::~Manager() @@ -35,27 +34,7 @@ void Manager::InitPreScript() for ( std::list<Component*>::const_iterator i = analyzers.begin(); i != analyzers.end(); ++i ) - RegisterAnalyzerComponent(*i); - } - -void Manager::RegisterAnalyzerComponent(Component* component) - { - const char* cname = component->CanonicalName(); - - if ( tag_enum_type->Lookup("Files", cname) != -1 ) - reporter->FatalError("File Analyzer %s defined more than once", cname); - - DBG_LOG(DBG_FILE_ANALYSIS, "Registering analyzer %s (tag %s)", - component->Name(), component->Tag().AsString().c_str()); - - analyzers_by_name.insert(std::make_pair(cname, component)); - analyzers_by_tag.insert(std::make_pair(component->Tag(), component)); - analyzers_by_val.insert(std::make_pair( - component->Tag().AsEnumVal()->InternalInt(), component)); - - string id = fmt("ANALYZER_%s", cname); - tag_enum_type->AddName("Files", id.c_str(), - component->Tag().AsEnumVal()->InternalInt(), true); + RegisterComponent(*i, "ANALYZER_"); } void Manager::InitPostScript() @@ -193,7 +172,7 @@ void Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn, bool Manager::SetTimeoutInterval(const string& file_id, double interval) const { - File* file = Lookup(file_id); + File* file = LookupFile(file_id); if ( !
file ) return false; @@ -205,24 +184,26 @@ bool Manager::SetTimeoutInterval(const string& file_id, double interval) const return true; } -bool Manager::AddAnalyzer(const string& file_id, RecordVal* args) const +bool Manager::AddAnalyzer(const string& file_id, file_analysis::Tag tag, + RecordVal* args) const { - File* file = Lookup(file_id); + File* file = LookupFile(file_id); if ( ! file ) return false; - return file->AddAnalyzer(args); + return file->AddAnalyzer(tag, args); } -bool Manager::RemoveAnalyzer(const string& file_id, const RecordVal* args) const +bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag, + RecordVal* args) const { - File* file = Lookup(file_id); + File* file = LookupFile(file_id); if ( ! file ) return false; - return file->RemoveAnalyzer(args); + return file->RemoveAnalyzer(tag, args); } File* Manager::GetFile(const string& file_id, Connection* conn, @@ -255,7 +236,7 @@ File* Manager::GetFile(const string& file_id, Connection* conn, return rval; } -File* Manager::Lookup(const string& file_id) const +File* Manager::LookupFile(const string& file_id) const { IDMap::const_iterator it = id_map.find(file_id); @@ -267,7 +248,7 @@ File* Manager::Lookup(const string& file_id) const void Manager::Timeout(const string& file_id, bool is_terminating) { - File* file = Lookup(file_id); + File* file = LookupFile(file_id); if ( ! file ) return; @@ -366,15 +347,13 @@ bool Manager::IsDisabled(analyzer::Tag tag) return rval; } -Analyzer* Manager::InstantiateAnalyzer(int tag, RecordVal* args, File* f) const +Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const { - analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag); + Component* c = Lookup(tag); - if ( it == analyzers_by_val.end() ) - reporter->InternalError("cannot instantiate unknown file analyzer: %d", - tag); - - Component* c = it->second; + if ( ! c ) + reporter->InternalError("cannot instantiate unknown file analyzer: %s", + tag.AsString().c_str()); if ( ! c->Factory() ) reporter->InternalError("file analyzer %s cannot be instantiated " @@ -382,14 +361,3 @@ Analyzer* Manager::InstantiateAnalyzer(int tag, RecordVal* args, File* f) const return c->Factory()(args, f); } - -const char* Manager::GetAnalyzerName(int tag) const - { - analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag); - - if ( it == analyzers_by_val.end() ) - reporter->InternalError("cannot get name of unknown file analyzer: %d", - tag); - - return it->second->CanonicalName(); - } diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 84b606173d..dcf33edc99 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -18,7 +18,8 @@ #include "File.h" #include "FileTimer.h" #include "Component.h" - +#include "Tag.h" +#include "plugin/ComponentManager.h" #include "analyzer/Tag.h" #include "file_analysis/file_analysis.bif.h" @@ -28,7 +29,7 @@ namespace file_analysis { /** * Main entry point for interacting with file analysis. */ -class Manager { +class Manager : public plugin::ComponentManager<Tag, Component> { public: @@ -177,18 +178,22 @@ public: * analyzers of a given type can be attached per file identifier at a time * as long as the arguments differ. * @param file_id the file identifier/hash. + * @param tag the analyzer tag of the file analyzer to add. * @param args a \c AnalyzerArgs value which describes a file analyzer. * @return false if the analyzer failed to be instantiated, else true.
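 * (Editor's illustrative sketch, not part of the change: attaching an
 * analyzer by tag, with "MD5" assumed to be a registered component name:
 *
 *     file_mgr->AddAnalyzer(file_id,
 *                           file_mgr->GetComponentTag("MD5"), args);
 * )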
*/ - bool AddAnalyzer(const string& file_id, RecordVal* args) const; + bool AddAnalyzer(const string& file_id, file_analysis::Tag tag, + RecordVal* args) const; /** * Queue removal of an analyzer for a given file identifier. * @param file_id the file identifier/hash. + * @param tag the analyzer tag of the file analyzer to remove. * @param args a \c AnalyzerArgs value which describes a file analyzer. * @return true if the analyzer is active at the time of call, else false. */ - bool RemoveAnalyzer(const string& file_id, const RecordVal* args) const; + bool RemoveAnalyzer(const string& file_id, file_analysis::Tag tag, + RecordVal* args) const; /** * Tells whether analysis for a file is active or ignored. @@ -204,15 +209,7 @@ public: * @param f The file the analyzer is to be associated with. * @return The new analyzer instance or null if tag is invalid. */ - Analyzer* InstantiateAnalyzer(int tag, RecordVal* args, File* f) const; - - /** - * Translates a script-level file analyzer tag in to corresponding file - * analyzer name. - * @param tag The enum val of a file analyzer. - * @return The human-readable name of the file analyzer. - */ - const char* GetAnalyzerName(int tag) const; + Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const; protected: friend class FileTimer; @@ -247,7 +244,7 @@ protected: * @return the File object mapped to \a file_id, or a null pointer if no * mapping exists. */ - File* Lookup(const string& file_id) const; + File* LookupFile(const string& file_id) const; /** * Evaluate timeout policy for a file and remove the File object mapped to @@ -287,20 +284,10 @@ protected: static bool IsDisabled(analyzer::Tag tag); private: - typedef map<string, Component*> analyzer_map_by_name; - typedef map<analyzer::Tag, Component*> analyzer_map_by_tag; - typedef map<int, Component*> analyzer_map_by_val; - - void RegisterAnalyzerComponent(Component* component); IDMap id_map; /**< Map file ID to file_analysis::File records. */ IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */ string current_file_id; /**< Hash of what get_file_handle event sets. */ - EnumType* tag_enum_type; /**< File analyzer tag type. */ - - analyzer_map_by_name analyzers_by_name; - analyzer_map_by_tag analyzers_by_tag; - analyzer_map_by_val analyzers_by_val; static TableVal* disabled; /**< Table of disabled analyzers. */ static string salt; /**< A salt added to file handles before hashing. */ diff --git a/src/file_analysis/Tag.cc b/src/file_analysis/Tag.cc new file mode 100644 index 0000000000..6f0774a4b4 --- /dev/null +++ b/src/file_analysis/Tag.cc @@ -0,0 +1,24 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "Tag.h" +#include "Manager.h" + +using namespace file_analysis; + +file_analysis::Tag file_analysis::Tag::Error; + +file_analysis::Tag::Tag(type_t type, subtype_t subtype) + : ::Tag(file_mgr->GetTagEnumType(), type, subtype) + { + } + +file_analysis::Tag& file_analysis::Tag::operator=(const file_analysis::Tag& other) + { + ::Tag::operator=(other); + return *this; + } + +EnumVal* file_analysis::Tag::AsEnumVal() const + { + return ::Tag::AsEnumVal(file_mgr->GetTagEnumType()); + } diff --git a/src/file_analysis/Tag.h b/src/file_analysis/Tag.h new file mode 100644 index 0000000000..aa38836403 --- /dev/null +++ b/src/file_analysis/Tag.h @@ -0,0 +1,116 @@ +// See the file "COPYING" in the main distribution directory for copyright.
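// (Editor's note: a hedged sketch of how these tags round-trip between
// the core and the script layer; "EXTRACT" is the component name the
// Extract analyzer uses later in this patch:
//
//     file_analysis::Tag t = file_mgr->GetComponentTag("EXTRACT");
//     EnumVal* ev = t.AsEnumVal();   // script-layer Files::Tag value
//     const char* name = file_mgr->GetComponentName(t);
// )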
+ +#ifndef FILE_ANALYZER_TAG_H +#define FILE_ANALYZER_TAG_H + +#include "config.h" +#include "util.h" +#include "../Tag.h" +#include "plugin/TaggedComponent.h" +#include "plugin/ComponentManager.h" + +class EnumVal; + +namespace file_analysis { + +class Component; + +/** + * Class to identify a file analyzer type. + * + * The script-layer analogue is Files::Tag. + */ +class Tag : public ::Tag { +public: + /* + * Copy constructor. + */ + Tag(const Tag& other) : ::Tag(other) {} + + /** + * Default constructor. This initializes the tag with an error value + * that will make \c operator \c bool return false. + */ + Tag() : ::Tag() {} + + /** + * Destructor. + */ + ~Tag() {} + + /** + * Returns false if the tag represents an error value rather than a + * legal analyzer type. + * TODO: make this conversion operator "explicit" (C++11) or use a + * "safe bool" idiom (not necessary if "explicit" is available), + * otherwise this may allow nonsense/undesired comparison operations. + * + */ + operator bool() const { return *this != Tag(); } + + /** + * Assignment operator. + */ + Tag& operator=(const Tag& other); + + /** + * Compares two tags for equality. + */ + bool operator==(const Tag& other) const + { + return ::Tag::operator==(other); + } + + /** + * Compares two tags for inequality. + */ + bool operator!=(const Tag& other) const + { + return ::Tag::operator!=(other); + } + + /** + * Compares two tags for less-than relationship. + */ + bool operator<(const Tag& other) const + { + return ::Tag::operator<(other); + } + + /** + * Returns the \c Files::Tag enum that corresponds to this tag. + * The returned value does not have its ref-count increased. + * + * @param etype the script-layer enum type associated with the tag. + */ + EnumVal* AsEnumVal() const; + + static Tag Error; + +protected: + friend class plugin::ComponentManager<Tag, Component>; + friend class plugin::TaggedComponent<Tag>; + + /** + * Constructor. + * + * @param type The main type. Note that the \a file_analysis::Manager + * manages the value space internally, so no one else should assign + * main types. + * + * @param subtype The sub type, which is left to an analyzer for + * interpretation. By default it's set to zero. + */ + Tag(type_t type, subtype_t subtype = 0); + + /** + * Constructor. + * + * @param val An enum value of script type \c Files::Tag.
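 * (Editor's sketch of the error-value convention defined above; the
 * unregistered name is deliberate:
 *
 *     file_analysis::Tag t = file_mgr->GetComponentTag("NO_SUCH");
 *     if ( ! t )
 *         reporter->Error("unknown file analyzer");  // t == Tag::Error
 * )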
+ */ + Tag(EnumVal* val) : ::Tag(val) {} +}; + +} + +#endif diff --git a/src/file_analysis/analyzer/data_event/CMakeLists.txt b/src/file_analysis/analyzer/data_event/CMakeLists.txt index 81551feda2..49e23d49a0 100644 --- a/src/file_analysis/analyzer/data_event/CMakeLists.txt +++ b/src/file_analysis/analyzer/data_event/CMakeLists.txt @@ -4,5 +4,5 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) bro_plugin_begin(Bro FileDataEvent) -bro_plugin_cc(DataEvent.cc Plugin.cc) +bro_plugin_cc(DataEvent.cc Plugin.cc ../../Analyzer.cc) bro_plugin_end() diff --git a/src/file_analysis/analyzer/data_event/DataEvent.cc b/src/file_analysis/analyzer/data_event/DataEvent.cc index 1b04111c44..cf2d7e52ec 100644 --- a/src/file_analysis/analyzer/data_event/DataEvent.cc +++ b/src/file_analysis/analyzer/data_event/DataEvent.cc @@ -6,12 +6,15 @@ #include "EventRegistry.h" #include "Event.h" #include "util.h" +#include "file_analysis/Manager.h" using namespace file_analysis; DataEvent::DataEvent(RecordVal* args, File* file, EventHandlerPtr ce, EventHandlerPtr se) - : file_analysis::Analyzer(args, file), chunk_event(ce), stream_event(se) + : file_analysis::Analyzer(file_mgr->GetComponentTag("DATA_EVENT"), + args, file), + chunk_event(ce), stream_event(se) { } diff --git a/src/file_analysis/analyzer/extract/CMakeLists.txt b/src/file_analysis/analyzer/extract/CMakeLists.txt index df3fa2646d..e413196db2 100644 --- a/src/file_analysis/analyzer/extract/CMakeLists.txt +++ b/src/file_analysis/analyzer/extract/CMakeLists.txt @@ -4,5 +4,5 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) bro_plugin_begin(Bro FileExtract) -bro_plugin_cc(Extract.cc Plugin.cc) +bro_plugin_cc(Extract.cc Plugin.cc ../../Analyzer.cc) bro_plugin_end() diff --git a/src/file_analysis/analyzer/extract/Extract.cc b/src/file_analysis/analyzer/extract/Extract.cc index ef37425003..28b5cf5a63 100644 --- a/src/file_analysis/analyzer/extract/Extract.cc +++ b/src/file_analysis/analyzer/extract/Extract.cc @@ -4,11 +4,13 @@ #include "Extract.h" #include "util.h" +#include "file_analysis/Manager.h" using namespace file_analysis; Extract::Extract(RecordVal* args, File* file, const string& arg_filename) - : file_analysis::Analyzer(args, file), filename(arg_filename) + : file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), args, file), + filename(arg_filename) { fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); diff --git a/src/file_analysis/analyzer/hash/CMakeLists.txt b/src/file_analysis/analyzer/hash/CMakeLists.txt index 5734740198..0e3143ee05 100644 --- a/src/file_analysis/analyzer/hash/CMakeLists.txt +++ b/src/file_analysis/analyzer/hash/CMakeLists.txt @@ -4,6 +4,6 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) bro_plugin_begin(Bro FileHash) -bro_plugin_cc(Hash.cc Plugin.cc) +bro_plugin_cc(Hash.cc Plugin.cc ../../Analyzer.cc) bro_plugin_bif(events.bif) bro_plugin_end() diff --git a/src/file_analysis/analyzer/hash/Hash.cc b/src/file_analysis/analyzer/hash/Hash.cc index 9835f343b6..9829934301 100644 --- a/src/file_analysis/analyzer/hash/Hash.cc +++ b/src/file_analysis/analyzer/hash/Hash.cc @@ -5,11 +5,12 @@ #include "Hash.h" #include "util.h" #include "Event.h" +#include "file_analysis/Manager.h" using namespace file_analysis; Hash::Hash(RecordVal* args, File* file, HashVal* hv, const char* arg_kind) - : file_analysis::Analyzer(args, file), hash(hv), fed(false), kind(arg_kind) + : 
file_analysis::Analyzer(file_mgr->GetComponentTag(to_upper(arg_kind).c_str()), args, file), hash(hv), fed(false), kind(arg_kind) { hash->Init(); } diff --git a/src/file_analysis/file_analysis.bif b/src/file_analysis/file_analysis.bif index b6c80ac800..0e904f298f 100644 --- a/src/file_analysis/file_analysis.bif +++ b/src/file_analysis/file_analysis.bif @@ -16,21 +16,23 @@ function Files::__set_timeout_interval%(file_id: string, t: interval%): bool %} ## :bro:see:`Files::add_analyzer`. -function Files::__add_analyzer%(file_id: string, args: any%): bool +function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool %{ using BifType::Record::Files::AnalyzerArgs; RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); - bool result = file_mgr->AddAnalyzer(file_id->CheckString(), rv); + bool result = file_mgr->AddAnalyzer(file_id->CheckString(), + file_mgr->GetComponentTag(tag), rv); Unref(rv); return new Val(result, TYPE_BOOL); %} ## :bro:see:`Files::remove_analyzer`. -function Files::__remove_analyzer%(file_id: string, args: any%): bool +function Files::__remove_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool %{ using BifType::Record::Files::AnalyzerArgs; RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); - bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(), rv); + bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(), + file_mgr->GetComponentTag(tag) , rv); Unref(rv); return new Val(result, TYPE_BOOL); %} @@ -45,7 +47,7 @@ function Files::__stop%(file_id: string%): bool ## :bro:see:`Files::analyzer_name`. function Files::__analyzer_name%(tag: Files::Tag%) : string %{ - return new StringVal(file_mgr->GetAnalyzerName(tag->InternalInt())); + return new StringVal(file_mgr->GetComponentName(tag)); %} module GLOBAL; diff --git a/src/input/readers/Raw.cc b/src/input/readers/Raw.cc index 2820923a25..0f4c4ca7d1 100644 --- a/src/input/readers/Raw.cc +++ b/src/input/readers/Raw.cc @@ -95,29 +95,32 @@ bool Raw::Execute() else if ( childpid == 0 ) { // we are the child. - close(pipes[stdout_in]); - dup2(pipes[stdout_out], stdout_fileno); + safe_close(pipes[stdout_in]); + if ( dup2(pipes[stdout_out], stdout_fileno) == -1 ) + Error(Fmt("Error on dup2 stdout_out: %d", errno)); if ( stdin_towrite ) { - close(pipes[stdin_out]); - dup2(pipes[stdin_in], stdin_fileno); + safe_close(pipes[stdin_out]); + if ( dup2(pipes[stdin_in], stdin_fileno) == -1 ) + Error(Fmt("Error on dup2 stdin_in: %d", errno)); } if ( use_stderr ) { - close(pipes[stderr_in]); - dup2(pipes[stderr_out], stderr_fileno); + safe_close(pipes[stderr_in]); + if ( dup2(pipes[stderr_out], stderr_fileno) == -1 ) + Error(Fmt("Error on dup2 stderr_out: %d", errno)); } - execl("/bin/sh", "sh", "-c", fname.c_str(), NULL); + execl("/bin/sh", "sh", "-c", fname.c_str(), (char*) NULL); fprintf(stderr, "Exec failed :(......\n"); exit(255); } else { // we are the parent - close(pipes[stdout_out]); + safe_close(pipes[stdout_out]); pipes[stdout_out] = -1; if ( Info().mode == MODE_STREAM ) @@ -125,7 +128,7 @@ bool Raw::Execute() if ( stdin_towrite ) { - close(pipes[stdin_in]); + safe_close(pipes[stdin_in]); pipes[stdin_in] = -1; fcntl(pipes[stdin_out], F_SETFL, O_NONBLOCK); // ya, just always set this to nonblocking. we do not want to block on a program receiving data. // note that there is a small gotcha with it. More data is queued when more data is read from the program output. 
Hence, when having @@ -134,7 +137,7 @@ if ( use_stderr ) { - close(pipes[stderr_out]); + safe_close(pipes[stderr_out]); pipes[stderr_out] = -1; fcntl(pipes[stderr_in], F_SETFL, O_NONBLOCK); // true for this too. } @@ -195,7 +198,10 @@ bool Raw::CloseInput() { for ( int i = 0; i < 6; i ++ ) if ( pipes[i] != -1 ) - close(pipes[i]); + { + safe_close(pipes[i]); + pipes[i] = -1; + } } file = 0; @@ -393,11 +399,13 @@ void Raw::WriteToStdin() { Error(Fmt("Writing to child process stdin failed: %d. Stopping writing at position %d", errno, pos)); stdin_towrite = 0; - close(pipes[stdin_out]); } if ( stdin_towrite == 0 ) // send EOF when we are done. - close(pipes[stdin_out]); + { + safe_close(pipes[stdin_out]); + pipes[stdin_out] = -1; + } if ( Info().mode == MODE_MANUAL && stdin_towrite != 0 ) { @@ -528,6 +536,7 @@ bool Raw::DoUpdate() if ( childpid != -1 && waitpid(childpid, &return_code, WNOHANG) != 0 ) { // child died + childpid = -1; bool signal = false; int code = 0; if ( WIFEXITED(return_code) ) @@ -539,7 +548,7 @@ else if ( WIFSIGNALED(return_code) ) { - signal = false; + signal = true; code = WTERMSIG(return_code); Error(Fmt("Child process exited due to signal %d", code)); } @@ -564,7 +573,7 @@ EndCurrentSend(); SendEvent("InputRaw::process_finished", 4, vals); - } + } diff --git a/src/main.cc b/src/main.cc index 56193a935b..6a58832964 100644 --- a/src/main.cc +++ b/src/main.cc @@ -872,6 +872,7 @@ int main(int argc, char** argv) if ( generate_documentation ) { CreateProtoAnalyzerDoc("proto-analyzers.rst"); + CreateFileAnalyzerDoc("file-analyzers.rst"); std::list<ScannedFile>::iterator it; diff --git a/src/net_util.cc b/src/net_util.cc index d91cf02de9..aa88903a8a 100644 --- a/src/net_util.cc +++ b/src/net_util.cc @@ -148,6 +148,26 @@ const char* fmt_conn_id(const uint32* src_addr, uint32 src_port, return fmt_conn_id(src, src_port, dst, dst_port); } +char* fmt_mac(const unsigned char* m, int len) + { + char* buf = new char[25]; + + if ( len < 8 ) + { + *buf = '\0'; + return buf; + } + + if ( m[6] == 0 && m[7] == 0 ) // EUI-48 + snprintf(buf, 19, "%02x:%02x:%02x:%02x:%02x:%02x", + m[0], m[1], m[2], m[3], m[4], m[5]); + else + snprintf(buf, 25, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", + m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7]); + + return buf; + } + uint32 extract_uint32(const u_char* data) { uint32 val; diff --git a/src/net_util.h b/src/net_util.h index 733d946564..e5dbbcdae2 100644 --- a/src/net_util.h +++ b/src/net_util.h @@ -156,6 +156,18 @@ extern const char* fmt_conn_id(const IPAddr& src_addr, uint32 src_port, extern const char* fmt_conn_id(const uint32* src_addr, uint32 src_port, const uint32* dst_addr, uint32 dst_port); +/** +* Given a MAC address, formats it in hex as 00:de:ad:be:ef:00. +* Supports both EUI-48 and EUI-64. If it's neither, returns +* an empty string. +* +* @param m EUI-48 or EUI-64 MAC address to format, as a char array +* @param len Number of bytes valid starting at *m*. This must be at +* least 8 for a valid address. +* @return A string of the formatted MAC. Passes ownership to caller. +*/ +extern char* fmt_mac(const unsigned char* m, int len); + // Read 4 bytes from data and return in network order.
extern uint32 extract_uint32(const u_char* data); diff --git a/src/plugin/ComponentManager.h b/src/plugin/ComponentManager.h new file mode 100644 index 0000000000..16f9d80743 --- /dev/null +++ b/src/plugin/ComponentManager.h @@ -0,0 +1,248 @@ +#ifndef PLUGIN_COMPONENT_MANAGER_H +#define PLUGIN_COMPONENT_MANAGER_H + +#include <map> +#include <list> +#include <string> + +#include "Type.h" +#include "ID.h" +#include "Var.h" +#include "Val.h" +#include "Reporter.h" + +namespace plugin { + +/** + * A class that manages tracking of plugin components (e.g. analyzers) and + * installs identifiers in the script-layer to identify them by a unique tag + * (a script-layer enum value). + * + * @tparam T A ::Tag type or derivative. + * @tparam C A plugin::TaggedComponent type derivative. + */ +template <class T, class C> +class ComponentManager { +public: + + /** + * Constructor creates a new enum type called a "Tag" to associate with + * a component. + * + * @param module The script-layer module in which to install the "Tag" ID + * representing an enum type. + */ + ComponentManager(const string& module); + + /** + * @return The script-layer module in which the component's "Tag" ID lives. + */ + const char* GetModule() const; + + /** + * @return A list of all registered components. + */ + list<C*> GetComponents() const; + + /** + * @return The enum type associated with the script-layer "Tag". + */ + EnumType* GetTagEnumType() const; + + /** + * Get a component name from its tag. + * + * @param tag A component's tag. + * @return The canonical component name. + */ + const char* GetComponentName(T tag) const; + + /** + * Get a component name from its enum value. + * + * @param val A component's enum value. + * @return The canonical component name. + */ + const char* GetComponentName(Val* val) const; + + /** + * Get a component tag from its name. + * + * @param name A component's canonical name. + * @return The component's tag, or a tag representing an error if + * no such component associated with the name exists. + */ + T GetComponentTag(const string& name) const; + + /** + * Get a component tag from its enum value. + * + * @param v A component's enum value. + * @return The component's tag, or a tag representing an error if + * no such component associated with the value exists. + */ + T GetComponentTag(Val* v) const; + +protected: + + /** + * Add a component to the internal maps used to keep track of it and create + * a script-layer ID for the component's enum value. + * + * @param component A component to track. + * @param prefix The script-layer ID associated with the component's enum + * value will be a concatenation of this prefix and the component's + * canonical name. + */ + void RegisterComponent(C* component, const string& prefix = ""); + + /** + * @param name The canonical name of a component. + * @return The component associated with the name or a null pointer if no + * such component exists. + */ + C* Lookup(const string& name) const; + + /** + * @param tag A component tag. + * @return The component associated with the tag or a null pointer if no + * such component exists. + */ + C* Lookup(const T& tag) const; + + /** + * @param val A component's enum value. + * @return The component associated with the value or a null pointer if no + * such component exists. + */ + C* Lookup(EnumVal* val) const; + +private: + + string module; /**< Script layer module in which component tags live. */ + EnumType* tag_enum_type; /**< Enum type of component tags.
*/ + map<string, C*> components_by_name; + map<T, C*> components_by_tag; + map<int, C*> components_by_val; +}; + +template <class T, class C> +ComponentManager<T, C>::ComponentManager(const string& arg_module) + : module(arg_module) + { + tag_enum_type = new EnumType(module + "::Tag"); + ::ID* id = install_ID("Tag", module.c_str(), true, true); + add_type(id, tag_enum_type, 0, 0); + } + +template <class T, class C> +const char* ComponentManager<T, C>::GetModule() const + { + return module.c_str(); + } + +template <class T, class C> +list<C*> ComponentManager<T, C>::GetComponents() const + { + list<C*> rval; + typename map<T, C*>::const_iterator i; + + for ( i = components_by_tag.begin(); i != components_by_tag.end(); ++i ) + rval.push_back(i->second); + + return rval; + } + +template <class T, class C> +EnumType* ComponentManager<T, C>::GetTagEnumType() const + { + return tag_enum_type; + } + +template <class T, class C> +const char* ComponentManager<T, C>::GetComponentName(T tag) const + { + static const char* error = "<error>"; + + if ( ! tag ) + return error; + + C* c = Lookup(tag); + + if ( ! c ) + reporter->InternalError("request for name of unknown component tag %s", + tag.AsString().c_str()); + + return c->CanonicalName(); + } + +template <class T, class C> +const char* ComponentManager<T, C>::GetComponentName(Val* val) const + { + return GetComponentName(T(val->AsEnumVal())); + } + +template <class T, class C> +T ComponentManager<T, C>::GetComponentTag(const string& name) const + { + C* c = Lookup(name); + return c ? c->Tag() : T(); + } + +template <class T, class C> +T ComponentManager<T, C>::GetComponentTag(Val* v) const + { + C* c = Lookup(v->AsEnumVal()); + return c ? c->Tag() : T(); + } + +template <class T, class C> +C* ComponentManager<T, C>::Lookup(const string& name) const + { + typename map<string, C*>::const_iterator i = + components_by_name.find(to_upper(name)); + return i != components_by_name.end() ? i->second : 0; + } + +template <class T, class C> +C* ComponentManager<T, C>::Lookup(const T& tag) const + { + typename map<T, C*>::const_iterator i = components_by_tag.find(tag); + return i != components_by_tag.end() ? i->second : 0; + } + +template <class T, class C> +C* ComponentManager<T, C>::Lookup(EnumVal* val) const + { + typename map<int, C*>::const_iterator i = + components_by_val.find(val->InternalInt()); + return i != components_by_val.end() ? i->second : 0; + } + +template <class T, class C> +void ComponentManager<T, C>::RegisterComponent(C* component, + const string& prefix) + { + const char* cname = component->CanonicalName(); + + if ( Lookup(cname) ) + reporter->FatalError("Component '%s::%s' defined more than once", + module.c_str(), cname); + + DBG_LOG(DBG_PLUGINS, "Registering component %s (tag %s)", + component->Name(), component->Tag().AsString().c_str()); + + components_by_name.insert(std::make_pair(cname, component)); + components_by_tag.insert(std::make_pair(component->Tag(), component)); + components_by_val.insert(std::make_pair( + component->Tag().AsEnumVal()->InternalInt(), component)); + + // Install an identifier for the enum value + string id = fmt("%s%s", prefix.c_str(), cname); + tag_enum_type->AddName(module, id.c_str(), + component->Tag().AsEnumVal()->InternalInt(), true); + } + +} // namespace plugin + +#endif diff --git a/src/plugin/TaggedComponent.h b/src/plugin/TaggedComponent.h new file mode 100644 index 0000000000..99eab9f230 --- /dev/null +++ b/src/plugin/TaggedComponent.h @@ -0,0 +1,85 @@ +#ifndef PLUGIN_TAGGED_COMPONENT_H +#define PLUGIN_TAGGED_COMPONENT_H + +namespace plugin { + +/** + * A class which has a tag of a given type associated with it. + * + * @tparam T A ::Tag type or derivative. + */ +template <class T> +class TaggedComponent { +public: + + /** + * Constructor creates a unique tag value for this component.
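 * (Editor's note, illustrating intended use rather than extending the
 * patch: a component type derives from both bases, as
 * file_analysis::Component does above:
 *
 *     class Component : public plugin::Component,
 *                       public plugin::TaggedComponent<file_analysis::Tag> {
 *         // ...
 *     };
 *
 * Each construction increments the per-type counter, keeping tags
 * unique across all components sharing a tag type.)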
+ * + * @param subtype A subtype associated with this component that + * further distinguishes it. The subtype will be integrated into + * the Tag that the manager associates with this component, + * and component instances can accordingly access it via Tag(). + * If not used, leave at zero. + */ + TaggedComponent(typename T::subtype_t subtype = 0); + + /** + * Copy constructor. + * + * @param other Another component from which to copy its tag value. + */ + TaggedComponent(const TaggedComponent& other); + + /** + * Assignment operator. + * + * @param other A component to assign. + * @return The assigned object. + */ + TaggedComponent& operator=(const TaggedComponent& other); + + /** + * @return The component's tag. + */ + T Tag() const; + +private: + + T tag; /**< The automatically assigned analyzer tag. */ + static typename T::type_t type_counter; /**< Used to generate globally + unique tags. */ +}; + +template <class T> +TaggedComponent<T>::TaggedComponent(typename T::subtype_t subtype) + { + tag = T(++type_counter, subtype); + } + +template <class T> +TaggedComponent<T>::TaggedComponent(const TaggedComponent& other) + { + tag = other.tag; + } + +template <class T> +TaggedComponent<T>& +TaggedComponent<T>::operator =(const TaggedComponent& other) + { + if ( &other != this ) + tag = other.tag; + + return *this; + } + +template <class T> +T TaggedComponent<T>::Tag() const + { + return tag; + } + +template <class T> typename T::type_t TaggedComponent<T>::type_counter(0); + +} // namespace plugin + +#endif diff --git a/src/probabilistic/BitVector.cc b/src/probabilistic/BitVector.cc index 6e642e62c1..e8c2b2f80e 100644 --- a/src/probabilistic/BitVector.cc +++ b/src/probabilistic/BitVector.cc @@ -1,10 +1,12 @@ // See the file "COPYING" in the main distribution directory for copyright. -#include "BitVector.h" - +#include #include + +#include "BitVector.h" #include "Serializer.h" +#include "digest.h" using namespace probabilistic; @@ -490,6 +492,21 @@ BitVector::size_type BitVector::FindNext(size_type i) const return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1); } +size_t BitVector::Hash() const + { + size_t hash = 0; + + u_char buf[SHA256_DIGEST_LENGTH]; + SHA256_CTX ctx; + sha256_init(&ctx); + + for ( size_type i = 0; i < Blocks(); ++i ) + sha256_update(&ctx, &bits[i], sizeof(bits[i])); + + sha256_final(&ctx, buf); + return *reinterpret_cast<size_t*>(buf); // Use the first bytes as seed. + } + BitVector::size_type BitVector::lowest_bit(block_type block) { block_type x = block - (block & (block - 1)); diff --git a/src/probabilistic/BitVector.h b/src/probabilistic/BitVector.h index d9c55d53c6..8e24336345 100644 --- a/src/probabilistic/BitVector.h +++ b/src/probabilistic/BitVector.h @@ -276,6 +276,13 @@ public: */ size_type FindNext(size_type i) const; + /** Computes a hash value of the internal representation. + * This is mainly for debugging/testing purposes. + * + * @return The hash. + */ + size_t Hash() const; + /** * Serializes the bit vector.
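 * (Editor's note on the Hash() accessor added above; a debugging sketch,
 * with the BitVector size constructor assumed:
 *
 *     BitVector v(64);
 *     v.Set(3);
 *     size_t h1 = v.Hash();   // SHA-256 over the blocks, first bytes
 *     v.Set(3);               // same bits, so the digest is unchanged
 *     assert(v.Hash() == h1);
 * )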
* diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 23b812269c..bcab6c9b54 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -9,6 +9,8 @@ #include "CounterVector.h" #include "Serializer.h" +#include "../util.h" + using namespace probabilistic; BloomFilter::BloomFilter() @@ -107,6 +109,11 @@ BasicBloomFilter* BasicBloomFilter::Clone() const return copy; } +std::string BasicBloomFilter::InternalState() const + { + return fmt("%" PRIu64, (uint64_t)bits->Hash()); + } + BasicBloomFilter::BasicBloomFilter() { bits = 0; @@ -133,14 +140,18 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) return (bits != 0); } -void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h) +void BasicBloomFilter::Add(const HashKey* key) { + Hasher::digest_vector h = hasher->Hash(key); + for ( size_t i = 0; i < h.size(); ++i ) bits->Set(h[i] % bits->Size()); } -size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const +size_t BasicBloomFilter::Count(const HashKey* key) const { + Hasher::digest_vector h = hasher->Hash(key); + for ( size_t i = 0; i < h.size(); ++i ) { if ( ! (*bits)[h[i] % bits->Size()] ) @@ -206,6 +217,11 @@ CountingBloomFilter* CountingBloomFilter::Clone() const return copy; } +string CountingBloomFilter::InternalState() const + { + return fmt("%" PRIu64, (uint64_t)cells->Hash()); + } + IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) bool CountingBloomFilter::DoSerialize(SerialInfo* info) const @@ -222,14 +238,18 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) } // TODO: Use partitioning in add/count to allow for reusing CMS bounds. -void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h) +void CountingBloomFilter::Add(const HashKey* key) { + Hasher::digest_vector h = hasher->Hash(key); + for ( size_t i = 0; i < h.size(); ++i ) cells->Increment(h[i] % cells->Size()); } -size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const +size_t CountingBloomFilter::Count(const HashKey* key) const { + Hasher::digest_vector h = hasher->Hash(key); + CounterVector::size_type min = std::numeric_limits<CounterVector::size_type>::max(); diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index 4865ae145c..65dda2396d 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -22,27 +22,20 @@ public: virtual ~BloomFilter(); /** - * Adds an element of type T to the Bloom filter. - * @param x The element to add + * Adds an element to the Bloom filter. + * + * @param key The key associated with the element to add. */ - template <typename T> - void Add(const T& x) - { - AddImpl((*hasher)(x)); - } + virtual void Add(const HashKey* key) = 0; /** * Retrieves the associated count of a given value. * - * @param x The value of type `T` to check. + * @param key The key associated with the element to check. * - * @return The counter associated with *x*. + * @return The counter associated with *key*. */ - template <typename T> - size_t Count(const T& x) const - { - return CountImpl((*hasher)(x)); - } + virtual size_t Count(const HashKey* key) const = 0; /** * Checks whether the Bloom filter is empty. @@ -72,6 +65,12 @@ public: */ virtual BloomFilter* Clone() const = 0; + /** + * Returns a string with a representation of the Bloom filter's + * internal state. This is for debugging/testing purposes only. + */ + virtual string InternalState() const = 0; + /** * Serializes the Bloom filter.
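 * (Editor's sketch of the HashKey-based interface that replaces the
 * template Add()/Count() above; integer HashKey construction assumed:
 *
 *     HashKey key(static_cast<bro_int_t>(42));
 *     bf->Add(&key);                // hashes via the filter's Hasher
 *     size_t n = bf->Count(&key);   // > 0 once the key was added
 * )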
* @@ -106,25 +105,6 @@ protected: */ BloomFilter(const Hasher* hasher); - /** - * Abstract method for implementinng the *Add* operation. - * - * @param hashes A set of *k* hashes for the item to add, computed by - * the internal hasher object. - * - */ - virtual void AddImpl(const Hasher::digest_vector& hashes) = 0; - - /** - * Abstract method for implementing the *Count* operation. - * - * @param hashes A set of *k* hashes for the item to add, computed by - * the internal hasher object. - * - * @return Returns the counter associated with the hashed element. - */ - virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0; - const Hasher* hasher; }; @@ -177,6 +157,7 @@ public: virtual void Clear(); virtual bool Merge(const BloomFilter* other); virtual BasicBloomFilter* Clone() const; + virtual string InternalState() const; protected: DECLARE_SERIAL(BasicBloomFilter); @@ -187,8 +168,8 @@ protected: BasicBloomFilter(); // Overridden from BloomFilter. - virtual void AddImpl(const Hasher::digest_vector& h); - virtual size_t CountImpl(const Hasher::digest_vector& h) const; + virtual void Add(const HashKey* key); + virtual size_t Count(const HashKey* key) const; private: BitVector* bits; @@ -216,6 +197,7 @@ public: virtual void Clear(); virtual bool Merge(const BloomFilter* other); virtual CountingBloomFilter* Clone() const; + virtual string InternalState() const; protected: DECLARE_SERIAL(CountingBloomFilter); @@ -226,8 +208,8 @@ protected: CountingBloomFilter(); // Overridden from BloomFilter. - virtual void AddImpl(const Hasher::digest_vector& h); - virtual size_t CountImpl(const Hasher::digest_vector& h) const; + virtual void Add(const HashKey* key); + virtual size_t Count(const HashKey* key) const; private: CounterVector* cells; diff --git a/src/probabilistic/CMakeLists.txt b/src/probabilistic/CMakeLists.txt index 7e7848cd57..7d8b7c7803 100644 --- a/src/probabilistic/CMakeLists.txt +++ b/src/probabilistic/CMakeLists.txt @@ -11,11 +11,12 @@ set(probabilistic_SRCS BloomFilter.cc CounterVector.cc Hasher.cc - HyperLogLog.cc) + HyperLogLog.cc + Topk.cc) bif_target(bloom-filter.bif) bif_target(hyper-loglog.bif) - +bif_target(top-k.bif) bro_add_subdir_library(probabilistic ${probabilistic_SRCS}) add_dependencies(bro_probabilistic generate_outputs) diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc index d5635fc0f2..8a6feae5fd 100644 --- a/src/probabilistic/CounterVector.cc +++ b/src/probabilistic/CounterVector.cc @@ -153,6 +153,11 @@ CounterVector operator|(const CounterVector& x, const CounterVector& y) } +size_t CounterVector::Hash() const + { + return bits->Hash(); + } + bool CounterVector::Serialize(SerialInfo* info) const { return SerialObj::Serialize(info); diff --git a/src/probabilistic/CounterVector.h b/src/probabilistic/CounterVector.h index df6fc57ac2..9ce522d61c 100644 --- a/src/probabilistic/CounterVector.h +++ b/src/probabilistic/CounterVector.h @@ -126,6 +126,13 @@ public: */ CounterVector& operator|=(const CounterVector& other); + /** Computes a hash value of the internal representation. + * This is mainly for debugging/testing purposes. + * + * @return The hash. + */ + size_t Hash() const; + /** * Serializes the bit vector. * diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index 17597b9a82..f5b1f4f5f7 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -1,13 +1,42 @@ // See the file "COPYING" in the main distribution directory for copyright. 
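// (Editor's note: MakeSeed(), added below, derives a deterministic seed
// from caller-supplied bytes, from the script-level global_hash_seed, or
// from Bro's initial seed. A hedged usage sketch, names illustrative:
//
//     std::string name = "my-filter";
//     size_t seed = Hasher::MakeSeed(name.data(), name.size());
//     DefaultHasher hasher(4, seed);   // four seeded hash functions
// )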
#include +#include #include "Hasher.h" +#include "NetVar.h" #include "digest.h" #include "Serializer.h" using namespace probabilistic; +size_t Hasher::MakeSeed(const void* data, size_t size) + { + u_char buf[SHA256_DIGEST_LENGTH]; + SHA256_CTX ctx; + sha256_init(&ctx); + + if ( data ) + sha256_update(&ctx, data, size); + + else if ( global_hash_seed && global_hash_seed->Len() > 0 ) + sha256_update(&ctx, global_hash_seed->Bytes(), global_hash_seed->Len()); + + else + { + unsigned int first_seed = initial_seed(); + sha256_update(&ctx, &first_seed, sizeof(first_seed)); + } + + sha256_final(&ctx, buf); + return *reinterpret_cast(buf); // Use the first bytes as seed. + } + +Hasher::digest_vector Hasher::Hash(const HashKey* key) const + { + return Hash(key->Key(), key->Size()); + } + bool Hasher::Serialize(SerialInfo* info) const { return SerialObj::Serialize(info); @@ -25,7 +54,7 @@ bool Hasher::DoSerialize(SerialInfo* info) const if ( ! SERIALIZE(static_cast(k)) ) return false; - return SERIALIZE_STR(name.c_str(), name.size()); + return SERIALIZE(static_cast(seed)); } bool Hasher::DoUnserialize(UnserialInfo* info) @@ -39,62 +68,52 @@ bool Hasher::DoUnserialize(UnserialInfo* info) k = serial_k; assert(k > 0); - const char* serial_name; - if ( ! UNSERIALIZE_STR(&serial_name, 0) ) + uint64 serial_seed; + if ( ! UNSERIALIZE(&serial_seed) ) return false; - name = serial_name; - delete [] serial_name; + seed = serial_seed; return true; } -Hasher::Hasher(size_t k, const std::string& arg_name) - : k(k) +Hasher::Hasher(size_t arg_k, size_t arg_seed) { - k = k; - name = arg_name; + k = arg_k; + seed = arg_seed; } - -UHF::UHF(size_t seed, const std::string& extra) - : h(compute_seed(seed, extra)) +UHF::UHF(size_t arg_seed) + : h(arg_seed) { + seed = arg_seed; } +// This function is almost equivalent to HashKey::HashBytes except that it +// does not depend on global state and that we mix in the seed multiple +// times. Hasher::digest UHF::hash(const void* x, size_t n) const { - assert(n <= UHASH_KEY_SIZE); - return n == 0 ? 0 : h(x, n); + if ( n <= UHASH_KEY_SIZE ) + return n == 0 ? 0 : h(x, n); + + unsigned char d[16]; + MD5(reinterpret_cast(x), n, d); + + const unsigned char* s = reinterpret_cast(&seed); + for ( size_t i = 0; i < 16; ++i ) + d[i] ^= s[i % sizeof(seed)]; + + MD5(d, 16, d); + + return d[0]; } -size_t UHF::compute_seed(size_t seed, const std::string& extra) +DefaultHasher::DefaultHasher(size_t k, size_t seed) + : Hasher(k, seed) { - u_char buf[SHA256_DIGEST_LENGTH]; - SHA256_CTX ctx; - sha256_init(&ctx); - - if ( extra.empty() ) - { - unsigned int first_seed = initial_seed(); - sha256_update(&ctx, &first_seed, sizeof(first_seed)); - } - - else - sha256_update(&ctx, extra.c_str(), extra.size()); - - sha256_update(&ctx, &seed, sizeof(seed)); - sha256_final(&ctx, buf); - - // Take the first sizeof(size_t) bytes as seed. 
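
Note the new fallback in UHF::hash() above: inputs longer than the fixed H3 key size are first folded through MD5 with the seed mixed in, where the old code simply asserted on them. If that path is reachable from script land (large serialized values should exceed UHASH_KEY_SIZE), arbitrarily large elements become safe to add; a small sketch under that assumption, with sizes chosen arbitrarily:

    event bro_init()
        {
        local bf = bloomfilter_basic_init(0.01, 100);

        # Build a key well past the fixed-size H3 input limit.
        local big = "0123456789";
        big = cat(big, big);    # 20 bytes
        big = cat(big, big);    # 40 bytes
        big = cat(big, big);    # 80 bytes

        bloomfilter_add(bf, big);
        print bloomfilter_lookup(bf, big);    # 1
        }
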
- return *reinterpret_cast(buf); - } - -DefaultHasher::DefaultHasher(size_t k, const std::string& name) - : Hasher(k, name) - { - for ( size_t i = 0; i < k; ++i ) - hash_functions.push_back(UHF(i, name)); + for ( size_t i = 1; i <= k; ++i ) + hash_functions.push_back(UHF(Seed() + bro_prng(i))); } Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const @@ -137,13 +156,13 @@ bool DefaultHasher::DoUnserialize(UnserialInfo* info) hash_functions.clear(); for ( size_t i = 0; i < K(); ++i ) - hash_functions.push_back(UHF(i, Name())); + hash_functions.push_back(UHF(Seed() + bro_prng(i))); return true; } -DoubleHasher::DoubleHasher(size_t k, const std::string& name) - : Hasher(k, name), h1(1, name), h2(2, name) +DoubleHasher::DoubleHasher(size_t k, size_t seed) + : Hasher(k, seed), h1(seed + bro_prng(1)), h2(seed + bro_prng(2)) { } @@ -187,8 +206,8 @@ bool DoubleHasher::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(Hasher); - h1 = UHF(1, Name()); - h2 = UHF(2, Name()); + h1 = UHF(Seed() + bro_prng(1)); + h2 = UHF(Seed() + bro_prng(2)); return true; } diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index 3acd5c5867..a3322f5e37 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -18,6 +18,20 @@ public: typedef hash_t digest; typedef std::vector digest_vector; + /** + * Creates a valid hasher seed from an arbitrary string. + * + * @param data A pointer to contiguous data that should be crunched into a + * seed. If 0, the function tries to find a global_hash_seed script variable + * to derive a seed from. If this variable does not exist, the function uses + * the initial seed generated at Bro startup. + * + * @param size The number of bytes of *data*. + * + * @return A seed suitable for hashers. + */ + static size_t MakeSeed(const void* data, size_t size); + /** * Destructor. */ @@ -36,6 +50,15 @@ public: return Hash(&x, sizeof(T)); } + /** + * Computes hash values for an element. + * + * @param x The key of the value to hash. + * + * @return Vector of *k* hash values. + */ + digest_vector Hash(const HashKey* key) const; + /** * Computes the hashes for a set of bytes. * @@ -64,11 +87,9 @@ public: size_t K() const { return k; } /** - * Returns the hasher's name. If not empty, the hasher uses this descriptor - * to seed its *k* hash functions. Otherwise the hasher mixes in the initial - * seed derived from the environment variable `$BRO_SEED`. + * Returns the seed used to construct the hasher. */ - const std::string& Name() const { return name; } + size_t Seed() const { return seed; } bool Serialize(SerialInfo* info) const; static Hasher* Unserialize(UnserialInfo* info); @@ -81,16 +102,15 @@ protected: /** * Constructor. * - * @param k the number of hash functions. + * @param arg_k the number of hash functions. * - * @param name A name for the hasher. Hashers with the same name - * should provide consistent results. + * @param arg_seed The seed for the hasher. */ - Hasher(size_t k, const std::string& name); + Hasher(size_t arg_k, size_t arg_seed); private: size_t k; - std::string name; + size_t seed; }; /** @@ -103,13 +123,9 @@ public: * Constructs an H3 hash function seeded with a given seed and an * optional extra seed to replace the initial Bro seed. * - * @param seed The seed to use for this instance. - * - * @param extra If not empty, this parameter replaces the initial - * seed to compute the seed for t to compute the seed NUL-terminated - * string as additional seed. + * @param arg_seed The seed to use for this instance. 
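
With all hash functions now derived from Seed() plus a per-function offset, two filters agree on their hashing exactly when they were created from the same seed. The flip side, per the BiF documentation below, is that differently seeded filters cannot be merged; a sketch of that failure mode (the names are made up):

    event bro_init()
        {
        local a = bloomfilter_basic_init(0.1, 1000, "seed-a");
        local b = bloomfilter_basic_init(0.1, 1000, "seed-b");
        bloomfilter_add(a, 1.2.3.4);
        bloomfilter_add(b, 5.6.7.8);

        # Expected to fail with a reporter error at runtime: the filters
        # were built from different seeds, hence different hash functions.
        local m = bloomfilter_merge(a, b);
        }
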
*/ - UHF(size_t seed = 0, const std::string& extra = ""); + UHF(size_t arg_seed = 0); template Hasher::digest operator()(const T& x) const @@ -152,9 +168,10 @@ public: } private: - static size_t compute_seed(size_t seed, const std::string& extra); + static size_t compute_seed(size_t seed); H3 h; + size_t seed; }; @@ -169,9 +186,9 @@ public: * * @param k The number of hash functions to use. * - * @param name The name of the hasher. + * @param seed The seed for the hasher. */ - DefaultHasher(size_t k, const std::string& name = ""); + DefaultHasher(size_t k, size_t seed); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; @@ -197,9 +214,9 @@ public: * * @param k The number of hash functions to use. * - * @param name The name of the hasher. + * @param seed The seed for the hasher. */ - DoubleHasher(size_t k, const std::string& name = ""); + DoubleHasher(size_t k, size_t seed); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; diff --git a/src/probabilistic/Topk.cc b/src/probabilistic/Topk.cc new file mode 100644 index 0000000000..95d0ac732e --- /dev/null +++ b/src/probabilistic/Topk.cc @@ -0,0 +1,499 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "probabilistic/Topk.h" +#include "CompHash.h" +#include "Reporter.h" +#include "Serializer.h" +#include "NetVar.h" + +namespace probabilistic { + +IMPLEMENT_SERIAL(TopkVal, SER_TOPK_VAL); + +static void topk_element_hash_delete_func(void* val) + { + Element* e = (Element*) val; + delete e; + } + +Element::~Element() + { + Unref(value); + } + +void TopkVal::Typify(BroType* t) + { + assert(!hash && !type); + type = t->Ref(); + TypeList* tl = new TypeList(t); + tl->Append(t->Ref()); + hash = new CompositeHash(tl); + Unref(tl); + } + +HashKey* TopkVal::GetHash(Val* v) const + { + HashKey* key = hash->ComputeHash(v, 1); + assert(key); + return key; + } + +TopkVal::TopkVal(uint64 arg_size) : OpaqueVal(topk_type) + { + elementDict = new PDict(Element); + elementDict->SetDeleteFunc(topk_element_hash_delete_func); + size = arg_size; + type = 0; + numElements = 0; + pruned = false; + hash = 0; + } + +TopkVal::TopkVal() : OpaqueVal(topk_type) + { + elementDict = new PDict(Element); + elementDict->SetDeleteFunc(topk_element_hash_delete_func); + size = 0; + type = 0; + numElements = 0; + hash = 0; + } + +TopkVal::~TopkVal() + { + elementDict->Clear(); + delete elementDict; + + // now all elements are already gone - delete the buckets + std::list::iterator bi = buckets.begin(); + while ( bi != buckets.end() ) + { + delete *bi; + bi++; + } + + Unref(type); + delete hash; + } + +void TopkVal::Merge(const TopkVal* value, bool doPrune) + { + if ( type == 0 ) + { + assert(numElements == 0); + Typify(value->type); + } + + else + { + if ( ! same_type(type, value->type) ) + { + reporter->Error("Cannot merge top-k elements of differing types."); + return; + } + } + + std::list::const_iterator it = value->buckets.begin(); + while ( it != value->buckets.end() ) + { + Bucket* b = *it; + uint64_t currcount = b->count; + std::list::const_iterator eit = b->elements.begin(); + + while ( eit != b->elements.end() ) + { + Element* e = *eit; + // lookup if we already know this one... 
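
TopkVal::Typify() above locks the tracked Bro type on first use; both Merge() and Encountered() refuse values of any other type. In script terms, mirroring the type errors enforced here:

    event bro_init()
        {
        local t = topk_init(5);
        topk_add(t, "a");    # first element fixes the tracked type to string
        topk_add(t, 42);     # expected reporter error; observation is ignored
        print topk_get_top(t, 5);    # [a]
        }
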
+ HashKey* key = GetHash(e->value); + Element* olde = (Element*) elementDict->Lookup(key); + + if ( olde == 0 ) + { + olde = new Element(); + olde->epsilon = 0; + olde->value = e->value->Ref(); + // insert at bucket position 0 + if ( buckets.size() > 0 ) + { + assert (buckets.front()-> count > 0 ); + } + + Bucket* newbucket = new Bucket(); + newbucket->count = 0; + newbucket->bucketPos = buckets.insert(buckets.begin(), newbucket); + + olde->parent = newbucket; + newbucket->elements.insert(newbucket->elements.end(), olde); + + elementDict->Insert(key, olde); + numElements++; + + } + + // now that we are sure that the old element is present - increment epsilon + olde->epsilon += e->epsilon; + + // and increment position... + IncrementCounter(olde, currcount); + delete key; + + eit++; + } + + it++; + } + + // now we have added everything. And our top-k table could be too big. + // prune everything... + + assert(size > 0); + + if ( ! doPrune ) + return; + + while ( numElements > size ) + { + pruned = true; + assert(buckets.size() > 0 ); + Bucket* b = buckets.front(); + assert(b->elements.size() > 0); + + Element* e = b->elements.front(); + HashKey* key = GetHash(e->value); + elementDict->RemoveEntry(key); + delete e; + + b->elements.pop_front(); + + if ( b->elements.size() == 0 ) + { + delete b; + buckets.pop_front(); + } + + numElements--; + } + } + +bool TopkVal::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_TOPK_VAL, OpaqueVal); + + bool v = true; + + v &= SERIALIZE(size); + v &= SERIALIZE(numElements); + v &= SERIALIZE(pruned); + + bool type_present = (type != 0); + v &= SERIALIZE(type_present); + + if ( type_present ) + v &= type->Serialize(info); + else + assert(numElements == 0); + + uint64_t i = 0; + std::list::const_iterator it = buckets.begin(); + while ( it != buckets.end() ) + { + Bucket* b = *it; + uint32_t elements_count = b->elements.size(); + v &= SERIALIZE(elements_count); + v &= SERIALIZE(b->count); + + std::list::const_iterator eit = b->elements.begin(); + while ( eit != b->elements.end() ) + { + Element* element = *eit; + v &= SERIALIZE(element->epsilon); + v &= element->value->Serialize(info); + + eit++; + i++; + } + + it++; + } + + assert(i == numElements); + + return v; + } + +bool TopkVal::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(OpaqueVal); + + bool v = true; + + v &= UNSERIALIZE(&size); + v &= UNSERIALIZE(&numElements); + v &= UNSERIALIZE(&pruned); + + bool type_present = false; + v &= UNSERIALIZE(&type_present); + if ( type_present ) + { + BroType* deserialized_type = BroType::Unserialize(info); + + Typify(deserialized_type); + Unref(deserialized_type); + assert(type); + } + else + assert(numElements == 0); + + uint64_t i = 0; + while ( i < numElements ) + { + Bucket* b = new Bucket(); + uint32_t elements_count; + v &= UNSERIALIZE(&elements_count); + v &= UNSERIALIZE(&b->count); + b->bucketPos = buckets.insert(buckets.end(), b); + + for ( uint64_t j = 0; j < elements_count; j++ ) + { + Element* e = new Element(); + v &= UNSERIALIZE(&e->epsilon); + e->value = Val::Unserialize(info, type); + e->parent = b; + + b->elements.insert(b->elements.end(), e); + + HashKey* key = GetHash(e->value); + assert (elementDict->Lookup(key) == 0); + + elementDict->Insert(key, e); + delete key; + + i++; + } + } + + assert(i == numElements); + + return v; + } + + +VectorVal* TopkVal::GetTopK(int k) const // returns vector + { + if ( numElements == 0 ) + { + reporter->Error("Cannot return topk of empty"); + return 0; + } + + TypeList* vector_index = new 
TypeList(type); + vector_index->Append(type->Ref()); + VectorType* v = new VectorType(vector_index); + VectorVal* t = new VectorVal(v); + + // this does no estimation if the results is correct! + // in any case - just to make this future-proof (and I am lazy) - this can return more than k. + + int read = 0; + std::list::const_iterator it = buckets.end(); + it--; + while (read < k ) + { + //printf("Bucket %llu\n", (*it)->count); + std::list::iterator eit = (*it)->elements.begin(); + while ( eit != (*it)->elements.end() ) + { + //printf("Size: %ld\n", (*it)->elements.size()); + t->Assign(read, (*eit)->value->Ref()); + read++; + eit++; + } + + if ( it == buckets.begin() ) + break; + + it--; + } + + Unref(v); + return t; + } + +uint64_t TopkVal::GetCount(Val* value) const + { + HashKey* key = GetHash(value); + Element* e = (Element*) elementDict->Lookup(key); + + if ( e == 0 ) + { + reporter->Error("GetCount for element that is not in top-k"); + return 0; + } + + delete key; + return e->parent->count; + } + +uint64_t TopkVal::GetEpsilon(Val* value) const + { + HashKey* key = GetHash(value); + Element* e = (Element*) elementDict->Lookup(key); + + if ( e == 0 ) + { + reporter->Error("GetEpsilon for element that is not in top-k"); + return 0; + } + + delete key; + return e->epsilon; + } + +uint64_t TopkVal::GetSum() const + { + uint64_t sum = 0; + + std::list::const_iterator it = buckets.begin(); + while ( it != buckets.end() ) + { + sum += (*it)->elements.size() * (*it)->count; + + it++; + } + + if ( pruned ) + reporter->Warning("TopkVal::GetSum() was used on a pruned data structure. Result values do not represent total element count"); + + return sum; + } + +void TopkVal::Encountered(Val* encountered) + { + // ok, let's see if we already know this one. + + if ( numElements == 0 ) + Typify(encountered->Type()); + else + if ( ! same_type(type, encountered->Type()) ) + { + reporter->Error("Trying to add element to topk with differing type from other elements"); + return; + } + + // Step 1 - get the hash. + HashKey* key = GetHash(encountered); + Element* e = (Element*) elementDict->Lookup(key); + + if ( e == 0 ) + { + e = new Element(); + e->epsilon = 0; + e->value = encountered->Ref(); // or no ref? + + // well, we do not know this one yet... + if ( numElements < size ) + { + // brilliant. just add it at position 1 + if ( buckets.size() == 0 || (*buckets.begin())->count > 1 ) + { + Bucket* b = new Bucket(); + b->count = 1; + std::list::iterator pos = buckets.insert(buckets.begin(), b); + b->bucketPos = pos; + b->elements.insert(b->elements.end(), e); + e->parent = b; + } + else + { + Bucket* b = *buckets.begin(); + assert(b->count == 1); + b->elements.insert(b->elements.end(), e); + e->parent = b; + } + + elementDict->Insert(key, e); + numElements++; + delete key; + + return; // done. it is at pos 1. + } + + else + { + // replace element with min-value + Bucket* b = *buckets.begin(); // bucket with smallest elements + + // evict oldest element with least hits. + assert(b->elements.size() > 0); + HashKey* deleteKey = GetHash((*(b->elements.begin()))->value); + b->elements.erase(b->elements.begin()); + Element* deleteElement = (Element*) elementDict->RemoveEntry(deleteKey); + assert(deleteElement); // there has to have been a minimal element... + delete deleteElement; + delete deleteKey; + + // and add the new one to the end + e->epsilon = b->count; + b->elements.insert(b->elements.end(), e); + elementDict->Insert(key, e); + e->parent = b; + + // fallthrough, increment operation has to run! 
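
Encountered() above is essentially the Space-Saving algorithm of Metwally et al.: when the structure is full, the element with the minimal count is evicted and the newcomer inherits that count as its epsilon, the maximum possible overestimation. A worked script-level example matching the bifs.topk baseline added later in this change:

    event bro_init()
        {
        local t = topk_init(2);    # track at most two elements
        topk_add(t, "a");
        topk_add(t, "b");
        topk_add(t, "b");
        topk_add(t, "c");    # structure full: evicts "a", "c" inherits its count

        print topk_get_top(t, 2);      # [b, c]
        print topk_count(t, "c");      # 2 (1 real observation + 1 inherited)
        print topk_epsilon(t, "c");    # 1
        }
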
+ } + + } + + // ok, we now have an element in e + delete key; + IncrementCounter(e); // well, this certainly was anticlimatic. + } + +// increment by count +void TopkVal::IncrementCounter(Element* e, unsigned int count) + { + Bucket* currBucket = e->parent; + uint64 currcount = currBucket->count; + + // well, let's test if there is a bucket for currcount++ + std::list::iterator bucketIter = currBucket->bucketPos; + + Bucket* nextBucket = 0; + + bucketIter++; + + while ( bucketIter != buckets.end() && (*bucketIter)->count < currcount+count ) + bucketIter++; + + if ( bucketIter != buckets.end() && (*bucketIter)->count == currcount+count ) + nextBucket = *bucketIter; + + if ( nextBucket == 0 ) + { + // the bucket for the value that we want does not exist. + // create it... + + Bucket* b = new Bucket(); + b->count = currcount+count; + + std::list::iterator nextBucketPos = buckets.insert(bucketIter, b); + b->bucketPos = nextBucketPos; // and give it the iterator we know now. + + nextBucket = b; + } + + // ok, now we have the new bucket in nextBucket. Shift the element over... + currBucket->elements.remove(e); + nextBucket->elements.insert(nextBucket->elements.end(), e); + + e->parent = nextBucket; + + // if currBucket is empty, we have to delete it now + if ( currBucket->elements.size() == 0 ) + { + buckets.remove(currBucket); + delete currBucket; + currBucket = 0; + } + } + +}; diff --git a/src/probabilistic/Topk.h b/src/probabilistic/Topk.h new file mode 100644 index 0000000000..a9a0d80818 --- /dev/null +++ b/src/probabilistic/Topk.h @@ -0,0 +1,170 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#ifndef topk_h +#define topk_h + +#include +#include "Val.h" +#include "CompHash.h" +#include "OpaqueVal.h" + +// This class implements the top-k algorithm. Or - to be more precise - an +// interpretation of it. + +namespace probabilistic { + +struct Element; + +struct Bucket { + uint64 count; + std::list elements; + + // Iterators only get invalidated for removed elements. This one + // points to us - so it is invalid when we are no longer there. Cute, + // isn't it? + std::list::iterator bucketPos; +}; + +struct Element { + uint64 epsilon; + Val* value; + Bucket* parent; + + ~Element(); +}; + +declare(PDict, Element); + +class TopkVal : public OpaqueVal { + +public: + /** + * Construct a TopkVal. + * + * @param size specifies how many total elements are tracked + * + * @return A newly initialized TopkVal + */ + TopkVal(uint64 size); + + /** + * Destructor. + */ + ~TopkVal(); + + /** + * Call this when a new value is encountered. Note that on the first + * call, the Bro type of the value types that are counted is set. All + * following calls to encountered have to specify the same type. + * + * @param value The encountered element + */ + void Encountered(Val* value); + + /** + * Get the first *k* elements of the result vector. At the moment, + * this does not check if it is in the right order or if we can prove + * that these are the correct top-k. Use count and epsilon for this. + * + * @param k Number of top-elements to return + * + * @returns The top-k encountered elements + */ + VectorVal* GetTopK(int k) const; + + /** + * Get the current count tracked in the top-k data structure for a + * certain val. Returns 0 if the val is unknown (and logs the error + * to reporter). 
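
As the comment in GetTopK() above notes, the returned vector is not checked for exact ordering and may even contain more than k entries. A cautious caller can therefore treat count minus epsilon as the guaranteed lower bound on an element's true frequency; a sketch:

    event bro_init()
        {
        local t = topk_init(10);
        topk_add(t, "x");
        topk_add(t, "x");
        topk_add(t, "y");

        local top = topk_get_top(t, 2);
        print top;    # [x, y] (nothing was pruned, so counts are exact)

        # Guaranteed minimum number of observations of "x":
        print topk_count(t, "x") - topk_epsilon(t, "x");    # 2
        }
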
+ * + * @param value Bro value to get counts for + * + * @returns internal count for val, 0 if unknown + */ + uint64_t GetCount(Val* value) const; + + /** + * Get the current epsilon tracked in the top-k data structure for a + * certain val. + * + * @param value Bro value to get epsilons for + * + * @returns the epsilon. Returns 0 if the val is unknown (and logs + * the error to reporter) + */ + uint64_t GetEpsilon(Val* value) const; + + /** + * Get the size set in the constructor + * + * @returns size of the top-k structure + */ + uint64_t GetSize() const { return size; } + + /** + * Get the sum of all counts of all tracked elements. This is equal + * to the number of total observations up to this moment, if no + * elements were pruned from the data structure. + * + * @returns sum of all counts + */ + uint64_t GetSum() const; + + /** + * Merge another top-k data structure into this one. doPrune + * specifies if the total count of elements is limited to size after + * merging. Please note, that pruning will invalidate the results of + * getSum. + * + * @param value TopkVal to merge into this TopkVal + * + * @param doPrune prune resulting TopkVal to size after merging + */ + void Merge(const TopkVal* value, bool doPrune=false); + +protected: + /** + * Construct an empty TopkVal. Only used for deserialization + */ + TopkVal(); + +private: + /** + * Increment the counter for a specific element + * + * @param e element to increment counter for + * + * @param count increment counter by this much + */ + void IncrementCounter(Element* e, unsigned int count = 1); + + /** + * get the hashkey for a specific value + * + * @param v value to generate key for + * + * @returns HashKey for value + */ + HashKey* GetHash(Val* v) const; // this probably should go somewhere else. + + /** + * Set the type that this TopK instance tracks + * + * @param t type that is tracked + */ + void Typify(BroType* t); + + BroType* type; + CompositeHash* hash; + std::list buckets; + PDict(Element)* elementDict; + uint64 size; // how many elements are we tracking? + uint64 numElements; // how many elements do we have at the moment + bool pruned; // was this data structure pruned? + + DECLARE_SERIAL(TopkVal); +}; + +}; + +#endif diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index a3567ad6f7..6994f651dd 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -20,23 +20,20 @@ module GLOBAL; ## Creates a basic Bloom filter. ## -## .. note:: A Bloom filter can have a name associated with it. In the future, -## Bloom filters with the same name will be compatible across indepedent Bro -## instances, i.e., it will be possible to merge them. Currently, however, that is -## not yet supported. -## ## fp: The desired false-positive rate. ## ## capacity: the maximum number of elements that guarantees a false-positive ## rate of *fp*. ## ## name: A name that uniquely identifies and seeds the Bloom filter. If empty, -## the filter will remain tied to the current Bro process. +## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use +## a local seed tied to the current Bro process. Only filters with the same seed +## can be merged with :bro:id:`bloomfilter_merge` . ## ## Returns: A Bloom filter handle. ## -## .. bro:see:: bloomfilter_counting_init bloomfilter_add bloomfilter_lookup -## bloomfilter_clear bloomfilter_merge +## .. 
bro:see:: bloomfilter_basic_init2 bloomfilter_counting_init bloomfilter_add
+## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
function bloomfilter_basic_init%(fp: double, capacity: count,
name: string &default=""%): opaque of bloomfilter
%{
@@ -48,18 +45,53 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
size_t cells = BasicBloomFilter::M(fp, capacity);
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
- const Hasher* h = new DefaultHasher(optimal_k, name->CheckString());
+ size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
+ name->Len());
+ const Hasher* h = new DefaultHasher(optimal_k, seed);
+
+ return new BloomFilterVal(new BasicBloomFilter(h, cells));
+ %}
+
+## Creates a basic Bloom filter. This function serves as a low-level
+## alternative to bloomfilter_basic_init where the user has full control over
+## the number of hash functions and cells in the underlying bit vector.
+##
+## k: The number of hash functions to use.
+##
+## cells: The number of cells of the underlying bit vector.
+##
+## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
+## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
+## a local seed tied to the current Bro process. Only filters with the same seed
+## can be merged with :bro:id:`bloomfilter_merge`.
+##
+## Returns: A Bloom filter handle.
+##
+## .. bro:see:: bloomfilter_basic_init bloomfilter_counting_init bloomfilter_add
+## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
+function bloomfilter_basic_init2%(k: count, cells: count,
+ name: string &default=""%): opaque of bloomfilter
+ %{
+ if ( k == 0 )
+ {
+ reporter->Error("number of hash functions must be positive");
+ return 0;
+ }
+ if ( cells == 0 )
+ {
+ reporter->Error("number of cells must be positive");
+ return 0;
+ }
+
+ size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
+ name->Len());
+ const Hasher* h = new DefaultHasher(k, seed);
return new BloomFilterVal(new BasicBloomFilter(h, cells));
%}
## Creates a counting Bloom filter.
##
-## .. note:: A Bloom filter can have a name associated with it. In the future,
-## Bloom filters with the same name will be compatible across indepedent Bro
-## instances, i.e., it will be possible to merge them. Currently, however, that is
-## not yet supported.
-##
## k: The number of hash functions to use.
##
## cells: The number of cells of the underlying counter vector. As there's no
@@ -71,12 +103,14 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
## becomes a cell of size *w* bits.
##
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
-## the filter will remain tied to the current Bro process.
+## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
+## a local seed tied to the current Bro process. Only filters with the same seed
+## can be merged with :bro:id:`bloomfilter_merge`.
##
## Returns: A Bloom filter handle.
##
-## .. bro:see:: bloomfilter_basic_init bloomfilter_add bloomfilter_lookup
-## bloomfilter_clear bloomfilter_merge
+## ..
bro:see:: bloomfilter_basic_init bloomfilter_basic_init2 bloomfilter_add
+## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
function bloomfilter_counting_init%(k: count, cells: count, max: count,
name: string &default=""%): opaque of bloomfilter
%{
@@ -86,7 +120,10 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
return 0;
}
- const Hasher* h = new DefaultHasher(k, name->CheckString());
+ size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
+ name->Len());
+
+ const Hasher* h = new DefaultHasher(k, seed);
uint16 width = 1;
while ( max >>= 1 )
@@ -101,8 +138,9 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
##
## x: The element to add.
##
-## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init loomfilter_lookup
-## bloomfilter_clear bloomfilter_merge
+## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
+## bloomfilter_counting_init bloomfilter_lookup bloomfilter_clear
+## bloomfilter_merge
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
%{
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
@@ -127,8 +165,9 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
##
## Returns: the counter associated with *x* in *bf*.
##
-## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
-## bloomfilter_add bloomfilter_clear bloomfilter_merge
+## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
+## bloomfilter_counting_init bloomfilter_add bloomfilter_clear
+## bloomfilter_merge
function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
%{
const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf);
@@ -154,8 +193,9 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
##
## bf: The Bloom filter handle.
##
-## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
-## bloomfilter_add bloomfilter_lookup bloomfilter_merge
+## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
+## bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
+## bloomfilter_merge
function bloomfilter_clear%(bf: opaque of bloomfilter%): any
%{
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
@@ -178,15 +218,18 @@ function bloomfilter_clear%(bf: opaque of bloomfilter%): any
##
## Returns: The union of *bf1* and *bf2*.
##
-## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
-## bloomfilter_add bloomfilter_lookup bloomfilter_clear
+## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
+## bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
+## bloomfilter_clear
function bloomfilter_merge%(bf1: opaque of bloomfilter,
bf2: opaque of bloomfilter%): opaque of bloomfilter
%{
const BloomFilterVal* bfv1 = static_cast<const BloomFilterVal*>(bf1);
const BloomFilterVal* bfv2 = static_cast<const BloomFilterVal*>(bf2);
- if ( ! same_type(bfv1->Type(), bfv2->Type()) )
+ if ( bfv1->Type() && // any one 0 is ok here
+ bfv2->Type() &&
+ ! same_type(bfv1->Type(), bfv2->Type()) )
{
reporter->Error("incompatible Bloom filter types");
return 0;
@@ -194,3 +237,13 @@ function bloomfilter_merge%(bf1: opaque of bloomfilter,
return BloomFilterVal::Merge(bfv1, bfv2);
%}
+
+## Returns a string with a representation of a Bloom filter's internal
+## state. This is for debugging/testing purposes only.
+##
+## bf: The Bloom filter handle.
+function bloomfilter_internal_state%(bf: opaque of bloomfilter%): string
+ %{
+ BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
+ return new StringVal(bfv->InternalState());
+ %}
diff --git a/src/probabilistic/top-k.bif b/src/probabilistic/top-k.bif
new file mode 100644
index 0000000000..9ad36cadef
--- /dev/null
+++ b/src/probabilistic/top-k.bif
@@ -0,0 +1,184 @@
+# ===========================================================================
+#
+# Top-K Functions
+#
+# ===========================================================================
+
+
+%%{
+#include "probabilistic/Topk.h"
+%%}
+
+## Creates a top-k data structure that tracks *size* elements.
+##
+## size: number of elements to track
+##
+## Returns: Opaque pointer to the data structure.
+##
+## .. bro:see:: topk_add topk_get_top topk_count topk_epsilon
+## topk_size topk_sum topk_merge topk_merge_prune
+function topk_init%(size: count%): opaque of topk
+ %{
+ probabilistic::TopkVal* v = new probabilistic::TopkVal(size);
+ return v;
+ %}
+
+## Add a new observed object to the data structure.
+##
+## .. note:: The first added object sets the type of data tracked by
+## the top-k data structure. All following values have to be of the same
+## type.
+##
+## handle: the TopK handle
+##
+## value: observed value
+##
+## .. bro:see:: topk_init topk_get_top topk_count topk_epsilon
+## topk_size topk_sum topk_merge topk_merge_prune
+function topk_add%(handle: opaque of topk, value: any%): any
+ %{
+ assert(handle);
+ probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
+ h->Encountered(value);
+
+ return 0;
+ %}
+
+## Get the first *k* elements of the top-k data structure.
+##
+## handle: the TopK handle
+##
+## k: number of elements to return
+##
+## Returns: vector of the first k elements
+##
+## .. bro:see:: topk_init topk_add topk_count topk_epsilon
+## topk_size topk_sum topk_merge topk_merge_prune
+function topk_get_top%(handle: opaque of topk, k: count%): any
+ %{
+ assert(handle);
+ probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
+ return h->GetTopK(k);
+ %}
+
+## Get an overestimated count of how often a value has been encountered.
+##
+## .. note:: The value has to be part of the currently tracked elements;
+## otherwise 0 will be returned and an error message will be logged via the
+## reporter.
+##
+## handle: the TopK handle
+##
+## value: Value to look up the count for.
+##
+## Returns: Overestimated number of times the element has been encountered.
+##
+## .. bro:see:: topk_init topk_add topk_get_top topk_epsilon
+## topk_size topk_sum topk_merge topk_merge_prune
+function topk_count%(handle: opaque of topk, value: any%): count
+ %{
+ assert(handle);
+ probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
+ return new Val(h->GetCount(value), TYPE_COUNT);
+ %}
+
+## Get the maximal overestimation for a count.
+##
+## .. note:: The same restrictions as for :bro:id:`topk_count` apply.
+##
+## handle: the TopK handle
+##
+## value: Value to look up epsilon for.
+##
+## Returns: Number which represents the maximal overestimation for the count of this element.
+##
+## .. bro:see:: topk_init topk_add topk_get_top topk_count
+## topk_size topk_sum topk_merge topk_merge_prune
+function topk_epsilon%(handle: opaque of topk, value: any%): count
+ %{
+ assert(handle);
+ probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
+ return new Val(h->GetEpsilon(value), TYPE_COUNT);
+ %}
+
+## Get the number of elements this data structure is supposed to track (given on init).
+##
+## ..
note:: The actual number of elements in the data structure can be lower
+## or higher (due to non-pruned merges) than this.
+##
+## handle: the TopK handle
+##
+## Returns: size given during initialization
+##
+## .. bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon
+## topk_sum topk_merge topk_merge_prune
+function topk_size%(handle: opaque of topk%): count
+ %{
+ assert(handle);
+ probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
+ return new Val(h->GetSize(), TYPE_COUNT);
+ %}
+
+## Get the sum of all counts of all elements in the data structure.
+##
+## .. note:: This is equal to the total number of inserted objects if the data
+## structure has never been pruned. Do not use it after calling topk_merge_prune;
+## a warning is issued if it is used afterwards.
+##
+## handle: the TopK handle
+##
+## Returns: sum of all counts
+##
+## .. bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon
+## topk_size topk_merge topk_merge_prune
+function topk_sum%(handle: opaque of topk%): count
+ %{
+ assert(handle);
+ probabilistic::TopkVal* h = (probabilistic::TopkVal*) handle;
+ return new Val(h->GetSum(), TYPE_COUNT);
+ %}
+
+## Merge the second topk data structure into the first.
+##
+## .. note:: This does not remove any elements; the resulting data structure can be
+## bigger than the maximum size given on initialization.
+##
+## .. bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon
+## topk_size topk_sum topk_merge_prune
+function topk_merge%(handle1: opaque of topk, handle2: opaque of topk%): any
+ %{
+ assert(handle1);
+ assert(handle2);
+
+ probabilistic::TopkVal* h1 = (probabilistic::TopkVal*) handle1;
+ probabilistic::TopkVal* h2 = (probabilistic::TopkVal*) handle2;
+
+ h1->Merge(h2);
+
+ return 0;
+ %}
+
+## Merge the second topk data structure into the first and prune the final data
+## structure back to the size given on initialization.
+##
+## .. note:: Use with care and only when you are aware of the restrictions this
+## entails. Do not call :bro:id:`topk_size` or :bro:id:`topk_add` afterwards;
+## the results will probably not be what you expect.
+##
+## handle1: the TopK handle into which the second TopK structure is merged
+##
+## handle2: the TopK handle that is merged into the first TopK structure
+##
+## ..
bro:see:: topk_init topk_add topk_get_top topk_count topk_epsilon +## topk_size topk_sum topk_merge +function topk_merge_prune%(handle1: opaque of topk, handle2: opaque of topk%): any + %{ + assert(handle1); + assert(handle2); + + probabilistic::TopkVal* h1 = (probabilistic::TopkVal*) handle1; + probabilistic::TopkVal* h2 = (probabilistic::TopkVal*) handle2; + + h1->Merge(h2, true); + + return 0; + %} diff --git a/testing/btest/Baseline/bifs.bloomfilter-seed/output b/testing/btest/Baseline/bifs.bloomfilter-seed/output new file mode 100644 index 0000000000..533085900f --- /dev/null +++ b/testing/btest/Baseline/bifs.bloomfilter-seed/output @@ -0,0 +1,8 @@ +bf1, global_seed, 11979365913534242684 +bf2, global_seed, 12550100962110750449 +bf3, my_seed, 12550100962110750449 +bf4, my_seed, 945716460325754659 +bf1, global_seed, 12550100962110750449 +bf2, global_seed, 945716460325754659 +bf3, my_seed, 12550100962110750449 +bf4, my_seed, 945716460325754659 diff --git a/testing/btest/Baseline/bifs.bloomfilter/output b/testing/btest/Baseline/bifs.bloomfilter/output index 14e1f038c0..82414f0686 100644 --- a/testing/btest/Baseline/bifs.bloomfilter/output +++ b/testing/btest/Baseline/bifs.bloomfilter/output @@ -12,6 +12,9 @@ error: false-positive rate must take value between 0 and 1 1 1 1 +0, no fp +1 +1 1 1 1 diff --git a/testing/btest/Baseline/bifs.topk/.stderr b/testing/btest/Baseline/bifs.topk/.stderr new file mode 100644 index 0000000000..a711333fc0 --- /dev/null +++ b/testing/btest/Baseline/bifs.topk/.stderr @@ -0,0 +1,11 @@ +error: GetCount for element that is not in top-k +error: GetEpsilon for element that is not in top-k +error: GetCount for element that is not in top-k +error: GetEpsilon for element that is not in top-k +error: GetCount for element that is not in top-k +error: GetEpsilon for element that is not in top-k +error: GetCount for element that is not in top-k +error: GetEpsilon for element that is not in top-k +warning: TopkVal::GetSum() was used on a pruned data structure. 
Result values do not represent total element count +error: GetCount for element that is not in top-k +error: GetEpsilon for element that is not in top-k diff --git a/testing/btest/Baseline/bifs.topk/out b/testing/btest/Baseline/bifs.topk/out new file mode 100644 index 0000000000..1ce5c4b850 --- /dev/null +++ b/testing/btest/Baseline/bifs.topk/out @@ -0,0 +1,81 @@ +[b, c] +4 +0 +0 +2 +0 +2 +1 +[d, c] +5 +0 +0 +2 +1 +3 +2 +[d, e] +6 +3 +2 +3 +2 +[f, e] +7 +4 +3 +3 +2 +[f, e] +8 +4 +3 +4 +2 +[g, e] +9 +0 +0 +4 +2 +5 +4 +[c, e, d] +19 +6 +0 +5 +0 +4 +0 +[c, e] +6 +0 +5 +0 +0 +0 +[c, e] +22 +12 +0 +10 +0 +0 +0 +[c, e] +19 +6 +0 +5 +0 +4 +0 +[c, e, d] +38 +12 +0 +10 +0 +8 +0 diff --git a/testing/btest/Baseline/core.print-bpf-filters/output2 b/testing/btest/Baseline/core.print-bpf-filters/output2 index 99ad929fbf..d7d8c8b05b 100644 --- a/testing/btest/Baseline/core.print-bpf-filters/output2 +++ b/testing/btest/Baseline/core.print-bpf-filters/output2 @@ -26,6 +26,8 @@ 1 6667 1 6668 1 6669 +1 67 +1 68 1 80 1 8000 1 8080 @@ -36,8 +38,8 @@ 1 992 1 993 1 995 -40 and -39 or -40 port +42 and +41 or +42 port 31 tcp -9 udp +11 udp diff --git a/testing/btest/Baseline/core.tunnels.teredo/conn.log b/testing/btest/Baseline/core.tunnels.teredo/conn.log index b71e56f073..8bb55e11d2 100644 --- a/testing/btest/Baseline/core.tunnels.teredo/conn.log +++ b/testing/btest/Baseline/core.tunnels.teredo/conn.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path conn -#open 2008-05-16-15-50-57 +#open 2013-08-04-03-28-45 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes tunnel_parents #types time string addr port addr port enum string interval count count string bool count string count count count count table[string] 1210953047.736921 arKYeMETxOg 192.168.2.16 1576 75.126.130.163 80 tcp - 0.000357 0 0 SHR - 0 fA 1 40 1 40 (empty) @@ -21,10 +21,10 @@ 1210953074.570439 c4Zw9TmAE05 192.168.2.16 1580 67.228.110.120 80 tcp http 0.466677 469 3916 SF - 0 ShADadFf 7 757 6 4164 (empty) 1210953052.202579 nQcgTWjvg4c 192.168.2.16 3797 65.55.158.80 3544 udp teredo 8.928880 129 48 SF - 0 Dd 2 185 1 76 (empty) 1210953060.829233 GSxOnSLghOa 192.168.2.16 3797 83.170.1.38 32900 udp teredo 13.293994 2359 11243 SF - 0 Dd 12 2695 13 11607 (empty) -1210953058.933954 iE6yhOq3SF 0.0.0.0 68 255.255.255.255 67 udp - - - - S0 - 0 D 1 328 0 0 (empty) +1210953058.933954 iE6yhOq3SF 0.0.0.0 68 255.255.255.255 67 udp dhcp - - - S0 - 0 D 1 328 0 0 (empty) 1210953052.324629 TEfuqmmG4bh 192.168.2.16 3797 65.55.158.81 3544 udp - - - - SHR - 0 d 0 0 1 137 (empty) 1210953046.591933 UWkUyAuUGXf 192.168.2.16 138 192.168.2.255 138 udp - 28.448321 416 0 S0 - 0 D 2 472 0 0 (empty) 1210953052.324629 FrJExwHcSal fe80::8000:f227:bec8:61af 134 fe80::8000:ffff:ffff:fffd 133 icmp - - - - OTH - 0 - 1 88 0 0 TEfuqmmG4bh 1210953060.829303 qCaWGmzFtM5 2001:0:4137:9e50:8000:f12a:b9c8:2815 128 2001:4860:0:2001::68 129 icmp - 0.463615 4 4 OTH - 0 - 1 52 1 52 GSxOnSLghOa,nQcgTWjvg4c 1210953052.202579 j4u32Pc5bif fe80::8000:ffff:ffff:fffd 133 ff02::2 134 icmp - - - - OTH - 0 - 1 64 0 0 nQcgTWjvg4c -#close 2008-05-16-15-51-16 +#close 2013-08-04-03-28-45 diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index e65b72a30b..6f85862bd7 100644 --- 
a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-29-22-37-52 +#open 2013-08-04-03-27-22 #fields name #types string scripts/base/init-bare.bro @@ -91,6 +91,7 @@ scripts/base/init-bare.bro scripts/base/utils/site.bro scripts/base/utils/patterns.bro build/scripts/base/bif/__load__.bro + build/scripts/base/bif/top-k.bif.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-29-22-37-52 +#close 2013-08-04-03-27-22 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index dbbf689185..0d9a490080 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-29-22-37-53 +#open 2013-08-04-03-27-23 #fields name #types string scripts/base/init-bare.bro @@ -91,6 +91,7 @@ scripts/base/init-bare.bro scripts/base/utils/site.bro scripts/base/utils/patterns.bro build/scripts/base/bif/__load__.bro + build/scripts/base/bif/top-k.bif.bro scripts/base/init-default.bro scripts/base/utils/active-http.bro scripts/base/utils/exec.bro @@ -147,6 +148,7 @@ scripts/base/init-default.bro scripts/base/frameworks/sumstats/plugins/std-dev.bro scripts/base/frameworks/sumstats/plugins/variance.bro scripts/base/frameworks/sumstats/plugins/sum.bro + scripts/base/frameworks/sumstats/plugins/topk.bro scripts/base/frameworks/sumstats/plugins/unique.bro scripts/base/frameworks/sumstats/non-cluster.bro scripts/base/frameworks/tunnels/__load__.bro @@ -156,11 +158,16 @@ scripts/base/init-default.bro scripts/base/protocols/conn/contents.bro scripts/base/protocols/conn/inactivity.bro scripts/base/protocols/conn/polling.bro + scripts/base/protocols/dhcp/__load__.bro + scripts/base/protocols/dhcp/consts.bro + scripts/base/protocols/dhcp/main.bro + scripts/base/protocols/dhcp/utils.bro scripts/base/protocols/dns/__load__.bro scripts/base/protocols/dns/consts.bro scripts/base/protocols/dns/main.bro scripts/base/protocols/ftp/__load__.bro scripts/base/protocols/ftp/utils-commands.bro + scripts/base/protocols/ftp/info.bro scripts/base/protocols/ftp/main.bro scripts/base/protocols/ftp/utils.bro scripts/base/protocols/ftp/files.bro @@ -201,4 +208,4 @@ scripts/base/init-default.bro scripts/base/files/extract/main.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-29-22-37-53 +#close 2013-08-04-03-27-23 diff --git a/testing/btest/Baseline/istate.topk/out b/testing/btest/Baseline/istate.topk/out new file mode 100644 index 0000000000..ef3d0cef30 --- /dev/null +++ b/testing/btest/Baseline/istate.topk/out @@ -0,0 +1,21 @@ +1 +2 +6 +4 +5 +1 +[c, e, d] +1 +2 +6 +4 +5 +1 +[c, e, d] +2 +4 +12 +8 +10 +2 +[c, e, d] diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.cluster-intermediate-update/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.cluster-intermediate-update/manager-1..stdout index a5428dd3b7..810cdb0ae8 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.sumstats.cluster-intermediate-update/manager-1..stdout +++ 
b/testing/btest/Baseline/scripts.base.frameworks.sumstats.cluster-intermediate-update/manager-1..stdout @@ -1,3 +1,3 @@ A test metric threshold was crossed with a value of: 101.0 -End of epoch handler was called 101.0 +End of epoch handler was called diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.on-demand-cluster/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.on-demand-cluster/manager-1..stdout new file mode 100644 index 0000000000..0445fc68b2 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.sumstats.on-demand-cluster/manager-1..stdout @@ -0,0 +1,2 @@ +SumStat key request + Host: 7.2.1.5 -> 145 diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.on-demand/.stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.on-demand/.stdout new file mode 100644 index 0000000000..7d62edb7f7 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.sumstats.on-demand/.stdout @@ -0,0 +1,2 @@ +Key request for 1.2.3.4 + Host: 1.2.3.4 -> 42 diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.sample-cluster/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.sample-cluster/manager-1..stdout new file mode 100644 index 0000000000..2eb4687e41 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.sumstats.sample-cluster/manager-1..stdout @@ -0,0 +1,8 @@ +Host: 6.5.4.3 Sampled observations: 2 + [2, 5] +Host: 10.10.10.10 Sampled observations: 1 + [5] +Host: 1.2.3.4 Sampled observations: 34 + [5, 22, 52, 91, 94] +Host: 7.2.1.5 Sampled observations: 2 + [1, 91] diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.sample-cluster/out b/testing/btest/Baseline/scripts.base.frameworks.sumstats.sample-cluster/out deleted file mode 100644 index 2451b82f45..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.sumstats.sample-cluster/out +++ /dev/null @@ -1,18 +0,0 @@ -1 -1.2.3.4 -10.10.10.10 -2 -2 -34 -6.5.4.3 -7.2.1.5 -[num=1, dbl=, str=] -[num=2, dbl=, str=] -[num=22, dbl=, str=] -[num=5, dbl=, str=] -[num=5, dbl=, str=] -[num=5, dbl=, str=] -[num=52, dbl=, str=] -[num=91, dbl=, str=] -[num=91, dbl=, str=] -[num=94, dbl=, str=] diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.topk-cluster/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.topk-cluster/manager-1..stdout new file mode 100644 index 0000000000..2d076eeac7 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.sumstats.topk-cluster/manager-1..stdout @@ -0,0 +1,9 @@ +Top entries for key counter +Num: 995, count: 100, epsilon: 0 +Num: 1, count: 99, epsilon: 0 +Num: 2, count: 98, epsilon: 0 +Num: 3, count: 97, epsilon: 0 +Num: 4, count: 96, epsilon: 0 +Top entries for key two +Num: 2, count: 4, epsilon: 0 +Num: 1, count: 3, epsilon: 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.topk/.stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.topk/.stdout new file mode 100644 index 0000000000..c85316eecc --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.sumstats.topk/.stdout @@ -0,0 +1,8 @@ +Top entries for key counter +Num: 1, count: 99, epsilon: 0 +Num: 2, count: 98, epsilon: 0 +Num: 3, count: 97, epsilon: 0 +Num: 4, count: 96, epsilon: 0 +Num: 5, count: 95, epsilon: 0 +Top entries for key two +Num: 1, count: 2, epsilon: 0 diff --git a/testing/btest/Baseline/scripts.base.protocols.dhcp.dhcp-all-msg-types/dhcp.log 
b/testing/btest/Baseline/scripts.base.protocols.dhcp.dhcp-all-msg-types/dhcp.log new file mode 100644 index 0000000000..b52d455a4a --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.dhcp.dhcp-all-msg-types/dhcp.log @@ -0,0 +1,10 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path dhcp +#open 2013-07-31-21-00-49 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p mac assigned_ip lease_time trans_id +#types time string addr port addr port string addr interval count +1370200444.371332 nQcgTWjvg4c 128.2.6.189 68 128.2.6.152 67 90:b1:1c:99:49:29 128.2.6.189 900.000000 1984 +#close 2013-07-31-21-00-50 diff --git a/testing/btest/Baseline/scripts.base.protocols.dhcp.inform/dhcp.log b/testing/btest/Baseline/scripts.base.protocols.dhcp.inform/dhcp.log new file mode 100644 index 0000000000..51b4a28f9c --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.dhcp.inform/dhcp.log @@ -0,0 +1,10 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path dhcp +#open 2013-08-03-01-18-52 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p mac assigned_ip lease_time trans_id +#types time string addr port addr port string addr interval count +1374432420.191205 UWkUyAuUGXf 128.2.6.122 68 128.2.6.152 67 90:b1:1c:99:49:29 128.2.6.122 0.000000 2754407505 +#close 2013-08-03-01-18-52 diff --git a/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout b/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout index 5352d15d18..3cfdaafb4c 100644 --- a/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout +++ b/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout @@ -3,5 +3,4 @@ test1, [exit_code=0, signal_exit=F, stdout=[done, exit, stop], stderr=] -test3, [exit_code=9, signal_exit=F, stdout=[FML], stderr=, files=] test4, [exit_code=0, signal_exit=F, stdout=[hibye], stderr=, files=] diff --git a/testing/btest/Baseline/scripts.policy.protocols.dhcp.known-devices-and-hostnames.basic/known_devices.log b/testing/btest/Baseline/scripts.policy.protocols.dhcp.known-devices-and-hostnames.basic/known_devices.log new file mode 100644 index 0000000000..91d37f8950 --- /dev/null +++ b/testing/btest/Baseline/scripts.policy.protocols.dhcp.known-devices-and-hostnames.basic/known_devices.log @@ -0,0 +1,11 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path known_devices +#open 2013-07-31-21-27-41 +#fields ts mac dhcp_host_name +#types time string string +1370200443.344965 90:b1:1c:99:49:29 btest.is.cool +1374432420.186878 90:b1:1c:99:49:29 (empty) +#close 2013-07-31-21-27-41 diff --git a/testing/btest/Traces/dhcp/dhcp.trace b/testing/btest/Traces/dhcp/dhcp.trace new file mode 100644 index 0000000000..aeb00a133f Binary files /dev/null and b/testing/btest/Traces/dhcp/dhcp.trace differ diff --git a/testing/btest/Traces/dhcp/dhcp_inform.trace b/testing/btest/Traces/dhcp/dhcp_inform.trace new file mode 100644 index 0000000000..798ca84149 Binary files /dev/null and b/testing/btest/Traces/dhcp/dhcp_inform.trace differ diff --git a/testing/btest/bifs/bloomfilter-seed.bro b/testing/btest/bifs/bloomfilter-seed.bro new file mode 100644 index 0000000000..436638e2af --- /dev/null +++ b/testing/btest/bifs/bloomfilter-seed.bro @@ -0,0 +1,40 @@ +# @TEST-EXEC: bro -b %INPUT global_hash_seed="foo" >>output +# @TEST-EXEC: bro -b %INPUT global_hash_seed="my_seed" >>output +# @TEST-EXEC: btest-diff output + +type Foo: record + { + a: count; + b: string; + }; + +function test_bloom_filter() + { + local bf1 = 
bloomfilter_basic_init(0.9, 10); + bloomfilter_add(bf1, "foo"); + bloomfilter_add(bf1, "bar"); + + local bf2 = bloomfilter_basic_init(0.9, 10); + bloomfilter_add(bf2, Foo($a=1, $b="xx")); + bloomfilter_add(bf2, Foo($a=2, $b="yy")); + + local bf3 = bloomfilter_basic_init(0.9, 10, "my_seed"); + bloomfilter_add(bf3, "foo"); + bloomfilter_add(bf3, "bar"); + + local bf4 = bloomfilter_basic_init(0.9, 10, "my_seed"); + bloomfilter_add(bf4, Foo($a=1, $b="xx")); + bloomfilter_add(bf4, Foo($a=2, $b="yy")); + + print "bf1, global_seed", bloomfilter_internal_state(bf1); + print "bf2, global_seed", bloomfilter_internal_state(bf2); + print "bf3, my_seed", bloomfilter_internal_state(bf3); + print "bf4, my_seed", bloomfilter_internal_state(bf4); + + + } + +event bro_init() + { + test_bloom_filter(); + } diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index 3b40f29553..95455bc74c 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -15,14 +15,21 @@ function test_basic_bloom_filter() bloomfilter_add(bf_cnt, 0.5); # Type mismatch bloomfilter_add(bf_cnt, "foo"); # Type mismatch + # Alternative constructor. + local bf_dbl = bloomfilter_basic_init2(4, 10); + bloomfilter_add(bf_dbl, 4.2); + bloomfilter_add(bf_dbl, 3.14); + print bloomfilter_lookup(bf_dbl, 4.2); + print bloomfilter_lookup(bf_dbl, 3.14); + # Basic usage with strings. local bf_str = bloomfilter_basic_init(0.9, 10); bloomfilter_add(bf_str, "foo"); bloomfilter_add(bf_str, "bar"); print bloomfilter_lookup(bf_str, "foo"); print bloomfilter_lookup(bf_str, "bar"); - print bloomfilter_lookup(bf_str, "b4z"); # FP - print bloomfilter_lookup(bf_str, "quux"); # FP + print bloomfilter_lookup(bf_str, "b4zzz"), "no fp"; # FP + print bloomfilter_lookup(bf_str, "quuux"); # FP bloomfilter_add(bf_str, 0.5); # Type mismatch bloomfilter_add(bf_str, 100); # Type mismatch @@ -45,6 +52,11 @@ function test_basic_bloom_filter() print bloomfilter_lookup(bf_merged, 84); print bloomfilter_lookup(bf_merged, 100); print bloomfilter_lookup(bf_merged, 168); + + #empty filter tests + local bf_empty = bloomfilter_basic_init(0.1, 1000); + local bf_empty_merged = bloomfilter_merge(bf_merged, bf_empty); + print bloomfilter_lookup(bf_empty_merged, 42); } function test_counting_bloom_filter() diff --git a/testing/btest/bifs/topk.bro b/testing/btest/bifs/topk.bro new file mode 100644 index 0000000000..02d13c4195 --- /dev/null +++ b/testing/btest/bifs/topk.bro @@ -0,0 +1,154 @@ +# @TEST-EXEC: bro -b %INPUT > out +# @TEST-EXEC: btest-diff out +# @TEST-EXEC: btest-diff .stderr + +event bro_init() + { + local k1 = topk_init(2); + + # first - peculiarity check... 
+ topk_add(k1, "a"); + topk_add(k1, "b"); + topk_add(k1, "b"); + topk_add(k1, "c"); + + local s = topk_get_top(k1, 5); + print s; + print topk_sum(k1); + print topk_count(k1, "a"); + print topk_epsilon(k1, "a"); + print topk_count(k1, "b"); + print topk_epsilon(k1, "b"); + print topk_count(k1, "c"); + print topk_epsilon(k1, "c"); + + topk_add(k1, "d"); + s = topk_get_top(k1, 5); + print s; + print topk_sum(k1); + print topk_count(k1, "b"); + print topk_epsilon(k1, "b"); + print topk_count(k1, "c"); + print topk_epsilon(k1, "c"); + print topk_count(k1, "d"); + print topk_epsilon(k1, "d"); + + topk_add(k1, "e"); + s = topk_get_top(k1, 5); + print s; + print topk_sum(k1); + print topk_count(k1, "d"); + print topk_epsilon(k1, "d"); + print topk_count(k1, "e"); + print topk_epsilon(k1, "e"); + + topk_add(k1, "f"); + s = topk_get_top(k1, 5); + print s; + print topk_sum(k1); + print topk_count(k1, "f"); + print topk_epsilon(k1, "f"); + print topk_count(k1, "e"); + print topk_epsilon(k1, "e"); + + topk_add(k1, "e"); + s = topk_get_top(k1, 5); + print s; + print topk_sum(k1); + print topk_count(k1, "f"); + print topk_epsilon(k1, "f"); + print topk_count(k1, "e"); + print topk_epsilon(k1, "e"); + + topk_add(k1, "g"); + s = topk_get_top(k1, 5); + print s; + print topk_sum(k1); + print topk_count(k1, "f"); + print topk_epsilon(k1, "f"); + print topk_count(k1, "e"); + print topk_epsilon(k1, "e"); + print topk_count(k1, "g"); + print topk_epsilon(k1, "g"); + + k1 = topk_init(100); + topk_add(k1, "a"); + topk_add(k1, "b"); + topk_add(k1, "b"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "f"); + s = topk_get_top(k1, 3); + print s; + print topk_sum(k1); + print topk_count(k1, "c"); + print topk_epsilon(k1, "c"); + print topk_count(k1, "e"); + print topk_epsilon(k1, "d"); + print topk_count(k1, "d"); + print topk_epsilon(k1, "d"); + + local k3 = topk_init(2); + topk_merge_prune(k3, k1); + + s = topk_get_top(k3, 3); + print s; + print topk_count(k3, "c"); + print topk_epsilon(k3, "c"); + print topk_count(k3, "e"); + print topk_epsilon(k3, "e"); + print topk_count(k3, "d"); + print topk_epsilon(k3, "d"); + + topk_merge_prune(k3, k1); + + s = topk_get_top(k3, 3); + print s; + print topk_sum(k3); # this gives a warning and a wrong result. 
+ print topk_count(k3, "c"); + print topk_epsilon(k3, "c"); + print topk_count(k3, "e"); + print topk_epsilon(k3, "e"); + print topk_count(k3, "d"); + print topk_epsilon(k3, "d"); + + k3 = topk_init(2); + topk_merge(k3, k1); + print s; + print topk_sum(k3); + print topk_count(k3, "c"); + print topk_epsilon(k3, "c"); + print topk_count(k3, "e"); + print topk_epsilon(k3, "e"); + print topk_count(k3, "d"); + print topk_epsilon(k3, "d"); + + topk_merge(k3, k1); + + s = topk_get_top(k3, 3); + print s; + print topk_sum(k3); + print topk_count(k3, "c"); + print topk_epsilon(k3, "c"); + print topk_count(k3, "e"); + print topk_epsilon(k3, "e"); + print topk_count(k3, "d"); + print topk_epsilon(k3, "d"); + + + + +} diff --git a/testing/btest/istate/topk.bro b/testing/btest/istate/topk.bro new file mode 100644 index 0000000000..4d599c2780 --- /dev/null +++ b/testing/btest/istate/topk.bro @@ -0,0 +1,74 @@ +# @TEST-EXEC: bro -b %INPUT runnumber=1 >out +# @TEST-EXEC: bro -b %INPUT runnumber=2 >>out +# @TEST-EXEC: bro -b %INPUT runnumber=3 >>out +# @TEST-EXEC: btest-diff out + +global runnumber: count &redef; # differentiate runs + +global k1: opaque of topk &persistent; +global k2: opaque of topk &persistent; + +event bro_init() + { + + k2 = topk_init(20); + + if ( runnumber == 1 ) + { + k1 = topk_init(100); + + topk_add(k1, "a"); + topk_add(k1, "b"); + topk_add(k1, "b"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "f"); + } + + local s = topk_get_top(k1, 3); + print topk_count(k1, "a"); + print topk_count(k1, "b"); + print topk_count(k1, "c"); + print topk_count(k1, "d"); + print topk_count(k1, "e"); + print topk_count(k1, "f"); + + if ( runnumber == 2 ) + { + topk_add(k1, "a"); + topk_add(k1, "b"); + topk_add(k1, "b"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "f"); + } + + print s; + + } diff --git a/testing/btest/scripts/base/frameworks/input/raw/executestdin.bro b/testing/btest/scripts/base/frameworks/input/raw/executestdin.bro index 729844e4b4..f6513dc6aa 100644 --- a/testing/btest/scripts/base/frameworks/input/raw/executestdin.bro +++ b/testing/btest/scripts/base/frameworks/input/raw/executestdin.bro @@ -39,6 +39,5 @@ event bro_init() try = 0; outfile = open("../out"); Input::add_event([$source="cat > ../test.txt |", $reader=Input::READER_RAW, $mode=Input::STREAM, $name="input", $fields=Val, $ev=line, $want_record=F, $config=config_strings]); - Input::remove("input"); Input::add_event([$source="cat |", $reader=Input::READER_RAW, $mode=Input::STREAM, $name="input2", $fields=Val, $ev=line, $want_record=F, $config=config_strings]); } diff --git a/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro b/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro index 080697a824..127700e591 100644 --- a/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro +++ b/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro @@ -23,16 +23,16 @@ global n = 0; event bro_init() &priority=5 { local r1: 
SumStats::Reducer = [$stream="test", $apply=set(SumStats::SUM, SumStats::MIN, SumStats::MAX, SumStats::AVERAGE, SumStats::STD_DEV, SumStats::VARIANCE, SumStats::UNIQUE, SumStats::HLLUNIQUE)]; - SumStats::create([$epoch=5secs, + SumStats::create([$name="test", + $epoch=5secs, $reducers=set(r1), - $epoch_finished(rt: SumStats::ResultTable) = + $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) = + { + local r = result["test"]; + print fmt("Host: %s - num:%d - sum:%.1f - avg:%.1f - max:%.1f - min:%.1f - var:%.1f - std_dev:%.1f - unique:%d - hllunique:%d", key$host, r$num, r$sum, r$average, r$max, r$min, r$variance, r$std_dev, r$unique, r$hllunique); + }, + $epoch_finished(ts: time) = { - for ( key in rt ) - { - local r = rt[key]["test"]; - print fmt("Host: %s - num:%d - sum:%.1f - avg:%.1f - max:%.1f - min:%.1f - var:%.1f - std_dev:%.1f - unique:%d - hllunique:%d", key$host, r$num, r$sum, r$average, r$max, r$min, r$variance, r$std_dev, r$unique, r$hllunique); - } - terminate(); }]); } diff --git a/testing/btest/scripts/base/frameworks/sumstats/basic.bro b/testing/btest/scripts/base/frameworks/sumstats/basic.bro index cafd834a42..d337767d24 100644 --- a/testing/btest/scripts/base/frameworks/sumstats/basic.bro +++ b/testing/btest/scripts/base/frameworks/sumstats/basic.bro @@ -11,17 +11,15 @@ event bro_init() &priority=5 SumStats::MIN, SumStats::STD_DEV, SumStats::UNIQUE, - SumStats::HLLUNIQUE)]; - SumStats::create([$epoch=3secs, - $reducers=set(r1), - $epoch_finished(data: SumStats::ResultTable) = - { - for ( key in data ) - { - local r = data[key]["test.metric"]; - print fmt("Host: %s - num:%d - sum:%.1f - var:%.1f - avg:%.1f - max:%.1f - min:%.1f - std_dev:%.1f - unique:%d - hllunique:%d", key$host, r$num, r$sum, r$variance, r$average, r$max, r$min, r$std_dev, r$unique, r$hllunique); - } - } + SumStats::HLLUNIQUE)]; + SumStats::create([$name="test", + $epoch=3secs, + $reducers=set(r1), + $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) = + { + local r = result["test.metric"]; + print fmt("Host: %s - num:%d - sum:%.1f - var:%.1f - avg:%.1f - max:%.1f - min:%.1f - std_dev:%.1f - unique:%d - hllunique:%d", key$host, r$num, r$sum, r$variance, r$average, r$max, r$min, r$std_dev, r$unique, r$hllunique); + } ]); SumStats::observe("test.metric", [$host=1.2.3.4], [$num=5]); diff --git a/testing/btest/scripts/base/frameworks/sumstats/cluster-intermediate-update.bro b/testing/btest/scripts/base/frameworks/sumstats/cluster-intermediate-update.bro index bed1793721..4fb6b817d3 100644 --- a/testing/btest/scripts/base/frameworks/sumstats/cluster-intermediate-update.bro +++ b/testing/btest/scripts/base/frameworks/sumstats/cluster-intermediate-update.bro @@ -20,20 +20,23 @@ redef Log::default_rotation_interval = 0secs; event bro_init() &priority=5 { local r1: SumStats::Reducer = [$stream="test.metric", $apply=set(SumStats::SUM)]; - SumStats::create([$epoch=10secs, + SumStats::create([$name="test", + $epoch=10secs, $reducers=set(r1), - $epoch_finished(data: SumStats::ResultTable) = - { - print "End of epoch handler was called"; - for ( res in data ) - print data[res]["test.metric"]$sum; - terminate(); - }, + $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) = + { + print result["test.metric"]$sum; + }, + $epoch_finished(ts: time) = + { + print "End of epoch handler was called"; + terminate(); + }, $threshold_val(key: SumStats::Key, result: SumStats::Result) = { - return double_to_count(result["test.metric"]$sum); + return result["test.metric"]$sum; }, 
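+ # Thresholds are doubles in the reworked API, so the sum can be
+ # returned as-is instead of going through double_to_count().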
- $threshold=100, + $threshold=100.0, $threshold_crossed(key: SumStats::Key, result: SumStats::Result) = { print fmt("A test metric threshold was crossed with a value of: %.1f", result["test.metric"]$sum); diff --git a/testing/btest/scripts/base/frameworks/sumstats/on-demand-cluster.bro b/testing/btest/scripts/base/frameworks/sumstats/on-demand-cluster.bro new file mode 100644 index 0000000000..48068d8cfe --- /dev/null +++ b/testing/btest/scripts/base/frameworks/sumstats/on-demand-cluster.bro @@ -0,0 +1,96 @@ +# @TEST-SERIALIZE: comm +# +# @TEST-EXEC: btest-bg-run manager-1 BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT +# @TEST-EXEC: sleep 1 +# @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT +# @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-2 bro %INPUT +# @TEST-EXEC: btest-bg-wait 15 + +# @TEST-EXEC: btest-diff manager-1/.stdout + +@TEST-START-FILE cluster-layout.bro +redef Cluster::nodes = { + ["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")], + ["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1", $interface="eth0"], + ["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1", $interface="eth1"], +}; +@TEST-END-FILE + +redef Log::default_rotation_interval = 0secs; + +global n = 0; + +event bro_init() &priority=5 + { + local r1 = SumStats::Reducer($stream="test", $apply=set(SumStats::SUM, SumStats::MIN, SumStats::MAX, SumStats::AVERAGE, SumStats::STD_DEV, SumStats::VARIANCE, SumStats::UNIQUE)); + SumStats::create([$name="test sumstat", + $epoch=1hr, + $reducers=set(r1)]); + } + +event remote_connection_closed(p: event_peer) + { + terminate(); + } + +global ready_for_data: event(); +redef Cluster::manager2worker_events += /^ready_for_data$/; + +event ready_for_data() + { + if ( Cluster::node == "worker-1" ) + { + SumStats::observe("test", [$host=1.2.3.4], [$num=34]); + SumStats::observe("test", [$host=1.2.3.4], [$num=30]); + SumStats::observe("test", [$host=6.5.4.3], [$num=1]); + SumStats::observe("test", [$host=7.2.1.5], [$num=54]); + } + if ( Cluster::node == "worker-2" ) + { + SumStats::observe("test", [$host=1.2.3.4], [$num=75]); + SumStats::observe("test", [$host=1.2.3.4], [$num=30]); + SumStats::observe("test", [$host=7.2.1.5], [$num=91]); + SumStats::observe("test", [$host=10.10.10.10], [$num=5]); + } + } + + +event on_demand2() + { + local host = 7.2.1.5; + when ( local result = SumStats::request_key("test sumstat", [$host=host]) ) + { + print "SumStat key request"; + if ( "test" in result ) + print fmt(" Host: %s -> %.0f", host, result["test"]$sum); + terminate(); + } + } + +event on_demand() + { + #when ( local results = SumStats::request("test sumstat") ) + # { + # print "Complete SumStat request"; + # print fmt(" Host: %s -> %.0f", 6.5.4.3, results[[$host=6.5.4.3]]["test"]$sum); + # print fmt(" Host: %s -> %.0f", 10.10.10.10, results[[$host=10.10.10.10]]["test"]$sum); + # print fmt(" Host: %s -> %.0f", 1.2.3.4, results[[$host=1.2.3.4]]["test"]$sum); + # print fmt(" Host: %s -> %.0f", 7.2.1.5, results[[$host=7.2.1.5]]["test"]$sum); + + event on_demand2(); + # } + } + +global peer_count = 0; +event remote_connection_handshake_done(p: event_peer) &priority=-5 + { + ++peer_count; + if ( peer_count == 2 ) + { + if ( Cluster::local_node_type() == Cluster::MANAGER ) + event ready_for_data(); + + schedule 1sec { on_demand() }; + } + } + diff --git 
a/testing/btest/scripts/base/frameworks/sumstats/on-demand.bro b/testing/btest/scripts/base/frameworks/sumstats/on-demand.bro new file mode 100644 index 0000000000..78aba726ca --- /dev/null +++ b/testing/btest/scripts/base/frameworks/sumstats/on-demand.bro @@ -0,0 +1,46 @@ +# @TEST-EXEC: bro %INPUT +# @TEST-EXEC: btest-diff .stdout + +redef exit_only_after_terminate=T; + + +## Requesting a full sumstats resulttable is not supported yet. +#event on_demand() +# { +# when ( local results = SumStats::request("test") ) +# { +# print "Complete SumStat request"; +# for ( key in results ) +# { +# print fmt(" Host: %s -> %.0f", key$host, results[key]["test.reducer"]$sum); +# } +# } +# } + +event on_demand_key() + { + local host = 1.2.3.4; + when ( local result = SumStats::request_key("test", [$host=host]) ) + { + print fmt("Key request for %s", host); + print fmt(" Host: %s -> %.0f", host, result["test.reducer"]$sum); + terminate(); + } + } + +event bro_init() &priority=5 + { + local r1: SumStats::Reducer = [$stream="test.reducer", + $apply=set(SumStats::SUM)]; + SumStats::create([$name="test", + $epoch=1hr, + $reducers=set(r1)]); + + # Seed some data but notice there are no callbacks defined in the sumstat! + SumStats::observe("test.reducer", [$host=1.2.3.4], [$num=42]); + SumStats::observe("test.reducer", [$host=4.3.2.1], [$num=7]); + + #schedule 0.1 secs { on_demand() }; + schedule 1 secs { on_demand_key() }; + } + diff --git a/testing/btest/scripts/base/frameworks/sumstats/sample-cluster.bro b/testing/btest/scripts/base/frameworks/sumstats/sample-cluster.bro index 1b0f0eec94..1f2bab0229 100644 --- a/testing/btest/scripts/base/frameworks/sumstats/sample-cluster.bro +++ b/testing/btest/scripts/base/frameworks/sumstats/sample-cluster.bro @@ -5,8 +5,7 @@ # @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT # @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. 
CLUSTER_NODE=worker-2 bro %INPUT # @TEST-EXEC: btest-bg-wait 15 -# @TEST-EXEC: cat manager-1/.stdout | sort > out -# @TEST-EXEC: btest-diff out +# @TEST-EXEC: btest-diff manager-1/.stdout @TEST-START-FILE cluster-layout.bro redef Cluster::nodes = { @@ -18,25 +17,24 @@ redef Cluster::nodes = { redef Log::default_rotation_interval = 0secs; -global n = 0; - event bro_init() &priority=5 { local r1: SumStats::Reducer = [$stream="test", $apply=set(SumStats::SAMPLE), $num_samples=5]; - SumStats::create([$epoch=5secs, + SumStats::create([$name="test", + $epoch=5secs, $reducers=set(r1), - $epoch_finished(rt: SumStats::ResultTable) = + $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) = { - for ( key in rt ) - { - print key$host; - local r = rt[key]["test"]; - for ( sample in r$samples ) { - print r$samples[sample]; - } - print r$sample_elements; - } + local r = result["test"]; + print fmt("Host: %s Sampled observations: %d", key$host, r$sample_elements); + local sample_nums: vector of count = vector(); + for ( sample in r$samples ) + sample_nums[|sample_nums|] = r$samples[sample]$num; + print fmt(" %s", sort(sample_nums)); + }, + $epoch_finished(ts: time) = + { terminate(); }]); } diff --git a/testing/btest/scripts/base/frameworks/sumstats/sample.bro b/testing/btest/scripts/base/frameworks/sumstats/sample.bro index 04d7b4f256..4ba395b463 100644 --- a/testing/btest/scripts/base/frameworks/sumstats/sample.bro +++ b/testing/btest/scripts/base/frameworks/sumstats/sample.bro @@ -5,19 +5,16 @@ event bro_init() &priority=5 { local r1: SumStats::Reducer = [$stream="test.metric", $apply=set(SumStats::SAMPLE), $num_samples=2]; - SumStats::create([$epoch=3secs, - $reducers=set(r1), - $epoch_finished(data: SumStats::ResultTable) = - { - for ( key in data ) - { - print key$host; - local r = data[key]["test.metric"]; - print r$samples; - print r$sample_elements; - } - } - ]); + SumStats::create([$name="test", + $epoch=3secs, + $reducers=set(r1), + $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) = + { + print key$host; + local r = result["test.metric"]; + print r$samples; + print r$sample_elements; + }]); SumStats::observe("test.metric", [$host=1.2.3.4], [$num=5]); SumStats::observe("test.metric", [$host=1.2.3.4], [$num=22]); diff --git a/testing/btest/scripts/base/frameworks/sumstats/thresholding.bro b/testing/btest/scripts/base/frameworks/sumstats/thresholding.bro index ddc053bd23..b00b30a375 100644 --- a/testing/btest/scripts/base/frameworks/sumstats/thresholding.bro +++ b/testing/btest/scripts/base/frameworks/sumstats/thresholding.bro @@ -8,14 +8,15 @@ redef enum Notice::Type += { event bro_init() &priority=5 { local r1: SumStats::Reducer = [$stream="test.metric", $apply=set(SumStats::SUM)]; - SumStats::create([$epoch=3secs, + SumStats::create([$name="test1", + $epoch=3secs, $reducers=set(r1), #$threshold_val = SumStats::sum_threshold("test.metric"), $threshold_val(key: SumStats::Key, result: SumStats::Result) = { - return double_to_count(result["test.metric"]$sum); + return result["test.metric"]$sum; }, - $threshold=5, + $threshold=5.0, $threshold_crossed(key: SumStats::Key, result: SumStats::Result) = { local r = result["test.metric"]; @@ -24,14 +25,15 @@ event bro_init() &priority=5 ]); local r2: SumStats::Reducer = [$stream="test.metric", $apply=set(SumStats::SUM)]; - SumStats::create([$epoch=3secs, + SumStats::create([$name="test2", + $epoch=3secs, $reducers=set(r2), #$threshold_val = SumStats::sum_threshold("test.metric"), $threshold_val(key: SumStats::Key, 
result: SumStats::Result) = { - return double_to_count(result["test.metric"]$sum); + return result["test.metric"]$sum; }, - $threshold_series=vector(3,6,800), + $threshold_series=vector(3.0,6.0,800.0), $threshold_crossed(key: SumStats::Key, result: SumStats::Result) = { local r = result["test.metric"]; @@ -41,19 +43,20 @@ event bro_init() &priority=5 local r3: SumStats::Reducer = [$stream="test.metric", $apply=set(SumStats::SUM)]; local r4: SumStats::Reducer = [$stream="test.metric2", $apply=set(SumStats::SUM)]; - SumStats::create([$epoch=3secs, + SumStats::create([$name="test3", + $epoch=3secs, $reducers=set(r3, r4), $threshold_val(key: SumStats::Key, result: SumStats::Result) = { # Calculate a ratio between sums of two reducers. if ( "test.metric2" in result && "test.metric" in result && result["test.metric"]$sum > 0 ) - return double_to_count(result["test.metric2"]$sum / result["test.metric"]$sum); + return result["test.metric2"]$sum / result["test.metric"]$sum; else - return 0; + return 0.0; }, # Looking for metric2 sum to be 5 times the sum of metric - $threshold=5, + $threshold=5.0, $threshold_crossed(key: SumStats::Key, result: SumStats::Result) = { local thold = result["test.metric2"]$sum / result["test.metric"]$sum; diff --git a/testing/btest/scripts/base/frameworks/sumstats/topk-cluster.bro b/testing/btest/scripts/base/frameworks/sumstats/topk-cluster.bro new file mode 100644 index 0000000000..d26cee4244 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/sumstats/topk-cluster.bro @@ -0,0 +1,107 @@ +# @TEST-SERIALIZE: comm +# +# @TEST-EXEC: btest-bg-run manager-1 BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT +# @TEST-EXEC: sleep 1 +# @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT +# @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. 
CLUSTER_NODE=worker-2 bro %INPUT +# @TEST-EXEC: btest-bg-wait 15 + +# @TEST-EXEC: btest-diff manager-1/.stdout +# +@TEST-START-FILE cluster-layout.bro +redef Cluster::nodes = { + ["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")], + ["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1", $interface="eth0"], + ["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1", $interface="eth1"], +}; +@TEST-END-FILE + +redef Log::default_rotation_interval = 0secs; + + +event bro_init() &priority=5 + { + local r1: SumStats::Reducer = [$stream="test.metric", + $apply=set(SumStats::TOPK)]; + SumStats::create([$name="topk-test", + $epoch=5secs, + $reducers=set(r1), + $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) = + { + local r = result["test.metric"]; + local s: vector of SumStats::Observation; + s = topk_get_top(r$topk, 5); + print fmt("Top entries for key %s", key$str); + for ( element in s ) + { + print fmt("Num: %d, count: %d, epsilon: %d", s[element]$num, topk_count(r$topk, s[element]), topk_epsilon(r$topk, s[element])); + } + }, + $epoch_finished(ts: time) = + { + terminate(); + }]); + + + } + +event remote_connection_closed(p: event_peer) + { + terminate(); + } + +global ready_for_data: event(); +redef Cluster::manager2worker_events += /^ready_for_data$/; + +event ready_for_data() + { + const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100}; + + + if ( Cluster::node == "worker-1" ) + { + + local a: count; + a = 0; + + for ( i in loop_v ) + { + a = a + 1; + for ( j in loop_v ) + { + if ( i < j ) + SumStats::observe("test.metric", [$str="counter"], [$num=a]); + } + } + + + SumStats::observe("test.metric", [$str="two"], [$num=1]); + SumStats::observe("test.metric", [$str="two"], [$num=1]); + } + if ( Cluster::node == "worker-2" ) + { + SumStats::observe("test.metric", [$str="two"], [$num=2]); + SumStats::observe("test.metric", [$str="two"], [$num=2]); + SumStats::observe("test.metric", [$str="two"], [$num=2]); + SumStats::observe("test.metric", [$str="two"], [$num=2]); + SumStats::observe("test.metric", [$str="two"], [$num=1]); + + for ( i in loop_v ) + { + SumStats::observe("test.metric", [$str="counter"], [$num=995]); + } + } + } + +@if ( Cluster::local_node_type() == Cluster::MANAGER ) + +global peer_count = 0; +event remote_connection_handshake_done(p: event_peer) &priority=-5 + { + ++peer_count; + if ( peer_count == 2 ) + event ready_for_data(); + } + +@endif + diff --git a/testing/btest/scripts/base/frameworks/sumstats/topk.bro b/testing/btest/scripts/base/frameworks/sumstats/topk.bro new file mode 100644 index 0000000000..99c301c669 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/sumstats/topk.bro @@ -0,0 +1,43 @@ +# @TEST-EXEC: bro %INPUT +# @TEST-EXEC: btest-diff .stdout + +event bro_init() &priority=5 + { + local r1: SumStats::Reducer = [$stream="test.metric", + $apply=set(SumStats::TOPK)]; + SumStats::create([$name="topk-test", + $epoch=3secs, + $reducers=set(r1), + $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) = + { + local r = 
result["test.metric"]; + local s: vector of SumStats::Observation; + s = topk_get_top(r$topk, 5); + + print fmt("Top entries for key %s", key$str); + for ( element in s ) + { + print fmt("Num: %d, count: %d, epsilon: %d", s[element]$num, topk_count(r$topk, s[element]), topk_epsilon(r$topk, s[element])); + } + }]); + + + const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100}; + + local a: count; + a = 0; + + for ( i in loop_v ) + { + a = a + 1; + for ( j in loop_v ) + { + if ( i < j ) + SumStats::observe("test.metric", [$str="counter"], [$num=a]); + } + } + + + SumStats::observe("test.metric", [$str="two"], [$num=1]); + SumStats::observe("test.metric", [$str="two"], [$num=1]); + } diff --git a/testing/btest/scripts/base/protocols/dhcp/dhcp-all-msg-types.btest b/testing/btest/scripts/base/protocols/dhcp/dhcp-all-msg-types.btest new file mode 100644 index 0000000000..752ab91780 --- /dev/null +++ b/testing/btest/scripts/base/protocols/dhcp/dhcp-all-msg-types.btest @@ -0,0 +1,6 @@ +# This tests that DHCP leases are logged in dhcp.log +# The trace has a message of each DHCP message type, +# but only one lease should show up in the logs. + +# @TEST-EXEC: bro -r $TRACES/dhcp/dhcp.trace %INPUT +# @TEST-EXEC: btest-diff dhcp.log diff --git a/testing/btest/scripts/base/protocols/dhcp/inform.test b/testing/btest/scripts/base/protocols/dhcp/inform.test new file mode 100644 index 0000000000..652fd1ae45 --- /dev/null +++ b/testing/btest/scripts/base/protocols/dhcp/inform.test @@ -0,0 +1,5 @@ +# DHCPINFORM leases are special-cased in the code. +# This tests that those leases are correctly logged. + +# @TEST-EXEC: bro -r $TRACES/dhcp/dhcp_inform.trace %INPUT +# @TEST-EXEC: btest-diff dhcp.log diff --git a/testing/btest/scripts/base/utils/active-http.test b/testing/btest/scripts/base/utils/active-http.test index 127b21d77e..f547e7dd15 100644 --- a/testing/btest/scripts/base/utils/active-http.test +++ b/testing/btest/scripts/base/utils/active-http.test @@ -1,4 +1,3 @@ -# @TEST-REQUIRES: which httpd # @TEST-REQUIRES: which python # # @TEST-EXEC: btest-bg-run httpd python $SCRIPTS/httpd.py --max 1 @@ -8,7 +7,7 @@ # @TEST-EXEC: btest-diff bro/.stdout @load base/utils/active-http - +@load base/frameworks/communication # let network-time run. otherwise there are no heartbeats... redef exit_only_after_terminate = T; event bro_init() diff --git a/testing/btest/scripts/base/utils/dir.test b/testing/btest/scripts/base/utils/dir.test index 44fee3860f..aa9ee62315 100644 --- a/testing/btest/scripts/base/utils/dir.test +++ b/testing/btest/scripts/base/utils/dir.test @@ -1,11 +1,11 @@ # @TEST-EXEC: btest-bg-run bro bro -b ../dirtest.bro -# @TEST-EXEC: btest-bg-wait 10 +# @TEST-EXEC: btest-bg-wait 15 # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff bro/.stdout @TEST-START-FILE dirtest.bro @load base/utils/dir - +@load base/frameworks/communication # let network-time run. otherwise there are no heartbeats... 
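+# (With no packet source, network time only advances through the
+# communication framework's heartbeats; without this @load the
+# scheduled events below would never fire.)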
redef exit_only_after_terminate = T; global c: count = 0; @@ -33,14 +33,20 @@ function new_file2(fname: string) event change_things() { system("touch ../testdir/newone"); - system("rm ../testdir/bye && touch ../testdir/bye"); + system("rm ../testdir/bye"); + } + +event change_things2() + { + system("touch ../testdir/bye"); } event bro_init() { Dir::monitor("../testdir", new_file1, .5sec); Dir::monitor("../testdir", new_file2, 1sec); - schedule 1sec { change_things() }; + schedule 3sec { change_things() }; + schedule 6sec { change_things2() }; } @TEST-END-FILE diff --git a/testing/btest/scripts/base/utils/exec.test b/testing/btest/scripts/base/utils/exec.test index 8876f0f49b..33ba10f97a 100644 --- a/testing/btest/scripts/base/utils/exec.test +++ b/testing/btest/scripts/base/utils/exec.test @@ -1,11 +1,11 @@ # @TEST-EXEC: btest-bg-run bro bro -b ../exectest.bro -# @TEST-EXEC: btest-bg-wait 10 +# @TEST-EXEC: btest-bg-wait 15 # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff bro/.stdout @TEST-START-FILE exectest.bro @load base/utils/exec - +@load base/frameworks/communication # let network-time run. otherwise there are no heartbeats... redef exit_only_after_terminate = T; global c: count = 0; @@ -14,7 +14,7 @@ function check_exit_condition() { c += 1; - if ( c == 4 ) + if ( c == 3 ) terminate(); } @@ -32,7 +32,8 @@ event bro_init() test_cmd("test1", [$cmd="bash ../somescript.sh", $read_files=set("out1", "out2")]); test_cmd("test2", [$cmd="bash ../nofiles.sh"]); - test_cmd("test3", [$cmd="bash ../suicide.sh"]); + # Not sure of a portable way to test signals yet. + #test_cmd("test3", [$cmd="bash ../suicide.sh"]); test_cmd("test4", [$cmd="bash ../stdin.sh", $stdin="hibye"]); } diff --git a/testing/btest/scripts/policy/protocols/dhcp/known-devices-and-hostnames/basic.test b/testing/btest/scripts/policy/protocols/dhcp/known-devices-and-hostnames/basic.test new file mode 100644 index 0000000000..1144ae1377 --- /dev/null +++ b/testing/btest/scripts/policy/protocols/dhcp/known-devices-and-hostnames/basic.test @@ -0,0 +1,8 @@ +# This tests that the known_devices log is created, +# that devices are logged by MAC address, and that +# the DHCP hostname is added, if available. + +# @TEST-EXEC: bro -r $TRACES/dhcp/dhcp.trace -r $TRACES/dhcp/dhcp_inform.trace %INPUT +# @TEST-EXEC: btest-diff known_devices.log + +@load policy/protocols/dhcp/known-devices-and-hostnames
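
As a reference for the API changes exercised throughout these tests, a
minimal standalone sketch of the reworked SumStats interface (a sketch
only; the stream and sumstat names are illustrative):

    @load base/frameworks/sumstats

    event bro_init()
        {
        local r1: SumStats::Reducer = [$stream="test.metric",
                                       $apply=set(SumStats::SUM)];
        SumStats::create([$name="test",
                          $epoch=3secs,
                          $reducers=set(r1),
                          # Called once per key at the end of each epoch.
                          $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
                              {
                              print fmt("%s -> %.1f", key$host, result["test.metric"]$sum);
                              },
                          # Thresholds are doubles now; return the sum unconverted.
                          $threshold=5.0,
                          $threshold_val(key: SumStats::Key, result: SumStats::Result) =
                              {
                              return result["test.metric"]$sum;
                              },
                          $threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
                              {
                              print fmt("%s crossed 5.0 with %.1f", key$host, result["test.metric"]$sum);
                              }]);

        SumStats::observe("test.metric", [$host=1.2.3.4], [$num=5]);
        }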