Merge remote-tracking branch 'origin/master' into topic/bernhard/hyperloglog
commit 3e74cdc6e0
37 changed files with 475 additions and 344 deletions
@@ -10,49 +10,48 @@
 module SumStats;
 
 export {
-	## Allows a user to decide how large of result groups the
-	## workers should transmit values for cluster stats aggregation.
+	## Allows a user to decide how large of result groups the workers should transmit
+	## values for cluster stats aggregation.
 	const cluster_send_in_groups_of = 50 &redef;
 
-	## The percent of the full threshold value that needs to be met
-	## on a single worker for that worker to send the value to its manager in
-	## order for it to request a global view for that value. There is no
-	## requirement that the manager requests a global view for the key
-	## since it may opt not to if it requested a global view for the key
-	## recently.
+	## The percent of the full threshold value that needs to be met on a single worker
+	## for that worker to send the value to its manager in order for it to request a
+	## global view for that value. There is no requirement that the manager requests
+	## a global view for the key since it may opt not to if it requested a global view
+	## for the key recently.
 	const cluster_request_global_view_percent = 0.2 &redef;
 
 	## This is to deal with intermediate update overload. A manager will only allow
-	## this many intermediate update requests to the workers to be inflight at
-	## any given time. Requested intermediate updates are currently thrown out
-	## and not performed. In practice this should hopefully have a minimal effect.
+	## this many intermediate update requests to the workers to be inflight at any
+	## given time. Requested intermediate updates are currently thrown out and not
+	## performed. In practice this should hopefully have a minimal effect.
 	const max_outstanding_global_views = 10 &redef;
 
-	## Intermediate updates can cause overload situations on very large clusters.
-	## This option may help reduce load and correct intermittent problems.
-	## The goal for this option is also meant to be temporary.
+	## Intermediate updates can cause overload situations on very large clusters. This
+	## option may help reduce load and correct intermittent problems. The goal for this
+	## option is also meant to be temporary.
 	const enable_intermediate_updates = T &redef;
 
-	## Event sent by the manager in a cluster to initiate the
-	## collection of values for a sumstat.
+	## Event sent by the manager in a cluster to initiate the collection of values for
+	## a sumstat.
 	global cluster_ss_request: event(uid: string, ssid: string);
 
-	## Event sent by nodes that are collecting sumstats after receiving
-	## a request for the sumstat from the manager.
+	## Event sent by nodes that are collecting sumstats after receiving a request for
+	## the sumstat from the manager.
 	global cluster_ss_response: event(uid: string, ssid: string, data: ResultTable, done: bool);
 
-	## This event is sent by the manager in a cluster to initiate the
-	## collection of a single key value from a sumstat. It's typically
-	## used to get intermediate updates before the break interval triggers
-	## to speed detection of a value crossing a threshold.
+	## This event is sent by the manager in a cluster to initiate the collection of
+	## a single key value from a sumstat. It's typically used to get intermediate
+	## updates before the break interval triggers to speed detection of a value
+	## crossing a threshold.
 	global cluster_key_request: event(uid: string, ssid: string, key: Key);
 
-	## This event is sent by nodes in response to a
+	## This event is sent by nodes in response to a
 	## :bro:id:`SumStats::cluster_key_request` event.
 	global cluster_key_response: event(uid: string, ssid: string, key: Key, result: Result);
 
-	## This is sent by workers to indicate that they crossed the percent of the
-	## current threshold by the percentage defined globally in
+	## This is sent by workers to indicate that they crossed the percent
+	## of the current threshold by the percentage defined globally in
 	## :bro:id:`SumStats::cluster_request_global_view_percent`
 	global cluster_key_intermediate_response: event(ssid: string, key: SumStats::Key);
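These tunables are all declared &redef, so a cluster operator can adjust them from a site script rather than editing this file. A minimal sketch of what that could look like (the values are purely illustrative, not recommendations):

    @load base/frameworks/sumstats

    # Hypothetical site tuning, e.g. in local.bro; the option names come
    # from the export block above, the values are only illustrative.
    redef SumStats::cluster_send_in_groups_of = 100;
    redef SumStats::cluster_request_global_view_percent = 0.1;
    redef SumStats::max_outstanding_global_views = 5;
    redef SumStats::enable_intermediate_updates = F;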
@@ -69,7 +68,7 @@ redef Cluster::manager2worker_events += /SumStats::thresholds_reset/;
 redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|key_response|key_intermediate_response)/;
 
 @if ( Cluster::local_node_type() != Cluster::MANAGER )
-# This variable is maintained to know what keys have recently sent as
+# This variable is maintained to know what keys have recently sent as
 # intermediate updates so they don't overwhelm their manager. The count that is
 # yielded is the number of times the percentage threshold has been crossed and
 # an intermediate result has been received.
@@ -82,7 +81,7 @@ event bro_init() &priority=-100
 	reducer_store = table();
 	}
 
-# This is done on all non-manager node types in the event that a sumstat is
+# This is done on all non-manager node types in the event that a sumstat is
 # being collected somewhere other than a worker.
 function data_added(ss: SumStat, key: Key, result: Result)
 	{
@@ -92,9 +91,9 @@ function data_added(ss: SumStat, key: Key, result: Result)
 		return;
 
 	# If val is 5 and global view % is 0.1 (10%), pct_val will be 50. If that
-	# crosses the full threshold then it's a candidate to send as an
+	# crosses the full threshold then it's a candidate to send as an
 	# intermediate update.
-	if ( enable_intermediate_updates &&
+	if ( enable_intermediate_updates &&
 	     check_thresholds(ss, key, result, cluster_request_global_view_percent) )
 		{
 		# kick off intermediate update
@@ -113,19 +112,21 @@ event SumStats::send_data(uid: string, ssid: string, data: ResultTable)
 		{
 		local_data[key] = data[key];
 		delete data[key];
 
 		# Only send cluster_send_in_groups_of at a time. Queue another
 		# event to send the next group.
 		if ( cluster_send_in_groups_of == ++num_added )
 			break;
 		}
 
 	local done = F;
 	# If data is empty, this sumstat is done.
 	if ( |data| == 0 )
 		done = T;
 
-	event SumStats::cluster_ss_response(uid, ssid, local_data, done);
+	# Note: the copy is needed to compensate for a serialization caching issue.
+	# This should be changed to something else later.
+	event SumStats::cluster_ss_response(uid, ssid, copy(local_data), done);
 	if ( ! done )
 		schedule 0.01 sec { SumStats::send_data(uid, ssid, data) };
 	}
@@ -133,7 +134,7 @@ event SumStats::send_data(uid: string, ssid: string, data: ResultTable)
 event SumStats::cluster_ss_request(uid: string, ssid: string)
 	{
 	#print fmt("WORKER %s: received the cluster_ss_request event for %s.", Cluster::node, id);
 
 	# Initiate sending all of the data for the requested stats.
 	if ( ssid in result_store )
 		event SumStats::send_data(uid, ssid, result_store[ssid]);
@@ -145,13 +146,16 @@ event SumStats::cluster_ss_request(uid: string, ssid: string)
 	if ( ssid in stats_store )
 		reset(stats_store[ssid]);
 	}
 
 event SumStats::cluster_key_request(uid: string, ssid: string, key: Key)
 	{
 	if ( ssid in result_store && key in result_store[ssid] )
 		{
 		#print fmt("WORKER %s: received the cluster_key_request event for %s=%s.", Cluster::node, key2str(key), data);
-		event SumStats::cluster_key_response(uid, ssid, key, result_store[ssid][key]);
+
+		# Note: the copy is needed to compensate for a serialization caching issue.
+		# This should be changed to something else later.
+		event SumStats::cluster_key_response(uid, ssid, key, copy(result_store[ssid][key]));
 		}
 	else
 		{
@@ -179,27 +183,27 @@ event SumStats::thresholds_reset(ssid: string)
 
 @if ( Cluster::local_node_type() == Cluster::MANAGER )
 
-# This variable is maintained by manager nodes as they collect and aggregate
-# results.
+# This variable is maintained by manager nodes as they collect and aggregate
+# results.
 # Indexed on a uid.
 global stats_results: table[string] of ResultTable &read_expire=1min;
 
 # This variable is maintained by manager nodes to track how many "dones" they
-# collected per collection unique id. Once the number of results for a uid
-# matches the number of peer nodes that results should be coming from, the
+# collected per collection unique id. Once the number of results for a uid
+# matches the number of peer nodes that results should be coming from, the
 # result is written out and deleted from here.
 # Indexed on a uid.
 # TODO: add an &expire_func in case not all results are received.
 global done_with: table[string] of count &read_expire=1min &default=0;
 
-# This variable is maintained by managers to track intermediate responses as
-# they are getting a global view for a certain key.
+# This variable is maintained by managers to track intermediate responses as
+# they are getting a global view for a certain key.
 # Indexed on a uid.
 global key_requests: table[string] of Result &read_expire=1min;
 
 # This variable is maintained by managers to prevent overwhelming communication due
-# to too many intermediate updates. Each sumstat is tracked separately so that
-# one won't overwhelm and degrade other quieter sumstats.
+# to too many intermediate updates. Each sumstat is tracked separately so that
+# one won't overwhelm and degrade other quieter sumstats.
 # Indexed on a sumstat id.
 global outstanding_global_views: table[string] of count &default=0;
@@ -211,11 +215,11 @@ event SumStats::finish_epoch(ss: SumStat)
 		{
 		#print fmt("%.6f MANAGER: breaking %s sumstat for %s sumstat", network_time(), ss$name, ss$id);
 		local uid = unique_id("");
 
 		if ( uid in stats_results )
 			delete stats_results[uid];
 		stats_results[uid] = table();
 
 		# Request data from peers.
 		event SumStats::cluster_ss_request(uid, ss$id);
 		}
@@ -224,7 +228,7 @@ event SumStats::finish_epoch(ss: SumStat)
 	schedule ss$epoch { SumStats::finish_epoch(ss) };
 	}
 
-# This is unlikely to be called often, but it's here in
+# This is unlikely to be called often, but it's here in
 # case there are sumstats being collected by managers.
 function data_added(ss: SumStat, key: Key, result: Result)
 	{
@@ -234,7 +238,7 @@ function data_added(ss: SumStat, key: Key, result: Result)
 		event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
 		}
 	}
 
 event SumStats::cluster_key_response(uid: string, ssid: string, key: Key, result: Result)
 	{
 	#print fmt("%0.6f MANAGER: receiving key data from %s - %s=%s", network_time(), get_event_peer()$descr, key2str(key), result);
@@ -277,7 +281,7 @@ event SumStats::cluster_key_intermediate_response(ssid: string, key: Key)
 	if ( ssid in outstanding_global_views &&
 	     |outstanding_global_views[ssid]| > max_outstanding_global_views )
 		{
-		# Don't do this intermediate update. Perhaps at some point in the future
+		# Don't do this intermediate update. Perhaps at some point in the future
 		# we will queue and randomly select from these ignored intermediate
 		# update requests.
 		return;
@@ -308,7 +312,7 @@ event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable
 			local_data[key] = data[key];
 
 		# If a stat is done being collected, thresholds for each key
-		# need to be checked so we're doing it here to avoid doubly
+		# need to be checked so we're doing it here to avoid doubly
 		# iterating over each key.
 		if ( Cluster::worker_count == done_with[uid] )
 			{
@@ -319,7 +323,7 @@ event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable
 				}
 			}
 		}
 
 	# If the data has been collected from all peers, we are done and ready to finish.
 	if ( Cluster::worker_count == done_with[uid] )
 		{
@@ -1,5 +1,5 @@
-##! The summary statistics framework provides a way to
-##! summarize large streams of data into simple reduced
+##! The summary statistics framework provides a way to
+##! summarize large streams of data into simple reduced
 ##! measurements.
 
 module SumStats;
@@ -10,24 +10,24 @@ export {
 		PLACEHOLDER
 	};
 
-	## Represents a thing which is having summarization
+	## Represents a thing which is having summarization
 	## results collected for it.
 	type Key: record {
-		## A non-address related summarization or a sub-key for
-		## an address based summarization. An example might be
+		## A non-address related summarization or a sub-key for
+		## an address based summarization. An example might be
 		## successful SSH connections by client IP address
 		## where the client string would be the key value.
-		## Another example might be number of HTTP requests to
-		## a particular value in a Host header. This is an
-		## example of a non-host based metric since multiple
-		## IP addresses could respond for the same Host
+		## Another example might be number of HTTP requests to
+		## a particular value in a Host header. This is an
+		## example of a non-host based metric since multiple
+		## IP addresses could respond for the same Host
 		## header value.
 		str: string &optional;
 
 		## Host is the value to which this metric applies.
 		host: addr &optional;
 	};
 
 	## Represents data being added for a single observation.
 	## Only supply a single field at a time!
 	type Observation: record {
@@ -40,17 +40,17 @@ export {
 	};
 
 	type Reducer: record {
-		## Observation stream identifier for the reducer
+		## Observation stream identifier for the reducer
 		## to attach to.
 		stream: string;
 
 		## The calculations to perform on the data points.
 		apply: set[Calculation];
 
-		## A predicate so that you can decide per key if you
+		## A predicate so that you can decide per key if you
 		## would like to accept the data being inserted.
 		pred: function(key: SumStats::Key, obs: SumStats::Observation): bool &optional;
 
 		## A function to normalize the key. This can be used to aggregate or
 		## normalize the entire key.
 		normalize_key: function(key: SumStats::Key): Key &optional;
@@ -59,11 +59,11 @@ export {
 	## Value calculated for an observation stream fed into a reducer.
 	## Most of the fields are added by plugins.
 	type ResultVal: record {
-		## The time when the first observation was added to
+		## The time when the first observation was added to
 		## this result value.
 		begin: time;
 
-		## The time when the last observation was added to
+		## The time when the last observation was added to
 		## this result value.
 		end: time;
@@ -74,55 +74,56 @@ export {
 	## Type to store results for multiple reducers.
 	type Result: table[string] of ResultVal;
 
-	## Type to store a table of sumstats results indexed
+	## Type to store a table of sumstats results indexed
 	## by keys.
 	type ResultTable: table[Key] of Result;
 
-	## SumStats represent an aggregation of reducers along with
+	## SumStats represent an aggregation of reducers along with
 	## mechanisms to handle various situations like the epoch ending
 	## or thresholds being crossed.
-	## It's best to not access any global state outside
-	## of the variables given to the callbacks because there
-	## is no assurance provided as to where the callbacks
+	##
+	## It's best to not access any global state outside
+	## of the variables given to the callbacks because there
+	## is no assurance provided as to where the callbacks
 	## will be executed on clusters.
 	type SumStat: record {
-		## The interval at which this filter should be "broken"
-		## and the '$epoch_finished' callback called. The
+		## The interval at which this filter should be "broken"
+		## and the '$epoch_finished' callback called. The
 		## results are also reset at this time so any threshold
-		## based detection needs to be set to a
-		## value that should be expected to happen within
+		## based detection needs to be set to a
+		## value that should be expected to happen within
 		## this epoch.
 		epoch: interval;
 
 		## The reducers for the SumStat
 		reducers: set[Reducer];
 
-		## Provide a function to calculate a value from the
-		## :bro:see:`Result` structure which will be used
-		## for thresholding.
+		## Provide a function to calculate a value from the
+		## :bro:see:`Result` structure which will be used
+		## for thresholding.
 		## This is required if a $threshold value is given.
 		threshold_val: function(key: SumStats::Key, result: SumStats::Result): count &optional;
 
-		## The threshold value for calling the
+		## The threshold value for calling the
 		## $threshold_crossed callback.
 		threshold: count &optional;
 
-		## A series of thresholds for calling the
+		## A series of thresholds for calling the
 		## $threshold_crossed callback.
 		threshold_series: vector of count &optional;
 
 		## A callback that is called when a threshold is crossed.
 		threshold_crossed: function(key: SumStats::Key, result: SumStats::Result) &optional;
 
-		## A callback with the full collection of Results for
+		## A callback with the full collection of Results for
 		## this SumStat.
 		epoch_finished: function(rt: SumStats::ResultTable) &optional;
 	};
 
 	## Create a summary statistic.
 	global create: function(ss: SumStats::SumStat);
 
-	## Add data into an observation stream. This should be
+	## Add data into an observation stream. This should be
 	## called when a script has measured some point value.
 	##
 	## id: The observation stream identifier that the data
@@ -143,13 +144,13 @@ export {
 	};
 
 	## This event is generated when thresholds are reset for a SumStat.
-	##
+	##
 	## ssid: SumStats ID that thresholds were reset for.
 	global thresholds_reset: event(ssid: string);
 
-	## Helper function to represent a :bro:type:`SumStats::Key` value as
+	## Helper function to represent a :bro:type:`SumStats::Key` value as
 	## a simple string.
-	##
+	##
 	## key: The metric key that is to be converted into a string.
 	##
 	## Returns: A string representation of the metric key.
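This export block is the whole user-facing API: define reducers, create a SumStat, and feed it observations. A minimal usage sketch against the interface as it stands in this diff; the "conn.attempts" stream name and all numbers are invented for illustration:

    @load base/frameworks/sumstats

    # Hypothetical threshold helper; SUM is stored as a double and this
    # era of the interface expects a count back from $threshold_val.
    function attempts_threshold(key: SumStats::Key, result: SumStats::Result): count
        {
        return double_to_count(result["conn.attempts"]$sum);
        }

    function attempts_crossed(key: SumStats::Key, result: SumStats::Result)
        {
        print fmt("%s crossed the example threshold", SumStats::key2str(key));
        }

    event bro_init()
        {
        local r1: SumStats::Reducer = [$stream="conn.attempts",
                                       $apply=set(SumStats::SUM)];
        SumStats::create([$epoch=5mins,
                          $reducers=set(r1),
                          $threshold_val=attempts_threshold,
                          $threshold=100,
                          $threshold_crossed=attempts_crossed]);
        }

    event connection_attempt(c: connection)
        {
        # Each attempt contributes 1 via the Observation's num field.
        SumStats::observe("conn.attempts", [$host=c$id$orig_h], [$num=1]);
        }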
@@ -181,16 +182,17 @@ global result_store: table[string] of ResultTable = table();
 # Store of threshold information.
 global thresholds_store: table[string, Key] of bool = table();
 
-# This is called whenever
-# key values are updated and the new val is given as the `val` argument.
-# It's only prototyped here because cluster and non-cluster have separate
-# implementations.
+# This is called whenever key values are updated and the new val is given as the
+# `val` argument. It's only prototyped here because cluster and non-cluster have
+# separate implementations.
 global data_added: function(ss: SumStat, key: Key, result: Result);
 
 # Prototype the hook point for plugins to do calculations.
 global observe_hook: hook(r: Reducer, val: double, data: Observation, rv: ResultVal);
 
 # Prototype the hook point for plugins to initialize any result values.
 global init_resultval_hook: hook(r: Reducer, rv: ResultVal);
 
 # Prototype the hook point for plugins to merge Results.
 global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal);
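These three hooks are the entire plugin surface, and every plugin later in this diff (average, max, min, sample, std-dev, sum, unique, variance) follows the same pattern: extend the Calculation enum, extend ResultVal, then handle the hooks. A sketch of a hypothetical plugin in that style; the LAST calculation and its last field are invented for illustration:

    module SumStats;

    export {
        redef enum Calculation += {
            ## Track the most recently observed value (hypothetical plugin).
            LAST
        };

        redef record ResultVal += {
            ## Added by the hypothetical LAST plugin.
            last: double &optional;
        };
    }

    hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
        {
        if ( LAST in r$apply )
            rv$last = val;
        }

    hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
        {
        # When merging, keep the value from the side whose end time is later.
        if ( rv1?$last && rv2?$last )
            result$last = ( rv1$end > rv2$end ) ? rv1$last : rv2$last;
        else if ( rv1?$last )
            result$last = rv1$last;
        else if ( rv2?$last )
            result$last = rv2$last;
        }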
@@ -252,7 +254,7 @@ function compose_results(r1: Result, r2: Result): Result
 			result[data_id] = r2[data_id];
 			}
 		}
 
 	return result;
 	}
@@ -306,25 +308,25 @@ function observe(id: string, key: Key, obs: Observation)
 		if ( r?$normalize_key )
 			key = r$normalize_key(copy(key));
 
-		# If this reducer has a predicate, run the predicate
+		# If this reducer has a predicate, run the predicate
 		# and skip this key if the predicate returns false.
 		if ( r?$pred && ! r$pred(key, obs) )
 			next;
 
 		local ss = stats_store[r$sid];
 
 		# If there is a threshold and no epoch_finished callback
 		# we don't need to continue counting since the data will
 		# never be accessed. This was leading
-		# to some state management issues when measuring
+		# to some state management issues when measuring
 		# uniqueness.
-		# NOTE: this optimization could need removed in the
+		# NOTE: this optimization could need to be removed in the
 		# future if on demand access is provided to the
 		# SumStats results.
 		if ( ! ss?$epoch_finished &&
 		     r$sid in threshold_tracker &&
 		     key in threshold_tracker[r$sid] &&
-		     ( ss?$threshold &&
+		     ( ss?$threshold &&
 		       threshold_tracker[r$sid][key]$is_threshold_crossed ) ||
 		     ( ss?$threshold_series &&
 		       threshold_tracker[r$sid][key]$threshold_series_index+1 == |ss$threshold_series| ) )
@@ -356,7 +358,7 @@ function observe(id: string, key: Key, obs: Observation)
 		}
 	}
 
-# This function checks if a threshold has been crossed. It is also used as a method to implement
+# This function checks if a threshold has been crossed. It is also used as a method to implement
 # mid-break-interval threshold crossing detection for cluster deployments.
 function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
 	{
@@ -399,7 +401,7 @@ function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
 	     |ss$threshold_series| >= tt$threshold_series_index &&
 	     watch >= ss$threshold_series[tt$threshold_series_index] )
 		{
-		# A threshold series was given and the value crossed the next
+		# A threshold series was given and the value crossed the next
 		# value in the series.
 		return T;
 		}
@@ -15,8 +15,8 @@ event SumStats::finish_epoch(ss: SumStat)
 
 	schedule ss$epoch { SumStats::finish_epoch(ss) };
 	}
 
 function data_added(ss: SumStat, key: Key, result: Result)
 	{
 	if ( check_thresholds(ss, key, result, 1.0) )
@@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Calculate the average of the values.
 		AVERAGE
 	};
@@ -33,4 +33,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$average = rv1$average;
 	else if ( rv2?$average )
 		result$average = rv2$average;
-	}
+	}
@@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Find the maximum value.
 		MAX
 	};
@@ -18,7 +18,7 @@ hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
 	{
 	if ( MAX in r$apply )
 		{
-		if ( ! rv?$max )
+		if ( ! rv?$max )
 			rv$max = val;
 		else if ( val > rv$max )
 			rv$max = val;
@@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Find the minimum value.
 		MIN
 	};
@@ -18,7 +18,7 @@ hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
 	{
 	if ( MIN in r$apply )
 		{
-		if ( ! rv?$min )
+		if ( ! rv?$min )
 			rv$min = val;
 		else if ( val < rv$min )
 			rv$min = val;
@@ -33,4 +33,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$min = rv1$min;
 	else if ( rv2?$min )
 		result$min = rv2$min;
-	}
+	}
@@ -1,4 +1,4 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 @load base/utils/queue
 
 module SumStats;
@@ -10,10 +10,8 @@ export {
 	};
 
 	redef record ResultVal += {
-		## This is the queue where samples
-		## are maintained. Use the
-		## :bro:see:`SumStats::get_samples` function
-		## to get a vector of the samples.
+		## This is the queue where samples are maintained. Use the
+		## :bro:see:`SumStats::get_samples` function to get a vector of the samples.
 		samples: Queue::Queue &optional;
 	};
@@ -48,4 +46,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$samples = rv1$samples;
 	else if ( rv2?$samples )
 		result$samples = rv2$samples;
-	}
+	}
@@ -1,10 +1,10 @@
+@load base/frameworks/sumstats/main
 @load ./variance
-@load base/frameworks/sumstats
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Find the standard deviation of the values.
 		STD_DEV
 	};
@@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Sums the values given. For string values,
 		## this will be the number of strings given.
 		SUM
@@ -48,4 +48,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 	if ( rv2?$sum )
 		result$sum += rv2$sum;
 	}
-	}
+	}
@@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Calculate the number of unique values.
 		UNIQUE
 	};
@@ -16,8 +16,8 @@ export {
 	}
 
 redef record ResultVal += {
-	# Internal use only. This is not meant to be publically available
-	# because we don't want to trust that we can inspect the values
-	# since we will like move to a probalistic data structure in the future.
+	# Internal use only. This is not meant to be publicly available
+	# because we don't want to trust that we can inspect the values
+	# TODO: in the future this will optionally be a hyperloglog structure
 	unique_vals: set[Observation] &optional;
@@ -27,7 +27,7 @@ hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
 	{
 	if ( UNIQUE in r$apply )
 		{
-		if ( ! rv?$unique_vals )
+		if ( ! rv?$unique_vals )
 			rv$unique_vals=set();
 		add rv$unique_vals[obs];
 		rv$unique = |rv$unique_vals|;
@@ -40,7 +40,7 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 	{
 	if ( rv1?$unique_vals )
 		result$unique_vals = rv1$unique_vals;
 
 	if ( rv2?$unique_vals )
 		if ( ! result?$unique_vals )
 			result$unique_vals = rv2$unique_vals;
@@ -50,4 +50,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 
 	result$unique = |result$unique_vals|;
 	}
-	}
+	}
@@ -1,10 +1,10 @@
+@load base/frameworks/sumstats/main
 @load ./average
-@load base/frameworks/sumstats
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Find the variance of the values.
 		VARIANCE
 	};
@@ -66,4 +66,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$prev_avg = rv2$prev_avg;
 
 	calc_variance(result);
-	}
+	}
@@ -1,7 +1,7 @@
-##! Base SSH analysis script. The heuristic to blindly determine success or
+##! Base SSH analysis script. The heuristic to blindly determine success or
 ##! failure for SSH connections is implemented here. At this time, it only
 ##! uses the size of the data being returned from the server to make the
-##! heuristic determination about success of the connection.
+##! heuristic determination about success of the connection.
 ##! Requires that :bro:id:`use_conn_size_analyzer` is set to T! The heuristic
 ##! is not attempted if the connection size analyzer isn't enabled.
@@ -17,7 +17,7 @@ module SSH;
 export {
 	## The SSH protocol logging stream identifier.
 	redef enum Log::ID += { LOG };
 
 	type Info: record {
 		## Time when the SSH connection began.
 		ts: time &log;
@@ -26,9 +26,9 @@ export {
 		## The connection's 4-tuple of endpoint addresses/ports.
 		id: conn_id &log;
 		## Indicates if the login was heuristically guessed to be "success",
-		## "failure", or "undetermined".
+		## "failure", or "undetermined".
 		status: string &log &default="undetermined";
-		## Direction of the connection. If the client was a local host
+		## Direction of the connection. If the client was a local host
 		## logging into an external host, this would be OUTBOUND. INBOUND
 		## would be set for the opposite situation.
 		# TODO: handle local-local and remote-remote better.
@@ -38,33 +38,33 @@ export {
 		## Software string from the server.
 		server: string &log &optional;
 		## Amount of data returned from the server. This is currently
-		## the only measure of the success heuristic and it is logged to
+		## the only measure of the success heuristic and it is logged to
 		## assist analysts looking at the logs to make their own determination
 		## about the success on a case-by-case basis.
 		resp_size: count &log &default=0;
 
 		## Indicate if the SSH session is done being watched.
 		done: bool &default=F;
 	};
 
-	## The size in bytes of data sent by the server at which the SSH
+	## The size in bytes of data sent by the server at which the SSH
 	## connection is presumed to be successful.
 	const authentication_data_size = 4000 &redef;
 
 	## If true, we tell the event engine to not look at further data
 	## packets after the initial SSH handshake. Helps with performance
 	## (especially with large file transfers) but precludes some
 	## kinds of analyses.
 	const skip_processing_after_detection = F &redef;
 
 	## Event that is generated when the heuristic thinks that a login
 	## was successful.
 	global heuristic_successful_login: event(c: connection);
 
 	## Event that is generated when the heuristic thinks that a login
 	## failed.
 	global heuristic_failed_login: event(c: connection);
 
 	## Event that can be handled to access the :bro:type:`SSH::Info`
 	## record as it is sent on to the logging framework.
 	global log_ssh: event(rec: Info);
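The two heuristic events exported above are the intended hook points for other scripts. A minimal sketch of a consumer (the message text is invented; the event names and connection fields come from the export block and standard Bro records):

    @load base/protocols/ssh

    event SSH::heuristic_successful_login(c: connection)
        {
        print fmt("SSH login heuristically judged successful: %s -> %s",
                  c$id$orig_h, c$id$resp_h);
        }

    event SSH::heuristic_failed_login(c: connection)
        {
        print fmt("SSH login heuristically judged failed: %s -> %s",
                  c$id$orig_h, c$id$resp_h);
        }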
@@ -102,21 +102,21 @@ function check_ssh_connection(c: connection, done: bool)
 	# If already done watching this connection, just return.
 	if ( c$ssh$done )
 		return;
 
 	if ( done )
 		{
-		# If this connection is done, then we can look to see if
+		# If this connection is done, then we can look to see if
 		# this matches the conditions for a failed login. Failed
 		# logins are only detected at connection state removal.
 
-		if ( # Require originators to have sent at least 50 bytes.
+		if ( # Require originators to have sent at least 50 bytes.
 		     c$orig$size > 50 &&
 		     # Responders must be below 4000 bytes.
-		     c$resp$size < 4000 &&
+		     c$resp$size < 4000 &&
 		     # Responder must have sent fewer than 40 packets.
 		     c$resp$num_pkts < 40 &&
 		     # If there was a content gap we can't reliably do this heuristic.
-		     c?$conn && c$conn$missed_bytes == 0)# &&
+		     c?$conn && c$conn$missed_bytes == 0)# &&
 		     # Only "normal" connections can count.
 		     #c$conn?$conn_state && c$conn$conn_state in valid_states )
 			{
@@ -147,13 +147,13 @@ function check_ssh_connection(c: connection, done: bool)
 
 	# Set the direction for the log.
 	c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
 
 	# Set the "done" flag to prevent the watching event from rescheduling
 	# after detection is done.
 	c$ssh$done=T;
 
 	Log::write(SSH::LOG, c$ssh);
 
 	if ( skip_processing_after_detection )
 		{
 		# Stop watching this connection, we don't care about it anymore.
@@ -186,12 +186,12 @@ event ssh_server_version(c: connection, version: string) &priority=5
 	set_session(c);
 	c$ssh$server = version;
 	}
 
 event ssh_client_version(c: connection, version: string) &priority=5
 	{
 	set_session(c);
 	c$ssh$client = version;
 
 	# The heuristic detection for SSH relies on the ConnSize analyzer.
 	# Don't do the heuristics if it's disabled.
 	if ( use_conn_size_analyzer )
@@ -6,7 +6,7 @@ export {
 	## Settings for initializing the queue.
 	type Settings: record {
 		## If a maximum length is set for the queue
-		## it will maintain itself at that
+		## it will maintain itself at that
 		## maximum length automatically.
 		max_len: count &optional;
 	};
@@ -15,17 +15,17 @@ export {
 	type Queue: record {};
 
 	## Initialize a queue record structure.
-	##
+	##
 	## s: A :bro:record:`Settings` record configuring the queue.
 	##
 	## Returns: An opaque queue record.
 	global init: function(s: Settings): Queue;
 
 	## Put a value onto the beginning of a queue.
-	##
+	##
 	## q: The queue to put the value into.
 	##
-	## val: The value to insert into the queue.
+	##
+	## val: The value to insert into the queue.
 	global put: function(q: Queue, val: any);
 
 	## Get a value from the end of a queue.
@@ -35,29 +35,29 @@ export {
 	## Returns: The value gotten from the queue.
 	global get: function(q: Queue): any;
 
-	## Merge two queue's together. If any settings are applied
+	## Merge two queues together. If any settings are applied
 	## to the queues, the settings from q1 are used for the new
 	## merged queue.
-	##
+	##
 	## q1: The first queue. Settings are taken from here.
 	##
 	## q2: The second queue.
-	##
+	##
 	## Returns: A new queue from merging the other two together.
 	global merge: function(q1: Queue, q2: Queue): Queue;
 
 	## Get the number of items in a queue.
-	##
+	##
 	## q: The queue.
 	##
 	## Returns: The length of the queue.
 	global len: function(q: Queue): count;
 
 	## Get the contents of the queue as a vector.
-	##
+	##
 	## q: The queue.
 	##
-	## ret: A vector containing the
+	## ret: A vector containing the
 	## current contents of q as the type of ret.
 	global get_vector: function(q: Queue, ret: vector of any);
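Putting the interface above together, a short usage sketch (the values are arbitrary; the function names come from the export block):

    @load base/utils/queue

    event bro_init()
        {
        local q = Queue::init([$max_len=3]);

        Queue::put(q, "a");
        Queue::put(q, "b");
        Queue::put(q, "c");
        Queue::put(q, "d");   # with $max_len=3, the oldest value is dropped

        print Queue::len(q);  # prints 3

        local contents: vector of string = vector();
        Queue::get_vector(q, contents);
        print contents;
        }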
@@ -130,7 +130,7 @@ function get_vector(q: Queue, ret: vector of any)
 	local i = q$bottom;
 	local j = 0;
 	# Really dumb hack, this is only to provide
-	# the iteration for the correct number of
+	# the iteration for the correct number of
 	# values in q$vals.
 	for ( ignored_val in q$vals )
 		{
@@ -1,6 +1,6 @@
 
 ## Given an interval, returns a string of the form 3m34s to
-## give a minimalized human readable string for the minutes
+## give a minimalized human readable string for the minutes
 ## and seconds represented by the interval.
 function duration_to_mins_secs(dur: interval): string
 	{
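The function body is cut off by this hunk. For reference, one plausible implementation matching the documented signature and behavior, using the standard interval_to_double and double_to_count built-ins (an assumption, not necessarily the committed code):

    # Sketch only: converts the interval to whole seconds, then formats
    # minutes and seconds as e.g. "3m34s".
    function duration_to_mins_secs(dur: interval): string
        {
        local total_secs = double_to_count(interval_to_double(dur));
        return fmt("%dm%ds", total_secs / 60, total_secs % 60);
        }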