mirror of
https://github.com/zeek/zeek.git
synced 2025-10-16 21:48:21 +00:00
Checkpoint for SumStats rename.
This commit is contained in:
parent
8165d6077d
commit
fbe967e16a
32 changed files with 626 additions and 620 deletions
|
@ -7,7 +7,7 @@
|
|||
@load base/frameworks/cluster
|
||||
@load ./main
|
||||
|
||||
module Measurement;
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
## Allows a user to decide how large of result groups the
|
||||
|
@ -48,22 +48,21 @@ export {
|
|||
global cluster_key_request: event(uid: string, mid: string, key: Key);
|
||||
|
||||
## This event is sent by nodes in response to a
|
||||
## :bro:id:`Measurement::cluster_key_request` event.
|
||||
## :bro:id:`SumStats::cluster_key_request` event.
|
||||
global cluster_key_response: event(uid: string, mid: string, key: Key, result: Result);
|
||||
|
||||
## This is sent by workers to indicate that they crossed the percent of the
|
||||
## current threshold by the percentage defined globally in
|
||||
## :bro:id:`Measurement::cluster_request_global_view_percent`
|
||||
global cluster_key_intermediate_response: event(mid: string, key: Measurement::Key);
|
||||
## :bro:id:`SumStats::cluster_request_global_view_percent`
|
||||
global cluster_key_intermediate_response: event(mid: string, key: SumStats::Key);
|
||||
|
||||
## This event is scheduled internally on workers to send result chunks.
|
||||
global send_data: event(uid: string, mid: string, data: ResultTable);
|
||||
}
|
||||
|
||||
# Add events to the cluster framework to make this work.
|
||||
redef Cluster::manager2worker_events += /Measurement::cluster_(measurement_request|key_request)/;
|
||||
redef Cluster::manager2worker_events += /Measurement::new_measurement/;
|
||||
redef Cluster::worker2manager_events += /Measurement::cluster_(measurement_response|key_response|key_intermediate_response)/;
|
||||
redef Cluster::manager2worker_events += /SumStats::cluster_(measurement_request|key_request)/;
|
||||
redef Cluster::worker2manager_events += /SumStats::cluster_(measurement_response|key_response|key_intermediate_response)/;
|
||||
|
||||
@if ( Cluster::local_node_type() != Cluster::MANAGER )
|
||||
# This variable is maintained to know what keys have recently sent as
|
||||
|
@ -75,32 +74,32 @@ global recent_global_view_keys: table[string, Key] of count &create_expire=1min
|
|||
event bro_init() &priority=-100
|
||||
{
|
||||
# The manager is the only host allowed to track these.
|
||||
measurement_store = table();
|
||||
stats_store = table();
|
||||
reducer_store = table();
|
||||
}
|
||||
|
||||
# This is done on all non-manager node types in the event that a metric is
|
||||
# being collected somewhere other than a worker.
|
||||
function data_added(m: Measurement, key: Key, result: Result)
|
||||
function data_added(ss: SumStat, key: Key, result: Result)
|
||||
{
|
||||
# If an intermediate update for this value was sent recently, don't send
|
||||
# it again.
|
||||
if ( [m$id, key] in recent_global_view_keys )
|
||||
if ( [ss$id, key] in recent_global_view_keys )
|
||||
return;
|
||||
|
||||
# If val is 5 and global view % is 0.1 (10%), pct_val will be 50. If that
|
||||
# crosses the full threshold then it's a candidate to send as an
|
||||
# intermediate update.
|
||||
if ( enable_intermediate_updates &&
|
||||
check_thresholds(m, key, result, cluster_request_global_view_percent) )
|
||||
check_thresholds(ss, key, result, cluster_request_global_view_percent) )
|
||||
{
|
||||
# kick off intermediate update
|
||||
event Measurement::cluster_key_intermediate_response(m$id, key);
|
||||
++recent_global_view_keys[m$id, key];
|
||||
event SumStats::cluster_key_intermediate_response(ss$id, key);
|
||||
++recent_global_view_keys[ss$id, key];
|
||||
}
|
||||
}
|
||||
|
||||
event Measurement::send_data(uid: string, mid: string, data: ResultTable)
|
||||
event SumStats::send_data(uid: string, mid: string, data: ResultTable)
|
||||
{
|
||||
#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
|
||||
|
||||
|
@ -122,39 +121,39 @@ event Measurement::send_data(uid: string, mid: string, data: ResultTable)
|
|||
if ( |data| == 0 )
|
||||
done = T;
|
||||
|
||||
event Measurement::cluster_measurement_response(uid, mid, local_data, done);
|
||||
event SumStats::cluster_measurement_response(uid, mid, local_data, done);
|
||||
if ( ! done )
|
||||
schedule 0.01 sec { Measurement::send_data(uid, mid, data) };
|
||||
schedule 0.01 sec { SumStats::send_data(uid, mid, data) };
|
||||
}
|
||||
|
||||
event Measurement::cluster_measurement_request(uid: string, mid: string)
|
||||
event SumStats::cluster_measurement_request(uid: string, mid: string)
|
||||
{
|
||||
#print fmt("WORKER %s: received the cluster_measurement_request event for %s.", Cluster::node, id);
|
||||
|
||||
# Initiate sending all of the data for the requested measurement.
|
||||
if ( mid in result_store )
|
||||
event Measurement::send_data(uid, mid, result_store[mid]);
|
||||
event SumStats::send_data(uid, mid, result_store[mid]);
|
||||
else
|
||||
event Measurement::send_data(uid, mid, table());
|
||||
event SumStats::send_data(uid, mid, table());
|
||||
|
||||
# Lookup the actual measurement and reset it, the reference to the data
|
||||
# currently stored will be maintained internally by the send_data event.
|
||||
if ( mid in measurement_store )
|
||||
reset(measurement_store[mid]);
|
||||
if ( mid in stats_store )
|
||||
reset(stats_store[mid]);
|
||||
}
|
||||
|
||||
event Measurement::cluster_key_request(uid: string, mid: string, key: Key)
|
||||
event SumStats::cluster_key_request(uid: string, mid: string, key: Key)
|
||||
{
|
||||
if ( mid in result_store && key in result_store[mid] )
|
||||
{
|
||||
#print fmt("WORKER %s: received the cluster_key_request event for %s=%s.", Cluster::node, key2str(key), data);
|
||||
event Measurement::cluster_key_response(uid, mid, key, result_store[mid][key]);
|
||||
event SumStats::cluster_key_response(uid, mid, key, result_store[mid][key]);
|
||||
}
|
||||
else
|
||||
{
|
||||
# We need to send an empty response if we don't have the data so that the manager
|
||||
# can know that it heard back from all of the workers.
|
||||
event Measurement::cluster_key_response(uid, mid, key, table());
|
||||
event SumStats::cluster_key_response(uid, mid, key, table());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -166,7 +165,7 @@ event Measurement::cluster_key_request(uid: string, mid: string, key: Key)
|
|||
# This variable is maintained by manager nodes as they collect and aggregate
|
||||
# results.
|
||||
# Index on a uid.
|
||||
global measurement_results: table[string] of ResultTable &read_expire=1min;
|
||||
global stats_results: table[string] of ResultTable &read_expire=1min;
|
||||
|
||||
# This variable is maintained by manager nodes to track how many "dones" they
|
||||
# collected per collection unique id. Once the number of results for a uid
|
||||
|
@ -189,7 +188,7 @@ global outstanding_global_views: table[string] of count &default=0;
|
|||
|
||||
const zero_time = double_to_time(0.0);
|
||||
# Managers handle logging.
|
||||
event Measurement::finish_epoch(m: Measurement)
|
||||
event SumStats::finish_epoch(ss: SumStat)
|
||||
{
|
||||
if ( network_time() > zero_time )
|
||||
{
|
||||
|
@ -198,25 +197,25 @@ event Measurement::finish_epoch(m: Measurement)
|
|||
|
||||
# Drop any stale partial results for this uid before starting a fresh
# collection; the table is named stats_results after the rename.
if ( uid in stats_results )
|
||||
delete stats_results[uid];
|
||||
measurement_results[uid] = table();
|
||||
stats_results[uid] = table();
|
||||
|
||||
# Request data from peers.
|
||||
event Measurement::cluster_measurement_request(uid, m$id);
|
||||
event SumStats::cluster_measurement_request(uid, ss$id);
|
||||
}
|
||||
|
||||
# Schedule the next finish_epoch event.
|
||||
schedule m$epoch { Measurement::finish_epoch(m) };
|
||||
# The handler's parameter is named `ss`; reschedule with that record.
schedule ss$epoch { SumStats::finish_epoch(ss) };
|
||||
}
|
||||
|
||||
# This is unlikely to be called often, but it's here in case there are measurements
|
||||
# being collected by managers.
|
||||
function data_added(m: Measurement, key: Key, result: Result)
|
||||
function data_added(ss: SumStat, key: Key, result: Result)
|
||||
{
|
||||
if ( check_thresholds(m, key, result, 1.0) )
|
||||
threshold_crossed(m, key, result);
|
||||
if ( check_thresholds(ss, key, result, 1.0) )
|
||||
threshold_crossed(ss, key, result);
|
||||
}
|
||||
|
||||
event Measurement::cluster_key_response(uid: string, mid: string, key: Key, result: Result)
|
||||
event SumStats::cluster_key_response(uid: string, ssid: string, key: Key, result: Result)
|
||||
{
|
||||
#print fmt("%0.6f MANAGER: receiving key data from %s - %s=%s", network_time(), get_event_peer()$descr, key2str(key), result);
|
||||
|
||||
|
@ -233,26 +232,26 @@ event Measurement::cluster_key_response(uid: string, mid: string, key: Key, resu
|
|||
#print fmt("worker_count:%d :: done_with:%d", Cluster::worker_count, done_with[uid]);
|
||||
if ( Cluster::worker_count == done_with[uid] )
|
||||
{
|
||||
local m = measurement_store[mid];
|
||||
local ss = stats_store[ssid];
|
||||
local ir = key_requests[uid];
|
||||
if ( check_thresholds(m, key, ir, 1.0) )
|
||||
threshold_crossed(m, key, ir);
|
||||
if ( check_thresholds(ss, key, ir, 1.0) )
|
||||
threshold_crossed(ss, key, ir);
|
||||
|
||||
delete done_with[uid];
|
||||
delete key_requests[uid];
|
||||
# Check that there is an outstanding view before subtracting.
|
||||
if ( outstanding_global_views[mid] > 0 )
|
||||
--outstanding_global_views[mid];
|
||||
if ( outstanding_global_views[ssid] > 0 )
|
||||
--outstanding_global_views[ssid];
|
||||
}
|
||||
}
|
||||
|
||||
# Managers handle intermediate updates here.
|
||||
event Measurement::cluster_key_intermediate_response(mid: string, key: Key)
|
||||
event SumStats::cluster_key_intermediate_response(ssid: string, key: Key)
|
||||
{
|
||||
#print fmt("MANAGER: receiving intermediate key data from %s", get_event_peer()$descr);
|
||||
#print fmt("MANAGER: requesting key data for %s", key2str(key));
|
||||
|
||||
if ( mid in outstanding_global_views &&
|
||||
if ( ssid in outstanding_global_views &&
|
||||
|outstanding_global_views[ssid]| > max_outstanding_global_views )
|
||||
{
|
||||
# Don't do this intermediate update. Perhaps at some point in the future
|
||||
|
@ -261,13 +260,13 @@ event Measurement::cluster_key_intermediate_response(mid: string, key: Key)
|
|||
return;
|
||||
}
|
||||
|
||||
++outstanding_global_views[mid];
|
||||
++outstanding_global_views[ssid];
|
||||
|
||||
local uid = unique_id("");
|
||||
event Measurement::cluster_key_request(uid, mid, key);
|
||||
event SumStats::cluster_key_request(uid, ssid, key);
|
||||
}
|
||||
|
||||
event Measurement::cluster_measurement_response(uid: string, mid: string, data: ResultTable, done: bool)
|
||||
event SumStats::cluster_measurement_response(uid: string, ssid: string, data: ResultTable, done: bool)
|
||||
{
|
||||
#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
|
||||
|
||||
|
@ -275,8 +274,8 @@ event Measurement::cluster_measurement_response(uid: string, mid: string, data:
|
|||
if ( done )
|
||||
++done_with[uid];
|
||||
|
||||
local local_data = measurement_results[uid];
|
||||
local m = measurement_store[mid];
|
||||
local local_data = stats_results[uid];
|
||||
local ss = stats_store[ssid];
|
||||
|
||||
for ( key in data )
|
||||
{
|
||||
|
@ -285,14 +284,14 @@ event Measurement::cluster_measurement_response(uid: string, mid: string, data:
|
|||
else
|
||||
local_data[key] = data[key];
|
||||
|
||||
# If a measurement is done being collected, thresholds for each key
|
||||
# need to be checked so we're doing it here to avoid doubly iterating
|
||||
# over each key.
|
||||
# If a stat is done being collected, thresholds for each key
|
||||
# need to be checked so we're doing it here to avoid doubly
|
||||
# iterating over each key.
|
||||
if ( Cluster::worker_count == done_with[uid] )
|
||||
{
|
||||
if ( check_thresholds(m, key, local_data[key], 1.0) )
|
||||
if ( check_thresholds(ss, key, local_data[key], 1.0) )
|
||||
{
|
||||
threshold_crossed(m, key, local_data[key]);
|
||||
threshold_crossed(ss, key, local_data[key]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -300,20 +299,20 @@ event Measurement::cluster_measurement_response(uid: string, mid: string, data:
|
|||
# If the data has been collected from all peers, we are done and ready to finish.
|
||||
if ( Cluster::worker_count == done_with[uid] )
|
||||
{
|
||||
if ( m?$epoch_finished )
|
||||
m$epoch_finished(local_data);
|
||||
if ( ss?$epoch_finished )
|
||||
ss$epoch_finished(local_data);
|
||||
|
||||
# Clean up
|
||||
delete measurement_results[uid];
|
||||
delete stats_results[uid];
|
||||
delete done_with[uid];
|
||||
# Not sure I need to reset the measurement on the manager.
|
||||
reset(m);
|
||||
reset(ss);
|
||||
}
|
||||
}
|
||||
|
||||
event remote_connection_handshake_done(p: event_peer) &priority=5
|
||||
{
|
||||
send_id(p, "Measurement::measurement_store");
|
||||
send_id(p, "Measurement::reducer_store");
|
||||
send_id(p, "SumStats::stats_store");
|
||||
send_id(p, "SumStats::reducer_store");
|
||||
}
|
||||
@endif
|
|
@ -1,6 +1,8 @@
|
|||
##! The measurement framework provides a way to count and measure data.
|
||||
##! The summary statistics framework provides a way to
|
||||
##! summarize large streams of data into simple reduced
|
||||
##! measurements.
|
||||
|
||||
module Measurement;
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
## The various calculations are all defined as plugins.
|
||||
|
@ -8,14 +10,17 @@ export {
|
|||
PLACEHOLDER
|
||||
};
|
||||
|
||||
## Represents a thing which is having measurement results collected for it.
|
||||
## Represents a thing which is having summarization
|
||||
## results collected for it.
|
||||
type Key: record {
|
||||
## A non-address related measurement or a sub-key for an address based measurement.
|
||||
## An example might be successful SSH connections by client IP address
|
||||
## A non-address related summarization or a sub-key for
|
||||
## an address based summarization. An example might be
|
||||
## successful SSH connections by client IP address
|
||||
## where the client string would be the key value.
|
||||
## Another example might be number of HTTP requests to a particular
|
||||
## value in a Host header. This is an example of a non-host based
|
||||
## metric since multiple IP addresses could respond for the same Host
|
||||
## Another example might be number of HTTP requests to
|
||||
## a particular value in a Host header. This is an
|
||||
## example of a non-host based metric since multiple
|
||||
## IP addresses could respond for the same Host
|
||||
## header value.
|
||||
str: string &optional;
|
||||
|
||||
|
@ -23,9 +28,9 @@ export {
|
|||
host: addr &optional;
|
||||
};
|
||||
|
||||
## Represents data being added for a single metric data point.
|
||||
## Only supply a single value here at a time.
|
||||
type DataPoint: record {
|
||||
## Represents data being added for a single observation.
|
||||
## Only supply a single field at a time!
|
||||
type Observation: record {
|
||||
## Count value.
|
||||
num: count &optional;
|
||||
## Double value.
|
||||
|
@ -35,102 +40,110 @@ export {
|
|||
};
|
||||
|
||||
type Reducer: record {
|
||||
## Data stream identifier for the reducer to attach to.
|
||||
## Observation stream identifier for the reducer
|
||||
## to attach to.
|
||||
stream: string;
|
||||
|
||||
## The calculations to perform on the data points.
|
||||
apply: set[Calculation];
|
||||
|
||||
## A predicate so that you can decide per key if you would like
|
||||
## to accept the data being inserted.
|
||||
pred: function(key: Measurement::Key, point: Measurement::DataPoint): bool &optional;
|
||||
## A predicate so that you can decide per key if you
|
||||
## would like to accept the data being inserted.
|
||||
pred: function(key: SumStats::Key, obs: SumStats::Observation): bool &optional;
|
||||
|
||||
## A function to normalize the key. This can be used to aggregate or
|
||||
## normalize the entire key.
|
||||
normalize_key: function(key: Measurement::Key): Key &optional;
|
||||
normalize_key: function(key: SumStats::Key): Key &optional;
|
||||
};
|
||||
|
||||
## Value calculated for a data point stream fed into a reducer.
|
||||
## Value calculated for an observation stream fed into a reducer.
|
||||
## Most of the fields are added by plugins.
|
||||
type ResultVal: record {
|
||||
## The time when the first data point was added to this result value.
|
||||
## The time when the first observation was added to
|
||||
## this result value.
|
||||
begin: time;
|
||||
|
||||
## The time when the last data point was added to this result value.
|
||||
## The time when the last observation was added to
|
||||
## this result value.
|
||||
end: time;
|
||||
|
||||
## The number of measurements received.
|
||||
## The number of observations received.
|
||||
num: count &default=0;
|
||||
};
|
||||
|
||||
## Type to store results for multiple reducers.
|
||||
type Result: table[string] of ResultVal;
|
||||
|
||||
## Type to store a table of measurement results indexed by the measurement key.
|
||||
## Type to store a table of sumstats results indexed
|
||||
## by keys.
|
||||
type ResultTable: table[Key] of Result;
|
||||
|
||||
## Measurements represent an aggregation of reducers along with
|
||||
## SumStats represent an aggregation of reducers along with
|
||||
## mechanisms to handle various situations like the epoch ending
|
||||
## or thresholds being crossed.
|
||||
type Measurement: record {
|
||||
## The interval at which this filter should be "broken" and the
|
||||
## '$epoch_finished' callback called. The results are also reset
|
||||
## at this time so any threshold based detection needs to be set to a
|
||||
## number that should be expected to happen within this epoch.
|
||||
## It's best to not access any global state outside
|
||||
## of the variables given to the callbacks because there
|
||||
## is no assurance provided as to where the callbacks
|
||||
## will be executed on clusters.
|
||||
type SumStat: record {
|
||||
## The interval at which this filter should be "broken"
|
||||
## and the '$epoch_finished' callback called. The
|
||||
## results are also reset at this time so any threshold
|
||||
## based detection needs to be set to a
|
||||
## value that should be expected to happen within
|
||||
## this epoch.
|
||||
epoch: interval;
|
||||
|
||||
## The reducers for the measurement indexed by data id.
|
||||
## The reducers for the SumStat
|
||||
reducers: set[Reducer];
|
||||
|
||||
## Provide a function to calculate a value from the :bro:see:`Result`
|
||||
## structure which will be used for thresholding.
|
||||
threshold_val: function(key: Measurement::Key, result: Measurement::Result): count &optional;
|
||||
## Provide a function to calculate a value from the
|
||||
## :bro:see:`Result` structure which will be used
|
||||
## for thresholding.
|
||||
## This is required if a $threshold value is given.
|
||||
threshold_val: function(key: SumStats::Key, result: SumStats::Result): count &optional;
|
||||
|
||||
## The threshold value for calling the $threshold_crossed callback.
|
||||
## The threshold value for calling the
|
||||
## $threshold_crossed callback.
|
||||
threshold: count &optional;
|
||||
|
||||
## A series of thresholds for calling the $threshold_crossed callback.
|
||||
## A series of thresholds for calling the
|
||||
## $threshold_crossed callback.
|
||||
threshold_series: vector of count &optional;
|
||||
|
||||
## A callback that is called when a threshold is crossed.
|
||||
threshold_crossed: function(key: Measurement::Key, result: Measurement::Result) &optional;
|
||||
threshold_crossed: function(key: SumStats::Key, result: SumStats::Result) &optional;
|
||||
|
||||
## A callback with the full collection of Results for this filter.
|
||||
## It's best to not access any global state outside of the variables
|
||||
## given to the callback because there is no assurance provided as to
|
||||
## where the callback will be executed on clusters.
|
||||
epoch_finished: function(rt: Measurement::ResultTable) &optional;
|
||||
## A callback with the full collection of Results for
|
||||
## this SumStat.
|
||||
epoch_finished: function(rt: SumStats::ResultTable) &optional;
|
||||
};
|
||||
|
||||
## Create a measurement.
|
||||
global create: function(m: Measurement::Measurement);
|
||||
## Create a summary statistic.
|
||||
global create: function(m: SumStats::SumStat);
|
||||
|
||||
## Add data into a data point stream. This should be called when
|
||||
## a script has measured some point value.
|
||||
## Add data into an observation stream. This should be
|
||||
## called when a script has measured some point value.
|
||||
##
|
||||
## id: The stream identifier that the data point represents.
|
||||
## id: The observation stream identifier that the data
|
||||
## point represents.
|
||||
##
|
||||
## key: The measurement key that the value is to be added to.
|
||||
## key: The key that the value is related to.
|
||||
##
|
||||
## point: The data point to send into the stream.
|
||||
global add_data: function(id: string, key: Measurement::Key, point: Measurement::DataPoint);
|
||||
## obs: The data point to send into the stream.
|
||||
global observe: function(id: string, key: SumStats::Key, obs: SumStats::Observation);
|
||||
|
||||
## Helper function to represent a :bro:type:`Measurement::Key` value as
|
||||
## Helper function to represent a :bro:type:`SumStats::Key` value as
|
||||
## a simple string.
|
||||
##
|
||||
## key: The metric key that is to be converted into a string.
|
||||
##
|
||||
## Returns: A string representation of the metric key.
|
||||
global key2str: function(key: Measurement::Key): string;
|
||||
|
||||
## This event is generated for each new measurement that is created.
|
||||
##
|
||||
## m: The record which describes a measurement.
|
||||
global new_measurement: event(m: Measurement);
|
||||
global key2str: function(key: SumStats::Key): string;
|
||||
}
|
||||
|
||||
redef record Reducer += {
|
||||
# Internal use only. Provides a reference back to the related Measurement by it's ID.
|
||||
# Internal use only. Provides a reference back to the related SumStat by its ID.
|
||||
mid: string &optional;
|
||||
};
|
||||
|
||||
|
@ -142,16 +155,16 @@ type Thresholding: record {
|
|||
threshold_series_index: count &default=0;
|
||||
};
|
||||
|
||||
# Internal use only. For tracking thresholds per measurement and key.
|
||||
# Internal use only. For tracking thresholds per sumstat and key.
|
||||
global threshold_tracker: table[string] of table[Key] of Thresholding &optional;
|
||||
|
||||
redef record Measurement += {
|
||||
# Extend the SumStat record type (SumStats is the module, not a type).
redef record SumStat += {
|
||||
# Internal use only (mostly for cluster coherency).
|
||||
id: string &optional;
|
||||
};
|
||||
|
||||
# Store of measurements indexed on the measurement id.
|
||||
global measurement_store: table[string] of Measurement = table();
|
||||
# Store of sumstats indexed on the sumstat id.
|
||||
global stats_store: table[string] of SumStat = table();
|
||||
|
||||
# Store of reducers indexed on the data point stream id.
|
||||
global reducer_store: table[string] of set[Reducer] = table();
|
||||
|
@ -166,10 +179,10 @@ global thresholds_store: table[string, Key] of bool = table();
|
|||
# key values are updated and the new val is given as the `val` argument.
|
||||
# It's only prototyped here because cluster and non-cluster have separate
|
||||
# implementations.
|
||||
global data_added: function(m: Measurement, key: Key, result: Result);
|
||||
global data_added: function(m: SumStat, key: Key, result: Result);
|
||||
|
||||
# Prototype the hook point for plugins to do calculations.
|
||||
global add_to_reducer_hook: hook(r: Reducer, val: double, data: DataPoint, rv: ResultVal);
|
||||
global add_to_reducer_hook: hook(r: Reducer, val: double, data: Observation, rv: ResultVal);
|
||||
# Prototype the hook point for plugins to initialize any result values.
|
||||
global init_resultval_hook: hook(r: Reducer, rv: ResultVal);
|
||||
# Prototype the hook point for plugins to merge Results.
|
||||
|
@ -177,7 +190,7 @@ global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: Res
|
|||
|
||||
# Event that is used to "finish" measurements and adapt the measurement
|
||||
# framework for clustered or non-clustered usage.
|
||||
global finish_epoch: event(m: Measurement);
|
||||
global finish_epoch: event(m: SumStat);
|
||||
|
||||
function key2str(key: Key): string
|
||||
{
|
||||
|
@ -186,7 +199,7 @@ function key2str(key: Key): string
|
|||
out = fmt("%shost=%s", out, key$host);
|
||||
if ( key?$str )
|
||||
out = fmt("%s%sstr=%s", out, |out|==0 ? "" : ", ", key$str);
|
||||
return fmt("measurement_key(%s)", out);
|
||||
return fmt("sumstats_key(%s)", out);
|
||||
}
|
||||
|
||||
function init_resultval(r: Reducer): ResultVal
|
||||
|
@ -200,17 +213,12 @@ function compose_resultvals(rv1: ResultVal, rv2: ResultVal): ResultVal
|
|||
{
|
||||
local result: ResultVal;
|
||||
|
||||
# Merge $begin (take the earliest one)
|
||||
result$begin = (rv1$begin < rv2$begin) ? rv1$begin : rv2$begin;
|
||||
|
||||
# Merge $end (take the latest one)
|
||||
result$end = (rv1$end > rv2$end) ? rv1$end : rv2$end;
|
||||
|
||||
# Merge $num
|
||||
result$num = rv1$num + rv2$num;
|
||||
|
||||
# Run the plugin composition hooks.
|
||||
hook compose_resultvals_hook(result, rv1, rv2);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -243,59 +251,59 @@ function compose_results(r1: Result, r2: Result): Result
|
|||
}
|
||||
|
||||
|
||||
function reset(m: Measurement)
|
||||
function reset(ss: SumStat)
|
||||
{
|
||||
if ( m$id in result_store )
|
||||
delete result_store[m$id];
|
||||
if ( ss$id in result_store )
|
||||
delete result_store[ss$id];
|
||||
|
||||
result_store[m$id] = table();
|
||||
threshold_tracker[m$id] = table();
|
||||
result_store[ss$id] = table();
|
||||
threshold_tracker[ss$id] = table();
|
||||
}
|
||||
|
||||
function create(m: Measurement)
|
||||
function create(ss: SumStat)
|
||||
{
|
||||
if ( (m?$threshold || m?$threshold_series) && ! m?$threshold_val )
|
||||
if ( (ss?$threshold || ss?$threshold_series) && ! ss?$threshold_val )
|
||||
{
|
||||
Reporter::error("Measurement given a threshold with no $threshold_val function");
|
||||
Reporter::error("SumStats given a threshold with no $threshold_val function");
|
||||
}
|
||||
|
||||
if ( ! m?$id )
|
||||
m$id=unique_id("");
|
||||
threshold_tracker[m$id] = table();
|
||||
measurement_store[m$id] = m;
|
||||
if ( ! ss?$id )
|
||||
ss$id=unique_id("");
|
||||
threshold_tracker[ss$id] = table();
|
||||
stats_store[ss$id] = ss;
|
||||
|
||||
for ( reducer in m$reducers )
|
||||
for ( reducer in ss$reducers )
|
||||
{
|
||||
reducer$mid = m$id;
|
||||
reducer$mid = ss$id;
|
||||
if ( reducer$stream !in reducer_store )
|
||||
reducer_store[reducer$stream] = set();
|
||||
add reducer_store[reducer$stream][reducer];
|
||||
}
|
||||
|
||||
reset(m);
|
||||
schedule m$epoch { Measurement::finish_epoch(m) };
|
||||
reset(ss);
|
||||
schedule ss$epoch { SumStats::finish_epoch(ss) };
|
||||
}
|
||||
|
||||
function add_data(id: string, key: Key, point: DataPoint)
|
||||
function observe(id: string, key: Key, obs: Observation)
|
||||
{
|
||||
# Try to add the data to all of the defined reducers.
|
||||
if ( id !in reducer_store )
|
||||
return;
|
||||
|
||||
# Try to add the data to all of the defined reducers.
|
||||
for ( r in reducer_store[id] )
|
||||
{
|
||||
# If this reducer has a predicate, run the predicate
|
||||
# and skip this key if the predicate return false.
|
||||
if ( r?$pred && ! r$pred(key, point) )
|
||||
if ( r?$pred && ! r$pred(key, obs) )
|
||||
next;
|
||||
|
||||
if ( r?$normalize_key )
|
||||
key = r$normalize_key(copy(key));
|
||||
|
||||
local m = measurement_store[r$mid];
|
||||
local ss = stats_store[r$mid];
|
||||
|
||||
if ( r$mid !in result_store )
|
||||
result_store[m$id] = table();
|
||||
result_store[ss$id] = table();
|
||||
local results = result_store[r$mid];
|
||||
|
||||
if ( key !in results )
|
||||
|
@ -312,56 +320,56 @@ function add_data(id: string, key: Key, point: DataPoint)
|
|||
|
||||
# If a string was given, fall back to 1.0 as the value.
|
||||
local val = 1.0;
|
||||
if ( point?$num || point?$dbl )
|
||||
val = point?$dbl ? point$dbl : point$num;
|
||||
if ( obs?$num || obs?$dbl )
|
||||
val = obs?$dbl ? obs$dbl : obs$num;
|
||||
|
||||
hook add_to_reducer_hook(r, val, point, result_val);
|
||||
data_added(m, key, result);
|
||||
hook add_to_reducer_hook(r, val, obs, result_val);
|
||||
data_added(ss, key, result);
|
||||
}
|
||||
}
|
||||
|
||||
# This function checks if a threshold has been crossed. It is also used as a method to implement
|
||||
# mid-break-interval threshold crossing detection for cluster deployments.
|
||||
function check_thresholds(m: Measurement, key: Key, result: Result, modify_pct: double): bool
|
||||
function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
|
||||
{
|
||||
if ( ! (m?$threshold || m?$threshold_series) )
|
||||
if ( ! (ss?$threshold || ss?$threshold_series) )
|
||||
return F;
|
||||
|
||||
# Add in the extra ResultVals to make threshold_vals easier to write.
|
||||
if ( |m$reducers| != |result| )
|
||||
if ( |ss$reducers| != |result| )
|
||||
{
|
||||
for ( reducer in m$reducers )
|
||||
for ( reducer in ss$reducers )
|
||||
{
|
||||
if ( reducer$stream !in result )
|
||||
result[reducer$stream] = init_resultval(reducer);
|
||||
}
|
||||
}
|
||||
|
||||
local watch = m$threshold_val(key, result);
|
||||
local watch = ss$threshold_val(key, result);
|
||||
|
||||
if ( modify_pct < 1.0 && modify_pct > 0.0 )
|
||||
watch = double_to_count(floor(watch/modify_pct));
|
||||
|
||||
if ( m$id !in threshold_tracker )
|
||||
threshold_tracker[m$id] = table();
|
||||
local t_tracker = threshold_tracker[m$id];
|
||||
if ( ss$id !in threshold_tracker )
|
||||
threshold_tracker[ss$id] = table();
|
||||
local t_tracker = threshold_tracker[ss$id];
|
||||
|
||||
if ( key !in t_tracker )
|
||||
{
|
||||
local ttmp: Thresholding;
|
||||
t_tracker[key] = ttmp;
|
||||
}
|
||||
local tt = threshold_tracker[m$id][key];
|
||||
local tt = t_tracker[key];
|
||||
|
||||
if ( m?$threshold && ! tt$is_threshold_crossed && watch >= m$threshold )
|
||||
if ( ss?$threshold && ! tt$is_threshold_crossed && watch >= ss$threshold )
|
||||
{
|
||||
# Value crossed the threshold.
|
||||
return T;
|
||||
}
|
||||
|
||||
if ( m?$threshold_series &&
|
||||
|m$threshold_series| >= tt$threshold_series_index &&
|
||||
watch >= m$threshold_series[tt$threshold_series_index] )
|
||||
if ( ss?$threshold_series &&
|
||||
# Strictly greater: once the index equals the series length every
# threshold has been crossed and indexing the vector would be out of range.
|ss$threshold_series| > tt$threshold_series_index &&
|
||||
watch >= ss$threshold_series[tt$threshold_series_index] )
|
||||
{
|
||||
# A threshold series was given and the value crossed the next
|
||||
# value in the series.
|
||||
|
@ -371,28 +379,28 @@ function check_thresholds(m: Measurement, key: Key, result: Result, modify_pct:
|
|||
return F;
|
||||
}
|
||||
|
||||
function threshold_crossed(m: Measurement, key: Key, result: Result)
|
||||
function threshold_crossed(ss: SumStat, key: Key, result: Result)
|
||||
{
|
||||
# If there is no callback, there is no point in any of this.
|
||||
if ( ! m?$threshold_crossed )
|
||||
if ( ! ss?$threshold_crossed )
|
||||
return;
|
||||
|
||||
# Add in the extra ResultVals to make threshold_crossed callbacks easier to write.
|
||||
if ( |m$reducers| != |result| )
|
||||
if ( |ss$reducers| != |result| )
|
||||
{
|
||||
for ( reducer in m$reducers )
|
||||
for ( reducer in ss$reducers )
|
||||
{
|
||||
if ( reducer$stream !in result )
|
||||
result[reducer$stream] = init_resultval(reducer);
|
||||
}
|
||||
}
|
||||
|
||||
m$threshold_crossed(key, result);
|
||||
local tt = threshold_tracker[m$id][key];
|
||||
ss$threshold_crossed(key, result);
|
||||
local tt = threshold_tracker[ss$id][key];
|
||||
tt$is_threshold_crossed = T;
|
||||
|
||||
# Bump up to the next threshold series index if a threshold series is being used.
|
||||
if ( m?$threshold_series )
|
||||
if ( ss?$threshold_series )
|
||||
++tt$threshold_series_index;
|
||||
}
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
@load ./main
|
||||
|
||||
module Measurement;
|
||||
module SumStats;
|
||||
|
||||
event Measurement::finish_epoch(m: Measurement)
|
||||
event SumStats::finish_epoch(m: SumStats)
|
||||
{
|
||||
if ( m$id in result_store )
|
||||
{
|
||||
|
@ -13,11 +13,11 @@ event Measurement::finish_epoch(m: Measurement)
|
|||
reset(m);
|
||||
}
|
||||
|
||||
schedule m$epoch { Measurement::finish_epoch(m) };
|
||||
schedule m$epoch { SumStats::finish_epoch(m) };
|
||||
}
|
||||
|
||||
|
||||
function data_added(m: Measurement, key: Key, result: Result)
|
||||
function data_added(m: SumStats, key: Key, result: Result)
|
||||
{
|
||||
if ( check_thresholds(m, key, result, 1.0) )
|
||||
threshold_crossed(m, key, result);
|
|
@ -2,7 +2,7 @@
|
|||
@load ./max
|
||||
@load ./min
|
||||
@load ./sample
|
||||
@load ./variance
|
||||
@load ./std-dev
|
||||
@load ./sum
|
||||
@load ./unique
|
||||
@load ./variance
|
|
@ -1,6 +1,6 @@
|
|||
@load base/frameworks/measurement
|
||||
@load base/frameworks/sumstats
|
||||
|
||||
module Measurement;
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
redef enum Calculation += {
|
||||
|
@ -14,7 +14,7 @@ export {
|
|||
};
|
||||
}
|
||||
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: DataPoint, rv: ResultVal)
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: Observation, rv: ResultVal)
|
||||
{
|
||||
if ( AVERAGE in r$apply )
|
||||
{
|
|
@ -1,6 +1,6 @@
|
|||
@load base/frameworks/measurement
|
||||
@load base/frameworks/sumstats
|
||||
|
||||
module Measurement;
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
redef enum Calculation += {
|
||||
|
@ -14,7 +14,7 @@ export {
|
|||
};
|
||||
}
|
||||
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: DataPoint, rv: ResultVal)
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: Observation, rv: ResultVal)
|
||||
{
|
||||
if ( MAX in r$apply )
|
||||
{
|
|
@ -1,6 +1,6 @@
|
|||
@load base/frameworks/measurement
|
||||
@load base/frameworks/sumstats
|
||||
|
||||
module Measurement;
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
redef enum Calculation += {
|
||||
|
@ -14,7 +14,7 @@ export {
|
|||
};
|
||||
}
|
||||
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: DataPoint, rv: ResultVal)
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: Observation, rv: ResultVal)
|
||||
{
|
||||
if ( MIN in r$apply )
|
||||
{
|
|
@ -1,35 +1,35 @@
|
|||
@load base/frameworks/measurement
|
||||
@load base/frameworks/sumstats
|
||||
@load base/utils/queue
|
||||
|
||||
module Measurement;
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
redef record Reducer += {
|
||||
## A number of sample DataPoints to collect.
|
||||
## A number of sample Observations to collect.
|
||||
samples: count &default=0;
|
||||
};
|
||||
|
||||
redef record ResultVal += {
|
||||
## This is the queue where samples
|
||||
## are maintained. Use the
|
||||
## :bro:see:`Measurement::get_samples` function
|
||||
## :bro:see:`SumStats::get_samples` function
|
||||
## to get a vector of the samples.
|
||||
samples: Queue::Queue &optional;
|
||||
};
|
||||
|
||||
## Get a vector of sample DataPoint values from a ResultVal.
|
||||
global get_samples: function(rv: ResultVal): vector of DataPoint;
|
||||
## Get a vector of sample Observation values from a ResultVal.
|
||||
global get_samples: function(rv: ResultVal): vector of Observation;
|
||||
}
|
||||
|
||||
function get_samples(rv: ResultVal): vector of DataPoint
|
||||
function get_samples(rv: ResultVal): vector of Observation
|
||||
{
|
||||
local s: vector of DataPoint = vector();
|
||||
local s: vector of Observation = vector();
|
||||
if ( rv?$samples )
|
||||
Queue::get_vector(rv$samples, s);
|
||||
return s;
|
||||
}
|
||||
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: DataPoint, rv: ResultVal)
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: Observation, rv: ResultVal)
|
||||
{
|
||||
if ( r$samples > 0 )
|
||||
{
|
|
@ -1,7 +1,7 @@
|
|||
@load ./variance
|
||||
@load base/frameworks/measurement
|
||||
@load base/frameworks/sumstats
|
||||
|
||||
module Measurement;
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
redef enum Calculation += {
|
||||
|
@ -22,7 +22,7 @@ function calc_std_dev(rv: ResultVal)
|
|||
}
|
||||
|
||||
# This depends on the variance plugin which uses priority -5
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: DataPoint, rv: ResultVal) &priority=-10
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: Observation, rv: ResultVal) &priority=-10
|
||||
{
|
||||
if ( STD_DEV in r$apply )
|
||||
{
|
|
@ -1,6 +1,6 @@
|
|||
@load base/frameworks/measurement
|
||||
@load base/frameworks/sumstats
|
||||
|
||||
module Measurement;
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
redef enum Calculation += {
|
||||
|
@ -14,13 +14,13 @@ export {
|
|||
sum: double &default=0.0;
|
||||
};
|
||||
|
||||
type threshold_function: function(key: Measurement::Key, result: Measurement::Result): count;
|
||||
type threshold_function: function(key: SumStats::Key, result: SumStats::Result): count;
|
||||
global sum_threshold: function(data_id: string): threshold_function;
|
||||
}
|
||||
|
||||
function sum_threshold(data_id: string): threshold_function
|
||||
{
|
||||
return function(key: Measurement::Key, result: Measurement::Result): count
|
||||
return function(key: SumStats::Key, result: SumStats::Result): count
|
||||
{
|
||||
print fmt("data_id: %s", data_id);
|
||||
print result;
|
||||
|
@ -34,7 +34,7 @@ hook init_resultval_hook(r: Reducer, rv: ResultVal)
|
|||
rv$sum = 0;
|
||||
}
|
||||
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: DataPoint, rv: ResultVal)
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: Observation, rv: ResultVal)
|
||||
{
|
||||
if ( SUM in r$apply )
|
||||
rv$sum += val;
|
|
@ -1,6 +1,6 @@
|
|||
@load base/frameworks/measurement
|
||||
@load base/frameworks/sumstats
|
||||
|
||||
module Measurement;
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
redef enum Calculation += {
|
||||
|
@ -20,10 +20,10 @@ redef record ResultVal += {
|
|||
# because we don't want to trust that we can inspect the values
|
||||
# since we will likely move to a probabilistic data structure in the future.
|
||||
# TODO: in the future this will optionally be a hyperloglog structure
|
||||
unique_vals: set[DataPoint] &optional;
|
||||
unique_vals: set[Observation] &optional;
|
||||
};
|
||||
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: DataPoint, rv: ResultVal)
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: Observation, rv: ResultVal)
|
||||
{
|
||||
if ( UNIQUE in r$apply )
|
||||
{
|
|
@ -1,7 +1,7 @@
|
|||
@load ./average
|
||||
@load base/frameworks/measurement
|
||||
@load base/frameworks/sumstats
|
||||
|
||||
module Measurement;
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
redef enum Calculation += {
|
||||
|
@ -29,7 +29,7 @@ function calc_variance(rv: ResultVal)
|
|||
}
|
||||
|
||||
# Reduced priority since this depends on the average
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: DataPoint, rv: ResultVal) &priority=-5
|
||||
hook add_to_reducer_hook(r: Reducer, val: double, data: Observation, rv: ResultVal) &priority=-5
|
||||
{
|
||||
if ( VARIANCE in r$apply )
|
||||
{
|
Loading…
Add table
Add a link
Reference in a new issue