Fix typos and formatting in the sumstats docs

This commit is contained in:
Daniel Thayer 2013-10-10 23:15:12 -05:00
parent 3812716ace
commit 0753853726
6 changed files with 36 additions and 34 deletions

View file

@ -1,6 +1,6 @@
##! This implements transparent cluster support for the SumStats framework. ##! This implements transparent cluster support for the SumStats framework.
##! Do not load this file directly. It's only meant to be loaded automatically ##! Do not load this file directly. It's only meant to be loaded automatically
##! and will be depending on if the cluster framework has been enabled. ##! and will be if the cluster framework has been enabled.
##! The goal of this script is to make sumstats calculation completely and ##! The goal of this script is to make sumstats calculation completely and
##! transparently automated when running on a cluster. ##! transparently automated when running on a cluster.
@ -10,31 +10,32 @@
module SumStats; module SumStats;
export { export {
## The percent of the full threshold value that needs to be met on a single worker ## The percent of the full threshold value that needs to be met on a
## for that worker to send the value to its manager in order for it to request a ## single worker for that worker to send the value to its manager in
## global view for that value. There is no requirement that the manager requests ## order for it to request a global view for that value. There is no
## a global view for the key since it may opt not to if it requested a global view ## requirement that the manager requests a global view for the key since
## for the key recently. ## it may opt not to if it requested a global view for the key recently.
const cluster_request_global_view_percent = 0.2 &redef; const cluster_request_global_view_percent = 0.2 &redef;
## This is to deal with intermediate update overload. A manager will only allow ## This is to deal with intermediate update overload. A manager will
## this many intermediate update requests to the workers to be inflight at any ## only allow this many intermediate update requests to the workers to
## given time. Requested intermediate updates are currently thrown out and not ## be inflight at any given time. Requested intermediate updates are
## performed. In practice this should hopefully have a minimal effect. ## currently thrown out and not performed. In practice this should
## hopefully have a minimal effect.
const max_outstanding_global_views = 10 &redef; const max_outstanding_global_views = 10 &redef;
## Event sent by the manager in a cluster to initiate the collection of values for ## Event sent by the manager in a cluster to initiate the collection of
## a sumstat. ## values for a sumstat.
global cluster_ss_request: event(uid: string, ss_name: string, cleanup: bool); global cluster_ss_request: event(uid: string, ss_name: string, cleanup: bool);
## Event sent by nodes that are collecting sumstats after receiving a request for ## Event sent by nodes that are collecting sumstats after receiving a
## the sumstat from the manager. ## request for the sumstat from the manager.
#global cluster_ss_response: event(uid: string, ss_name: string, data: ResultTable, done: bool, cleanup: bool); #global cluster_ss_response: event(uid: string, ss_name: string, data: ResultTable, done: bool, cleanup: bool);
## This event is sent by the manager in a cluster to initiate the collection of ## This event is sent by the manager in a cluster to initiate the
## a single key value from a sumstat. It's typically used to get intermediate ## collection of a single key value from a sumstat. It's typically used
## updates before the break interval triggers to speed detection of a value ## to get intermediate updates before the break interval triggers to
## crossing a threshold. ## speed detection of a value crossing a threshold.
global cluster_get_result: event(uid: string, ss_name: string, key: Key, cleanup: bool); global cluster_get_result: event(uid: string, ss_name: string, key: Key, cleanup: bool);
## This event is sent by nodes in response to a ## This event is sent by nodes in response to a
@ -43,7 +44,7 @@ export {
## This is sent by workers to indicate that they crossed the percent ## This is sent by workers to indicate that they crossed the percent
## of the current threshold by the percentage defined globally in ## of the current threshold by the percentage defined globally in
## :bro:id:`SumStats::cluster_request_global_view_percent` ## :bro:id:`SumStats::cluster_request_global_view_percent`.
global cluster_key_intermediate_response: event(ss_name: string, key: SumStats::Key); global cluster_key_intermediate_response: event(ss_name: string, key: SumStats::Key);
## This event is scheduled internally on workers to send result chunks. ## This event is scheduled internally on workers to send result chunks.

View file

@ -51,8 +51,8 @@ export {
## would like to accept the data being inserted. ## would like to accept the data being inserted.
pred: function(key: SumStats::Key, obs: SumStats::Observation): bool &optional; pred: function(key: SumStats::Key, obs: SumStats::Observation): bool &optional;
## A function to normalize the key. This can be used to aggregate or ## A function to normalize the key. This can be used to
## normalize the entire key. ## aggregate or normalize the entire key.
normalize_key: function(key: SumStats::Key): Key &optional; normalize_key: function(key: SumStats::Key): Key &optional;
}; };
@ -91,28 +91,28 @@ export {
name: string; name: string;
## The interval at which this filter should be "broken" ## The interval at which this filter should be "broken"
## and the '$epoch_result' callback called. The ## and the *epoch_result* callback called. The
## results are also reset at this time so any threshold ## results are also reset at this time so any threshold
## based detection needs to be set to a ## based detection needs to be set to a
## value that should be expected to happen within ## value that should be expected to happen within
## this epoch. ## this epoch.
epoch: interval; epoch: interval;
## The reducers for the SumStat ## The reducers for the SumStat.
reducers: set[Reducer]; reducers: set[Reducer];
## Provide a function to calculate a value from the ## Provide a function to calculate a value from the
## :bro:see:`SumStats::Result` structure which will be used ## :bro:see:`SumStats::Result` structure which will be used
## for thresholding. ## for thresholding.
## This is required if a $threshold value is given. ## This is required if a *threshold* value is given.
threshold_val: function(key: SumStats::Key, result: SumStats::Result): double &optional; threshold_val: function(key: SumStats::Key, result: SumStats::Result): double &optional;
## The threshold value for calling the ## The threshold value for calling the
## $threshold_crossed callback. ## *threshold_crossed* callback.
threshold: double &optional; threshold: double &optional;
## A series of thresholds for calling the ## A series of thresholds for calling the
## $threshold_crossed callback. ## *threshold_crossed* callback.
threshold_series: vector of double &optional; threshold_series: vector of double &optional;
## A callback that is called when a threshold is crossed. ## A callback that is called when a threshold is crossed.
@ -124,7 +124,7 @@ export {
epoch_result: function(ts: time, key: SumStats::Key, result: SumStats::Result) &optional; epoch_result: function(ts: time, key: SumStats::Key, result: SumStats::Result) &optional;
## A callback that will be called when a single collection ## A callback that will be called when a single collection
## interval is completed. The ts value will be the time of ## interval is completed. The *ts* value will be the time of
## when the collection started. ## when the collection started.
epoch_finished: function(ts:time) &optional; epoch_finished: function(ts:time) &optional;
}; };

View file

@ -5,12 +5,12 @@ module SumStats;
export { export {
redef enum Calculation += { redef enum Calculation += {
## Keep last X observations in a queue ## Keep last X observations in a queue.
LAST LAST
}; };
redef record Reducer += { redef record Reducer += {
## number of elements to keep. ## Number of elements to keep.
num_last_elements: count &default=0; num_last_elements: count &default=0;
}; };

View file

@ -4,7 +4,8 @@ module SumStats;
export { export {
redef enum Calculation += { redef enum Calculation += {
## Get uniquely distributed random samples from the observation stream. ## Get uniquely distributed random samples from the observation
## stream.
SAMPLE SAMPLE
}; };
@ -24,8 +25,8 @@ export {
redef record ResultVal += { redef record ResultVal += {
# Internal use only. This is not meant to be publicly available # Internal use only. This is not meant to be publicly available
# and just a copy of num_samples from the Reducer. Needed for availability # and just a copy of num_samples from the Reducer. Needed for
# in the compose hook. # availability in the compose hook.
num_samples: count &default=0; num_samples: count &default=0;
}; };

View file

@ -4,7 +4,7 @@ module SumStats;
export { export {
redef record Reducer += { redef record Reducer += {
## number of elements to keep in the top-k list ## Number of elements to keep in the top-k list.
topk_size: count &default=500; topk_size: count &default=500;
}; };

View file

@ -28,7 +28,7 @@ redef record ResultVal += {
# Internal use only. This is not meant to be publicly available # Internal use only. This is not meant to be publicly available
# because we don't want to trust that we can inspect the values # because we don't want to trust that we can inspect the values
# since we will like move to a probalistic data structure in the future. # since we will likely move to a probabilistic data structure in the future.
# TODO: in the future this will optionally be a hyperloglog structure # TODO: in the future this will optionally be a hyperloglog structure
unique_vals: set[Observation] &optional; unique_vals: set[Observation] &optional;
}; };