diff --git a/scripts/base/frameworks/sumstats/plugins/topk.zeek b/scripts/base/frameworks/sumstats/plugins/topk.zeek index e7107cb4fb..a298ed8fec 100644 --- a/scripts/base/frameworks/sumstats/plugins/topk.zeek +++ b/scripts/base/frameworks/sumstats/plugins/topk.zeek @@ -1,4 +1,9 @@ ##! Keep the top-k (i.e., most frequently occurring) observations. +##! +##! This plugin uses a probabilistic algorithm to count the top-k elements. +##! The algorithm (called Space-Saving) is described in the paper "Efficient +##! Computation of Frequent and Top-k Elements in Data Streams", by +##! Metwally et al. (2005). @load base/frameworks/sumstats diff --git a/src/probabilistic/Topk.h b/src/probabilistic/Topk.h index 10238bef98..661f9277a2 100644 --- a/src/probabilistic/Topk.h +++ b/src/probabilistic/Topk.h @@ -7,8 +7,11 @@ #include "zeek/OpaqueVal.h" #include "zeek/Val.h" -// This class implements the top-k algorithm. Or - to be more precise - an -// interpretation of it. +// This class implements the Space-Saving algorithm for counting the Top-k elements +// in a data stream as presented in the paper "Efficient Computation of Frequent and +// Top-k Elements in Data Streams", by Metwally et al. (2005). +// +// Or - to be more precise - it implements an interpretation of it. namespace zeek::detail {