diff --git a/scripts/base/frameworks/sumstats/plugins/topk.zeek b/scripts/base/frameworks/sumstats/plugins/topk.zeek index e7107cb4fb..683d141467 100644 --- a/scripts/base/frameworks/sumstats/plugins/topk.zeek +++ b/scripts/base/frameworks/sumstats/plugins/topk.zeek @@ -1,4 +1,9 @@ ##! Keep the top-k (i.e., most frequently occurring) observations. +##! +##! This plugin uses a probabilistic algorithm to count the top-k elements. +##! The algorithm (called Space-Saving) is described in the paper "Efficient +##! Computation of Frequent and Top-k Elements in Data Streams", by +##! Metwally et al. (2005). @load base/frameworks/sumstats diff --git a/src/probabilistic/Topk.h b/src/probabilistic/Topk.h index 10238bef98..2229d82206 100644 --- a/src/probabilistic/Topk.h +++ b/src/probabilistic/Topk.h @@ -7,8 +7,11 @@ #include "zeek/OpaqueVal.h" #include "zeek/Val.h" -// This class implements the top-k algorithm. Or - to be more precise - an -// interpretation of it. +// This class implements the Space-Saving algorithm for counting the top-k elements +// in a data stream as presented in the paper "Efficient Computation of Frequent and +// Top-k Elements in Data Streams", by Metwally et al. (2005). +// +// Or - to be more precise - it implements an interpretation of it. namespace zeek::detail {