From 25c33d2a29a2cff9c7d47276af14b853f28335c3 Mon Sep 17 00:00:00 2001 From: Johanna Amann Date: Wed, 11 May 2022 13:21:26 +0100 Subject: [PATCH] Add exact name of the Top-k algorithm. I needed to figure out which exact algorithm we use for our probabilistic top-k measurements. It turns out that we do not mention this in our source tree at all so far. --- scripts/base/frameworks/sumstats/plugins/topk.zeek | 5 +++++ src/probabilistic/Topk.h | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/base/frameworks/sumstats/plugins/topk.zeek b/scripts/base/frameworks/sumstats/plugins/topk.zeek index e7107cb4fb..683d141467 100644 --- a/scripts/base/frameworks/sumstats/plugins/topk.zeek +++ b/scripts/base/frameworks/sumstats/plugins/topk.zeek @@ -1,4 +1,9 @@ ##! Keep the top-k (i.e., most frequently occurring) observations. +##! +##! This plugin uses a probabilistic algorithm to count the top-k elements. +##! The algorithm (called Space-Saving) is described in the paper "Efficient +##! Computation of Frequent and Top-k Elements in Data Streams", by +##! Metwally et al. (2005). @load base/frameworks/sumstats diff --git a/src/probabilistic/Topk.h b/src/probabilistic/Topk.h index 10238bef98..2229d82206 100644 --- a/src/probabilistic/Topk.h +++ b/src/probabilistic/Topk.h @@ -7,8 +7,11 @@ #include "zeek/OpaqueVal.h" #include "zeek/Val.h" -// This class implements the top-k algorithm. Or - to be more precise - an -// interpretation of it. +// This class implements the Space-Saving algorithm for counting the top-k elements +// in a data stream as presented in the paper "Efficient Computation of Frequent and +// Top-k Elements in Data Streams", by Metwally et al. (2005). +// +// Or - to be more precise - it implements an interpretation of it. namespace zeek::detail {