mirror of
https://github.com/zeek/zeek.git
synced 2025-10-10 18:48:20 +00:00
topk for sumstats
This commit is contained in:
parent
5da97455f5
commit
de5769a88f
10 changed files with 198 additions and 4 deletions
|
@ -4,5 +4,6 @@
|
|||
@load ./sample
|
||||
@load ./std-dev
|
||||
@load ./sum
|
||||
@load ./topk
|
||||
@load ./unique
|
||||
@load ./variance
|
24
scripts/base/frameworks/sumstats/plugins/topk.bro
Normal file
24
scripts/base/frameworks/sumstats/plugins/topk.bro
Normal file
|
@ -0,0 +1,24 @@
|
|||
@load base/frameworks/sumstats
|
||||
|
||||
module SumStats;
|
||||
|
||||
export {
	redef enum Calculation += {
		## Feed every observation into a top-k data structure so the
		## most frequently seen observations can be queried later
		## (e.g. with topk_get_top, topk_count and topk_epsilon).
		TOPK
	};

	redef record ResultVal += {
		## Opaque handle to the top-k data structure that receives the
		## observations when the TOPK calculation is applied.
		## NOTE(review): the argument 500 is presumably the number of
		## elements the structure tracks; it is hard-coded here -- confirm
		## against topk_init.
		topk: opaque of topk &default=topk_init(500);
	};
}
|
||||
|
||||
# Adds the observation to the result's top-k structure whenever the
# reducer asked for the TOPK calculation; does nothing otherwise.
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
	{
	if ( TOPK in r$apply )
		topk_add(rv$topk, obs);
	}
|
||||
|
||||
|
31
src/Topk.cc
31
src/Topk.cc
|
@ -19,7 +19,7 @@ Element::~Element()
|
|||
value=0;
|
||||
}
|
||||
|
||||
HashKey* TopkVal::GetHash(Val* v)
|
||||
HashKey* TopkVal::GetHash(Val* v) const
|
||||
{
|
||||
TypeList* tl = new TypeList(v->Type());
|
||||
tl->Append(v->Type());
|
||||
|
@ -58,6 +58,7 @@ TopkVal::~TopkVal()
|
|||
type = 0;
|
||||
}
|
||||
|
||||
|
||||
VectorVal* TopkVal::getTopK(int k) // returns vector
|
||||
{
|
||||
if ( numElements == 0 )
|
||||
|
@ -100,6 +101,34 @@ VectorVal* TopkVal::getTopK(int k) // returns vector
|
|||
return t;
|
||||
}
|
||||
|
||||
// Looks up `value` in the element dictionary and returns the count stored
// on the element's parent (presumably its Bucket -- confirm in Topk.h).
//
// If `value` is not currently tracked, an error is reported through the
// reporter and 0 is returned.
uint64_t TopkVal::getCount(Val* value) const
	{
	HashKey* key = GetHash(value);
	Element* e = (Element*) elementDict->Lookup(key);

	// The key is only needed for the lookup above. Release it here so it
	// is not leaked on either return path.
	// NOTE(review): assumes GetHash() returns a caller-owned HashKey and
	// that Lookup() does not retain the pointer -- confirm.
	delete key;

	if ( e == 0 )
		{
		reporter->Error("getCount for element that is not in top-k");
		return 0;
		}

	return e->parent->count;
	}
|
||||
|
||||
// Looks up `value` in the element dictionary and returns the epsilon
// stored on its element.
//
// If `value` is not currently tracked, an error is reported through the
// reporter and 0 is returned.
uint64_t TopkVal::getEpsilon(Val* value) const
	{
	HashKey* key = GetHash(value);
	Element* e = (Element*) elementDict->Lookup(key);

	// The key is only needed for the lookup above. Release it here so it
	// is not leaked on either return path.
	// NOTE(review): assumes GetHash() returns a caller-owned HashKey and
	// that Lookup() does not retain the pointer -- confirm.
	delete key;

	if ( e == 0 )
		{
		reporter->Error("getEpsilon for element that is not in top-k");
		return 0;
		}

	return e->epsilon;
	}
|
||||
|
||||
void TopkVal::Encountered(Val* encountered)
|
||||
{
|
||||
// ok, let's see if we already know this one.
|
||||
|
|
|
@ -38,10 +38,12 @@ public:
|
|||
~TopkVal();
|
||||
void Encountered(Val* value); // we saw something
|
||||
VectorVal* getTopK(int k); // returns vector
|
||||
uint64_t getCount(Val* value) const;
|
||||
uint64_t getEpsilon(Val* value) const;
|
||||
|
||||
private:
|
||||
void IncrementCounter(Element* e);
|
||||
HashKey* GetHash(Val*); // this probably should go somewhere else.
|
||||
HashKey* GetHash(Val*) const; // this probably should go somewhere else.
|
||||
|
||||
BroType* type;
|
||||
std::list<Bucket*> buckets;
|
||||
|
|
15
src/bro.bif
15
src/bro.bif
|
@ -5669,3 +5669,18 @@ function topk_get_top%(handle: opaque of topk, k: count%): any
|
|||
Topk::TopkVal* h = (Topk::TopkVal*) handle;
|
||||
return h->getTopK(k);
|
||||
%}
|
||||
|
||||
## Returns the count that a top-k data structure currently holds for a
## value.
##
## handle: the opaque handle to the top-k data structure.
##
## value: the value to look up.
##
## Returns: the result of TopkVal::getCount for *value*; that method
##          reports an error and yields 0 when *value* is not tracked.
function topk_count%(handle: opaque of topk, value: any%): count
	%{
	assert(handle);

	Topk::TopkVal* topk = (Topk::TopkVal*) handle;
	uint64_t tracked = topk->getCount(value);

	return new Val(tracked, TYPE_COUNT);
	%}
|
||||
|
||||
## Returns the epsilon that a top-k data structure currently holds for a
## value.
##
## handle: the opaque handle to the top-k data structure.
##
## value: the value to look up.
##
## Returns: the result of TopkVal::getEpsilon for *value*; that method
##          reports an error and yields 0 when *value* is not tracked.
function topk_epsilon%(handle: opaque of topk, value: any%): count
	%{
	assert(handle);

	Topk::TopkVal* topk = (Topk::TopkVal*) handle;
	uint64_t eps = topk->getEpsilon(value);

	return new Val(eps, TYPE_COUNT);
	%}
|
||||
|
||||
|
|
6
testing/btest/Baseline/bifs.topk/.stderr
Normal file
6
testing/btest/Baseline/bifs.topk/.stderr
Normal file
|
@ -0,0 +1,6 @@
|
|||
error: getCount for element that is not in top-k
|
||||
error: getEpsilon for element that is not in top-k
|
||||
error: getCount for element that is not in top-k
|
||||
error: getEpsilon for element that is not in top-k
|
||||
error: getCount for element that is not in top-k
|
||||
error: getEpsilon for element that is not in top-k
|
|
@ -1,7 +1,37 @@
|
|||
[b, c]
|
||||
0
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
[d, c]
|
||||
0
|
||||
0
|
||||
2
|
||||
1
|
||||
3
|
||||
2
|
||||
[d, e]
|
||||
3
|
||||
2
|
||||
3
|
||||
2
|
||||
[f, e]
|
||||
4
|
||||
3
|
||||
3
|
||||
2
|
||||
[f, e]
|
||||
4
|
||||
3
|
||||
4
|
||||
2
|
||||
[g, e]
|
||||
0
|
||||
0
|
||||
4
|
||||
2
|
||||
5
|
||||
4
|
||||
[c, e, d]
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
Top entries for key counter
|
||||
Num: 1, count: 99, epsilon: 0
|
||||
Num: 2, count: 98, epsilon: 0
|
||||
Num: 3, count: 97, epsilon: 0
|
||||
Num: 4, count: 96, epsilon: 0
|
||||
Num: 5, count: 95, epsilon: 0
|
||||
Top entries for key two
|
||||
Num: 1, count: 2, epsilon: 0
|
|
@ -1,5 +1,6 @@
|
|||
# @TEST-EXEC: bro -b %INPUT > out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
# @TEST-EXEC: btest-diff .stderr
|
||||
|
||||
event bro_init()
|
||||
{
|
||||
|
@ -13,26 +14,56 @@ event bro_init()
|
|||
|
||||
local s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_count(k1, "a");
|
||||
print topk_epsilon(k1, "a");
|
||||
print topk_count(k1, "b");
|
||||
print topk_epsilon(k1, "b");
|
||||
print topk_count(k1, "c");
|
||||
print topk_epsilon(k1, "c");
|
||||
|
||||
topk_add(k1, "d");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_count(k1, "b");
|
||||
print topk_epsilon(k1, "b");
|
||||
print topk_count(k1, "c");
|
||||
print topk_epsilon(k1, "c");
|
||||
print topk_count(k1, "d");
|
||||
print topk_epsilon(k1, "d");
|
||||
|
||||
topk_add(k1, "e");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_count(k1, "d");
|
||||
print topk_epsilon(k1, "d");
|
||||
print topk_count(k1, "e");
|
||||
print topk_epsilon(k1, "e");
|
||||
|
||||
topk_add(k1, "f");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_count(k1, "f");
|
||||
print topk_epsilon(k1, "f");
|
||||
print topk_count(k1, "e");
|
||||
print topk_epsilon(k1, "e");
|
||||
|
||||
topk_add(k1, "e");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_count(k1, "f");
|
||||
print topk_epsilon(k1, "f");
|
||||
print topk_count(k1, "e");
|
||||
print topk_epsilon(k1, "e");
|
||||
|
||||
topk_add(k1, "g");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
print topk_count(k1, "f");
|
||||
print topk_epsilon(k1, "f");
|
||||
print topk_count(k1, "e");
|
||||
print topk_epsilon(k1, "e");
|
||||
print topk_count(k1, "g");
|
||||
print topk_epsilon(k1, "g");
|
||||
|
||||
k1 = topk_init(100);
|
||||
topk_add(k1, "a");
|
||||
|
|
48
testing/btest/scripts/base/frameworks/sumstats/topk.bro
Normal file
48
testing/btest/scripts/base/frameworks/sumstats/topk.bro
Normal file
|
@ -0,0 +1,48 @@
|
|||
# @TEST-EXEC: bro %INPUT
|
||||
# @TEST-EXEC: btest-diff .stdout
|
||||
|
||||
# Exercises the TOPK sumstats calculation: observation number N is seen
# (100 - N) times under the key "counter", and the number 1 is seen twice
# under the key "two". At the end of the epoch the top five observations
# per key are printed together with their count and epsilon.
event bro_init() &priority=5
	{
	local topk_reducer: SumStats::Reducer = [$stream="test.metric",
	                                         $apply=set(SumStats::TOPK)];

	SumStats::create([$epoch=3secs,
	                  $reducers=set(topk_reducer),
	                  $epoch_finished(data: SumStats::ResultTable) =
	                  	{
	                  	for ( key in data )
	                  		{
	                  		local res = data[key]["test.metric"];

	                  		# topk_get_top returns "any", so assign it to a
	                  		# typed vector before indexing into it.
	                  		local top_obs: vector of SumStats::Observation;
	                  		top_obs = topk_get_top(res$topk, 5);

	                  		print fmt("Top entries for key %s", key$str);

	                  		for ( idx in top_obs )
	                  			print fmt("Num: %d, count: %d, epsilon: %d", top_obs[idx]$num, topk_count(res$topk, top_obs[idx]), topk_epsilon(res$topk, top_obs[idx]));
	                  		}
	                  	}
	                 ]);

	# Only used to drive two nested 100-step loops.
	const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};

	# Number reported in the observation; incremented once per outer step.
	local a: count = 0;

	for ( i in loop_v )
		{
		++a;

		for ( j in loop_v )
			if ( i < j )
				SumStats::observe("test.metric", [$str="counter"], [$num=a]);
		}

	SumStats::observe("test.metric", [$str="two"], [$num=1]);
	SumStats::observe("test.metric", [$str="two"], [$num=1]);
	}
|
Loading…
Add table
Add a link
Reference in a new issue