mirror of
https://github.com/zeek/zeek.git
synced 2025-10-11 11:08:20 +00:00
topk for sumstats
This commit is contained in:
parent
5da97455f5
commit
de5769a88f
10 changed files with 198 additions and 4 deletions
|
@ -4,5 +4,6 @@
|
||||||
@load ./sample
|
@load ./sample
|
||||||
@load ./std-dev
|
@load ./std-dev
|
||||||
@load ./sum
|
@load ./sum
|
||||||
|
@load ./topk
|
||||||
@load ./unique
|
@load ./unique
|
||||||
@load ./variance
|
@load ./variance
|
24
scripts/base/frameworks/sumstats/plugins/topk.bro
Normal file
24
scripts/base/frameworks/sumstats/plugins/topk.bro
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
@load base/frameworks/sumstats
|
||||||
|
|
||||||
|
module SumStats;
|
||||||
|
|
||||||
|
export {
	## Adds a top-k calculation to the set of calculations a reducer
	## can request through its ``apply`` field.
	redef enum Calculation += { TOPK };

	redef record ResultVal += {
		## Top-k structure that receives the observations of reducers
		## applying TOPK. It is created via topk_init(500).
		## NOTE(review): 500 is presumably the number of elements the
		## structure tracks -- confirm against topk_init.
		topk: opaque of topk &default=topk_init(500);
	};
}
|
||||||
|
|
||||||
|
## Records an observation in the result's top-k structure whenever the
## reducer asks for the TOPK calculation; other reducers are left alone.
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
	{
	if ( TOPK in r$apply )
		topk_add(rv$topk, obs);
	}
|
||||||
|
|
||||||
|
|
31
src/Topk.cc
31
src/Topk.cc
|
@ -19,7 +19,7 @@ Element::~Element()
|
||||||
value=0;
|
value=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
HashKey* TopkVal::GetHash(Val* v)
|
HashKey* TopkVal::GetHash(Val* v) const
|
||||||
{
|
{
|
||||||
TypeList* tl = new TypeList(v->Type());
|
TypeList* tl = new TypeList(v->Type());
|
||||||
tl->Append(v->Type());
|
tl->Append(v->Type());
|
||||||
|
@ -58,6 +58,7 @@ TopkVal::~TopkVal()
|
||||||
type = 0;
|
type = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
VectorVal* TopkVal::getTopK(int k) // returns vector
|
VectorVal* TopkVal::getTopK(int k) // returns vector
|
||||||
{
|
{
|
||||||
if ( numElements == 0 )
|
if ( numElements == 0 )
|
||||||
|
@ -100,6 +101,34 @@ VectorVal* TopkVal::getTopK(int k) // returns vector
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the count stored in the bucket that currently holds `value`.
//
// value: the element to look up in the top-k structure.
//
// Returns the parent bucket's count for the element. If the element is
// not tracked, an error is reported through the reporter and 0 is returned.
uint64_t TopkVal::getCount(Val* value) const
	{
	HashKey* key = GetHash(value);
	Element* e = static_cast<Element*>(elementDict->Lookup(key));

	// The key is only needed for the lookup itself; release it right away
	// so neither the found nor the not-found path leaks it.
	delete key;

	if ( ! e )
		{
		reporter->Error("getCount for element that is not in top-k");
		return 0;
		}

	return e->parent->count;
	}
|
||||||
|
|
||||||
|
// Returns the epsilon value stored with `value` in the top-k structure.
//
// value: the element to look up in the top-k structure.
//
// Returns the element's epsilon. If the element is not tracked, an error
// is reported through the reporter and 0 is returned.
uint64_t TopkVal::getEpsilon(Val* value) const
	{
	HashKey* key = GetHash(value);
	Element* e = static_cast<Element*>(elementDict->Lookup(key));

	// The key is only needed for the lookup itself; release it right away
	// so neither the found nor the not-found path leaks it.
	delete key;

	if ( ! e )
		{
		reporter->Error("getEpsilon for element that is not in top-k");
		return 0;
		}

	return e->epsilon;
	}
|
||||||
|
|
||||||
void TopkVal::Encountered(Val* encountered)
|
void TopkVal::Encountered(Val* encountered)
|
||||||
{
|
{
|
||||||
// ok, let's see if we already know this one.
|
// ok, let's see if we already know this one.
|
||||||
|
|
|
@ -38,10 +38,12 @@ public:
|
||||||
~TopkVal();
|
~TopkVal();
|
||||||
void Encountered(Val* value); // we saw something
|
void Encountered(Val* value); // we saw something
|
||||||
VectorVal* getTopK(int k); // returns vector
|
VectorVal* getTopK(int k); // returns vector
|
||||||
|
uint64_t getCount(Val* value) const;
|
||||||
|
uint64_t getEpsilon(Val* value) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void IncrementCounter(Element* e);
|
void IncrementCounter(Element* e);
|
||||||
HashKey* GetHash(Val*); // this probably should go somewhere else.
|
HashKey* GetHash(Val*) const; // this probably should go somewhere else.
|
||||||
|
|
||||||
BroType* type;
|
BroType* type;
|
||||||
std::list<Bucket*> buckets;
|
std::list<Bucket*> buckets;
|
||||||
|
|
15
src/bro.bif
15
src/bro.bif
|
@ -5669,3 +5669,18 @@ function topk_get_top%(handle: opaque of topk, k: count%): any
|
||||||
Topk::TopkVal* h = (Topk::TopkVal*) handle;
|
Topk::TopkVal* h = (Topk::TopkVal*) handle;
|
||||||
return h->getTopK(k);
|
return h->getTopK(k);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
## Returns the count kept for a value in a top-k data structure.
##
## handle: the top-k handle to query.
##
## value: the value to look up.
##
## Returns: the result of ``TopkVal::getCount`` for *value*, as a count.
function topk_count%(handle: opaque of topk, value: any%): count
	%{
	assert(handle);

	Topk::TopkVal* topk = static_cast<Topk::TopkVal*>(handle);
	uint64_t n = topk->getCount(value);

	return new Val(n, TYPE_COUNT);
	%}
|
||||||
|
|
||||||
|
## Returns the epsilon kept for a value in a top-k data structure.
##
## handle: the top-k handle to query.
##
## value: the value to look up.
##
## Returns: the result of ``TopkVal::getEpsilon`` for *value*, as a count.
function topk_epsilon%(handle: opaque of topk, value: any%): count
	%{
	assert(handle);

	Topk::TopkVal* topk = static_cast<Topk::TopkVal*>(handle);
	uint64_t eps = topk->getEpsilon(value);

	return new Val(eps, TYPE_COUNT);
	%}
|
||||||
|
|
||||||
|
|
6
testing/btest/Baseline/bifs.topk/.stderr
Normal file
6
testing/btest/Baseline/bifs.topk/.stderr
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
error: getCount for element that is not in top-k
|
||||||
|
error: getEpsilon for element that is not in top-k
|
||||||
|
error: getCount for element that is not in top-k
|
||||||
|
error: getEpsilon for element that is not in top-k
|
||||||
|
error: getCount for element that is not in top-k
|
||||||
|
error: getEpsilon for element that is not in top-k
|
|
@ -1,7 +1,37 @@
|
||||||
[b, c]
|
[b, c]
|
||||||
|
0
|
||||||
|
0
|
||||||
|
2
|
||||||
|
0
|
||||||
|
2
|
||||||
|
1
|
||||||
[d, c]
|
[d, c]
|
||||||
|
0
|
||||||
|
0
|
||||||
|
2
|
||||||
|
1
|
||||||
|
3
|
||||||
|
2
|
||||||
[d, e]
|
[d, e]
|
||||||
|
3
|
||||||
|
2
|
||||||
|
3
|
||||||
|
2
|
||||||
[f, e]
|
[f, e]
|
||||||
|
4
|
||||||
|
3
|
||||||
|
3
|
||||||
|
2
|
||||||
[f, e]
|
[f, e]
|
||||||
|
4
|
||||||
|
3
|
||||||
|
4
|
||||||
|
2
|
||||||
[g, e]
|
[g, e]
|
||||||
|
0
|
||||||
|
0
|
||||||
|
4
|
||||||
|
2
|
||||||
|
5
|
||||||
|
4
|
||||||
[c, e, d]
|
[c, e, d]
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
Top entries for key counter
|
||||||
|
Num: 1, count: 99, epsilon: 0
|
||||||
|
Num: 2, count: 98, epsilon: 0
|
||||||
|
Num: 3, count: 97, epsilon: 0
|
||||||
|
Num: 4, count: 96, epsilon: 0
|
||||||
|
Num: 5, count: 95, epsilon: 0
|
||||||
|
Top entries for key two
|
||||||
|
Num: 1, count: 2, epsilon: 0
|
|
@ -1,5 +1,6 @@
|
||||||
# @TEST-EXEC: bro -b %INPUT > out
|
# @TEST-EXEC: bro -b %INPUT > out
|
||||||
# @TEST-EXEC: btest-diff out
|
# @TEST-EXEC: btest-diff out
|
||||||
|
# @TEST-EXEC: btest-diff .stderr
|
||||||
|
|
||||||
event bro_init()
|
event bro_init()
|
||||||
{
|
{
|
||||||
|
@ -13,26 +14,56 @@ event bro_init()
|
||||||
|
|
||||||
local s = topk_get_top(k1, 5);
|
local s = topk_get_top(k1, 5);
|
||||||
print s;
|
print s;
|
||||||
|
print topk_count(k1, "a");
|
||||||
|
print topk_epsilon(k1, "a");
|
||||||
|
print topk_count(k1, "b");
|
||||||
|
print topk_epsilon(k1, "b");
|
||||||
|
print topk_count(k1, "c");
|
||||||
|
print topk_epsilon(k1, "c");
|
||||||
|
|
||||||
topk_add(k1, "d");
|
topk_add(k1, "d");
|
||||||
s = topk_get_top(k1, 5);
|
s = topk_get_top(k1, 5);
|
||||||
print s;
|
print s;
|
||||||
|
print topk_count(k1, "b");
|
||||||
|
print topk_epsilon(k1, "b");
|
||||||
|
print topk_count(k1, "c");
|
||||||
|
print topk_epsilon(k1, "c");
|
||||||
|
print topk_count(k1, "d");
|
||||||
|
print topk_epsilon(k1, "d");
|
||||||
|
|
||||||
topk_add(k1, "e");
|
topk_add(k1, "e");
|
||||||
s = topk_get_top(k1, 5);
|
s = topk_get_top(k1, 5);
|
||||||
print s;
|
print s;
|
||||||
|
print topk_count(k1, "d");
|
||||||
|
print topk_epsilon(k1, "d");
|
||||||
|
print topk_count(k1, "e");
|
||||||
|
print topk_epsilon(k1, "e");
|
||||||
|
|
||||||
topk_add(k1, "f");
|
topk_add(k1, "f");
|
||||||
s = topk_get_top(k1, 5);
|
s = topk_get_top(k1, 5);
|
||||||
print s;
|
print s;
|
||||||
|
print topk_count(k1, "f");
|
||||||
|
print topk_epsilon(k1, "f");
|
||||||
|
print topk_count(k1, "e");
|
||||||
|
print topk_epsilon(k1, "e");
|
||||||
|
|
||||||
topk_add(k1, "e");
|
topk_add(k1, "e");
|
||||||
s = topk_get_top(k1, 5);
|
s = topk_get_top(k1, 5);
|
||||||
print s;
|
print s;
|
||||||
|
print topk_count(k1, "f");
|
||||||
|
print topk_epsilon(k1, "f");
|
||||||
|
print topk_count(k1, "e");
|
||||||
|
print topk_epsilon(k1, "e");
|
||||||
|
|
||||||
topk_add(k1, "g");
|
topk_add(k1, "g");
|
||||||
s = topk_get_top(k1, 5);
|
s = topk_get_top(k1, 5);
|
||||||
print s;
|
print s;
|
||||||
|
print topk_count(k1, "f");
|
||||||
|
print topk_epsilon(k1, "f");
|
||||||
|
print topk_count(k1, "e");
|
||||||
|
print topk_epsilon(k1, "e");
|
||||||
|
print topk_count(k1, "g");
|
||||||
|
print topk_epsilon(k1, "g");
|
||||||
|
|
||||||
k1 = topk_init(100);
|
k1 = topk_init(100);
|
||||||
topk_add(k1, "a");
|
topk_add(k1, "a");
|
||||||
|
|
48
testing/btest/scripts/base/frameworks/sumstats/topk.bro
Normal file
48
testing/btest/scripts/base/frameworks/sumstats/topk.bro
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
# @TEST-EXEC: bro %INPUT
|
||||||
|
# @TEST-EXEC: btest-diff .stdout
|
||||||
|
|
||||||
|
# Sets up a sumstat with a single TOPK reducer, then feeds it observations
# so that the value n is observed (100 - n) times under key "counter" and
# the value 1 is observed twice under key "two". When the epoch finishes,
# the top five entries per key are printed with their count and epsilon.
event bro_init() &priority=5
	{
	local r1: SumStats::Reducer = [$stream="test.metric",
	                               $apply=set(SumStats::TOPK)];

	SumStats::create([$epoch=3secs,
	                  $reducers=set(r1),
	                  $epoch_finished(data: SumStats::ResultTable) =
		{
		for ( key in data )
			{
			local result = data[key]["test.metric"];

			# topk_get_top() is declared as returning any, so the typed
			# vector is declared first and assigned afterwards.
			local top: vector of SumStats::Observation;
			top = topk_get_top(result$topk, 5);

			print fmt("Top entries for key %s", key$str);

			for ( idx in top )
				print fmt("Num: %d, count: %d, epsilon: %d",
				          top[idx]$num,
				          topk_count(result$topk, top[idx]),
				          topk_epsilon(result$topk, top[idx]));
			}
		}
	]);

	const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};

	# Running value observed in the outer iteration; starts at 1 after the
	# first increment.
	local a: count = 0;

	for ( i in loop_v )
		{
		++a;

		# i and j are vector indices; the value a is observed once for
		# every index j that is greater than i.
		for ( j in loop_v )
			{
			if ( i < j )
				SumStats::observe("test.metric", [$str="counter"], [$num=a]);
			}
		}

	SumStats::observe("test.metric", [$str="two"], [$num=1]);
	SumStats::observe("test.metric", [$str="two"], [$num=1]);
	}
|
Loading…
Add table
Add a link
Reference in a new issue