topk for sumstats

This commit is contained in:
Bernhard Amann 2013-04-23 15:19:01 -07:00
parent 5da97455f5
commit de5769a88f
10 changed files with 198 additions and 4 deletions

View file

@ -4,5 +4,6 @@
@load ./sample @load ./sample
@load ./std-dev @load ./std-dev
@load ./sum @load ./sum
@load ./topk
@load ./unique @load ./unique
@load ./variance @load ./variance

View file

@ -0,0 +1,24 @@
@load base/frameworks/sumstats
module SumStats;
export {
redef enum Calculation += {
## Calculation that feeds observations into a top-k data structure.
TOPK
};
redef record ResultVal += {
## Opaque top-k structure that this plugin's observe_hook handler adds
## observations to. It defaults to a structure created with topk_init(500).
## NOTE(review): 500 is presumably the number of elements the
## structure tracks -- confirm against topk_init.
topk: opaque of topk &default=topk_init(500);
};
}
## Observation hook handler for the TOPK calculation: if the reducer's
## apply set contains TOPK, the observation is added to the top-k
## structure stored in the result value.
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
	{
	if ( TOPK in r$apply )
		topk_add(rv$topk, obs);
	}

View file

@ -19,7 +19,7 @@ Element::~Element()
value=0; value=0;
} }
HashKey* TopkVal::GetHash(Val* v) HashKey* TopkVal::GetHash(Val* v) const
{ {
TypeList* tl = new TypeList(v->Type()); TypeList* tl = new TypeList(v->Type());
tl->Append(v->Type()); tl->Append(v->Type());
@ -58,7 +58,8 @@ TopkVal::~TopkVal()
type = 0; type = 0;
} }
VectorVal* TopkVal::getTopK(int k) // returns vector
VectorVal* TopkVal::getTopK(int k) // returns vector
{ {
if ( numElements == 0 ) if ( numElements == 0 )
{ {
@ -100,6 +101,34 @@ VectorVal* TopkVal::getTopK(int k) // returns vector
return t; return t;
} }
// Returns the count currently recorded for `value`.
//
// value: the element to look up; it is hashed via GetHash() and searched
//        for in elementDict.
//
// Returns the `count` of the element's parent, or 0 (after reporting an
// error through the reporter) if the element is not present.
uint64_t TopkVal::getCount(Val* value) const
	{
	HashKey* key = GetHash(value);
	Element* e = (Element*) elementDict->Lookup(key);

	// The key is only needed for the lookup. Free it here so it is released
	// on both the error path and the success path instead of being leaked.
	// NOTE(review): assumes GetHash() hands a heap-allocated key to the
	// caller and that Lookup() does not keep a reference to it -- confirm.
	delete key;

	if ( e == 0 )
		{
		reporter->Error("getCount for element that is not in top-k");
		return 0;
		}

	return e->parent->count;
	}
// Returns the epsilon value currently recorded for `value`.
//
// value: the element to look up; it is hashed via GetHash() and searched
//        for in elementDict.
//
// Returns the element's `epsilon`, or 0 (after reporting an error through
// the reporter) if the element is not present.
uint64_t TopkVal::getEpsilon(Val* value) const
	{
	HashKey* key = GetHash(value);
	Element* e = (Element*) elementDict->Lookup(key);

	// The key is only needed for the lookup. Free it here so it is released
	// on both the error path and the success path instead of being leaked.
	// NOTE(review): assumes GetHash() hands a heap-allocated key to the
	// caller and that Lookup() does not keep a reference to it -- confirm.
	delete key;

	if ( e == 0 )
		{
		reporter->Error("getEpsilon for element that is not in top-k");
		return 0;
		}

	return e->epsilon;
	}
void TopkVal::Encountered(Val* encountered) void TopkVal::Encountered(Val* encountered)
{ {
// ok, let's see if we already know this one. // ok, let's see if we already know this one.

View file

@ -38,10 +38,12 @@ public:
~TopkVal(); ~TopkVal();
void Encountered(Val* value); // we saw something void Encountered(Val* value); // we saw something
VectorVal* getTopK(int k); // returns vector VectorVal* getTopK(int k); // returns vector
uint64_t getCount(Val* value) const;
uint64_t getEpsilon(Val* value) const;
private: private:
void IncrementCounter(Element* e); void IncrementCounter(Element* e);
HashKey* GetHash(Val*); // this probably should go somewhere else. HashKey* GetHash(Val*) const; // this probably should go somewhere else.
BroType* type; BroType* type;
std::list<Bucket*> buckets; std::list<Bucket*> buckets;

View file

@ -5669,3 +5669,18 @@ function topk_get_top%(handle: opaque of topk, k: count%): any
Topk::TopkVal* h = (Topk::TopkVal*) handle; Topk::TopkVal* h = (Topk::TopkVal*) handle;
return h->getTopK(k); return h->getTopK(k);
%} %}
## Looks up the count recorded for a value in a top-k data structure.
##
## handle: the opaque top-k handle to query.
##
## value: the element to look up.
##
## Returns: the value reported by TopkVal::getCount() for *value*.
function topk_count%(handle: opaque of topk, value: any%): count
	%{
	assert(handle);

	Topk::TopkVal* tk = (Topk::TopkVal*) handle;
	uint64_t n = tk->getCount(value);

	return new Val(n, TYPE_COUNT);
	%}
## Looks up the epsilon value recorded for a value in a top-k data
## structure.
##
## handle: the opaque top-k handle to query.
##
## value: the element to look up.
##
## Returns: the value reported by TopkVal::getEpsilon() for *value*.
function topk_epsilon%(handle: opaque of topk, value: any%): count
	%{
	assert(handle);

	Topk::TopkVal* tk = (Topk::TopkVal*) handle;
	uint64_t eps = tk->getEpsilon(value);

	return new Val(eps, TYPE_COUNT);
	%}

View file

@ -0,0 +1,6 @@
error: getCount for element that is not in top-k
error: getEpsilon for element that is not in top-k
error: getCount for element that is not in top-k
error: getEpsilon for element that is not in top-k
error: getCount for element that is not in top-k
error: getEpsilon for element that is not in top-k

View file

@ -1,7 +1,37 @@
[b, c] [b, c]
0
0
2
0
2
1
[d, c] [d, c]
0
0
2
1
3
2
[d, e] [d, e]
3
2
3
2
[f, e] [f, e]
4
3
3
2
[f, e] [f, e]
4
3
4
2
[g, e] [g, e]
0
0
4
2
5
4
[c, e, d] [c, e, d]

View file

@ -0,0 +1,8 @@
Top entries for key counter
Num: 1, count: 99, epsilon: 0
Num: 2, count: 98, epsilon: 0
Num: 3, count: 97, epsilon: 0
Num: 4, count: 96, epsilon: 0
Num: 5, count: 95, epsilon: 0
Top entries for key two
Num: 1, count: 2, epsilon: 0

View file

@ -1,5 +1,6 @@
# @TEST-EXEC: bro -b %INPUT > out # @TEST-EXEC: bro -b %INPUT > out
# @TEST-EXEC: btest-diff out # @TEST-EXEC: btest-diff out
# @TEST-EXEC: btest-diff .stderr
event bro_init() event bro_init()
{ {
@ -13,26 +14,56 @@ event bro_init()
local s = topk_get_top(k1, 5); local s = topk_get_top(k1, 5);
print s; print s;
print topk_count(k1, "a");
print topk_epsilon(k1, "a");
print topk_count(k1, "b");
print topk_epsilon(k1, "b");
print topk_count(k1, "c");
print topk_epsilon(k1, "c");
topk_add(k1, "d"); topk_add(k1, "d");
s = topk_get_top(k1, 5); s = topk_get_top(k1, 5);
print s; print s;
print topk_count(k1, "b");
print topk_epsilon(k1, "b");
print topk_count(k1, "c");
print topk_epsilon(k1, "c");
print topk_count(k1, "d");
print topk_epsilon(k1, "d");
topk_add(k1, "e"); topk_add(k1, "e");
s = topk_get_top(k1, 5); s = topk_get_top(k1, 5);
print s; print s;
print topk_count(k1, "d");
print topk_epsilon(k1, "d");
print topk_count(k1, "e");
print topk_epsilon(k1, "e");
topk_add(k1, "f"); topk_add(k1, "f");
s = topk_get_top(k1, 5); s = topk_get_top(k1, 5);
print s; print s;
print topk_count(k1, "f");
print topk_epsilon(k1, "f");
print topk_count(k1, "e");
print topk_epsilon(k1, "e");
topk_add(k1, "e"); topk_add(k1, "e");
s = topk_get_top(k1, 5); s = topk_get_top(k1, 5);
print s; print s;
print topk_count(k1, "f");
print topk_epsilon(k1, "f");
print topk_count(k1, "e");
print topk_epsilon(k1, "e");
topk_add(k1, "g"); topk_add(k1, "g");
s = topk_get_top(k1, 5); s = topk_get_top(k1, 5);
print s; print s;
print topk_count(k1, "f");
print topk_epsilon(k1, "f");
print topk_count(k1, "e");
print topk_epsilon(k1, "e");
print topk_count(k1, "g");
print topk_epsilon(k1, "g");
k1 = topk_init(100); k1 = topk_init(100);
topk_add(k1, "a"); topk_add(k1, "a");

View file

@ -0,0 +1,48 @@
# @TEST-EXEC: bro %INPUT
# @TEST-EXEC: btest-diff .stdout
# Exercises the TOPK SumStats calculation: creates a sumstat with a single
# TOPK reducer on "test.metric", feeds it observations under the keys
# "counter" and "two", and prints the top entries per key when the epoch
# finishes.
event bro_init() &priority=5
{
# Reducer that applies only the TOPK calculation to "test.metric".
local r1: SumStats::Reducer = [$stream="test.metric",
$apply=set(SumStats::TOPK)];
SumStats::create([$epoch=3secs,
$reducers=set(r1),
$epoch_finished(data: SumStats::ResultTable) =
{
# For every key, print up to five top observations together with the
# count and epsilon the top-k structure reports for each of them.
for ( key in data )
{
local r = data[key]["test.metric"];
local s: vector of SumStats::Observation;
s = topk_get_top(r$topk, 5);
print fmt("Top entries for key %s", key$str);
for ( element in s )
{
print fmt("Num: %d, count: %d, epsilon: %d", s[element]$num, topk_count(r$topk, s[element]), topk_epsilon(r$topk, s[element]));
}
}
}
]);
# Helper vector with 100 elements, used only to drive the nested loops below.
const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};
# a counts outer iterations (1, 2, 3, ...) and is the value that gets observed.
local a: count;
a = 0;
for ( i in loop_v )
{
a = a + 1;
# Observe the current value of a once for every j with i < j, so each
# successive value of a is observed one time fewer than the previous one.
for ( j in loop_v )
{
if ( i < j )
SumStats::observe("test.metric", [$str="counter"], [$num=a]);
}
}
# Second key: the same observation (num=1) is recorded twice.
SumStats::observe("test.metric", [$str="two"], [$num=1]);
SumStats::observe("test.metric", [$str="two"], [$num=1]);
}