Merge remote-tracking branch 'origin/topic/bernhard/topk'

* origin/topic/bernhard/topk:
  adapt to new folder structure
  fix opaqueval-related memleak
  synchronize pruned attribute
  potentially found wrong Ref.
  add sum function that can be used to get the number of total observed elements.
  in cluster settings, the resultvals can apparently be uninitialized in some special cases
  fix memory leaks
  fix warnings
  add topk cluster test
  make size of topk-list configurable when using sumstats
  implement merging for top-k.
  add serialization for topk
  make the get function const
  topk for sumstats
  well, a test that works..
  implement topk.
This commit is contained in:
Robin Sommer 2013-08-01 10:27:18 -07:00
commit 81dcda3eb4
19 changed files with 1288 additions and 3 deletions

View file

@ -0,0 +1,11 @@
error: getCount for element that is not in top-k
error: getEpsilon for element that is not in top-k
error: getCount for element that is not in top-k
error: getEpsilon for element that is not in top-k
error: getCount for element that is not in top-k
error: getEpsilon for element that is not in top-k
error: getCount for element that is not in top-k
error: getEpsilon for element that is not in top-k
warning: TopkVal::getSum() was used on a pruned data structure. Result values do not represent total element count
error: getCount for element that is not in top-k
error: getEpsilon for element that is not in top-k

View file

@ -0,0 +1,81 @@
[b, c]
4
0
0
2
0
2
1
[d, c]
5
0
0
2
1
3
2
[d, e]
6
3
2
3
2
[f, e]
7
4
3
3
2
[f, e]
8
4
3
4
2
[g, e]
9
0
0
4
2
5
4
[c, e, d]
19
6
0
5
0
4
0
[c, e]
6
0
5
0
0
0
[c, e]
22
12
0
10
0
0
0
[c, e]
19
6
0
5
0
4
0
[c, e, d]
38
12
0
10
0
8
0

View file

@ -0,0 +1,21 @@
1
2
6
4
5
1
[c, e, d]
1
2
6
4
5
1
[c, e, d]
2
4
12
8
10
2
[c, e, d]

View file

@ -0,0 +1,9 @@
Top entries for key counter
Num: 995, count: 100, epsilon: 0
Num: 1, count: 99, epsilon: 0
Num: 2, count: 98, epsilon: 0
Num: 3, count: 97, epsilon: 0
Num: 4, count: 96, epsilon: 0
Top entries for key two
Num: 2, count: 4, epsilon: 0
Num: 1, count: 3, epsilon: 0

View file

@ -0,0 +1,8 @@
Top entries for key counter
Num: 1, count: 99, epsilon: 0
Num: 2, count: 98, epsilon: 0
Num: 3, count: 97, epsilon: 0
Num: 4, count: 96, epsilon: 0
Num: 5, count: 95, epsilon: 0
Top entries for key two
Num: 1, count: 2, epsilon: 0

154
testing/btest/bifs/topk.bro Normal file
View file

@ -0,0 +1,154 @@
# @TEST-EXEC: bro -b %INPUT > out
# @TEST-EXEC: btest-diff out
# @TEST-EXEC: btest-diff .stderr
# Drives the topk_* BiFs by hand. After every step it prints the current top
# list, the sum and the count/epsilon of selected elements; stdout and stderr
# are both compared against recorded baselines by the TEST-EXEC lines above.
event bro_init()
	{
	# Structure created with size argument 2, so the third distinct element
	# below cannot simply be added next to the first two.
	local k1 = topk_init(2);

	# first - peculiarity check...
	topk_add(k1, "a");
	topk_add(k1, "b");
	topk_add(k1, "b");
	topk_add(k1, "c");

	local s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	# Per the recorded baselines, "a" is no longer reported at this point:
	# both queries print 0 and log an error that is diffed via .stderr.
	print topk_count(k1, "a");
	print topk_epsilon(k1, "a");
	print topk_count(k1, "b");
	print topk_epsilon(k1, "b");
	print topk_count(k1, "c");
	print topk_epsilon(k1, "c");

	topk_add(k1, "d");
	s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "b");
	print topk_epsilon(k1, "b");
	print topk_count(k1, "c");
	print topk_epsilon(k1, "c");
	print topk_count(k1, "d");
	print topk_epsilon(k1, "d");

	topk_add(k1, "e");
	s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "d");
	print topk_epsilon(k1, "d");
	print topk_count(k1, "e");
	print topk_epsilon(k1, "e");

	topk_add(k1, "f");
	s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "f");
	print topk_epsilon(k1, "f");
	print topk_count(k1, "e");
	print topk_epsilon(k1, "e");

	topk_add(k1, "e");
	s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "f");
	print topk_epsilon(k1, "f");
	print topk_count(k1, "e");
	print topk_epsilon(k1, "e");

	topk_add(k1, "g");
	s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "f");
	print topk_epsilon(k1, "f");
	print topk_count(k1, "e");
	print topk_epsilon(k1, "e");
	print topk_count(k1, "g");
	print topk_epsilon(k1, "g");

	# Second scenario: size argument 100 with only six distinct elements
	# (a x1, b x2, c x6, d x4, e x5, f x1).
	k1 = topk_init(100);
	topk_add(k1, "a");
	topk_add(k1, "b");
	topk_add(k1, "b");
	topk_add(k1, "c");
	topk_add(k1, "c");
	topk_add(k1, "c");
	topk_add(k1, "c");
	topk_add(k1, "c");
	topk_add(k1, "c");
	topk_add(k1, "d");
	topk_add(k1, "d");
	topk_add(k1, "d");
	topk_add(k1, "d");
	topk_add(k1, "e");
	topk_add(k1, "e");
	topk_add(k1, "e");
	topk_add(k1, "e");
	topk_add(k1, "e");
	topk_add(k1, "f");

	s = topk_get_top(k1, 3);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "c");
	print topk_epsilon(k1, "c");
	print topk_count(k1, "e");
	# Query the epsilon of "e" so it pairs with the count printed just
	# before, as every other count/epsilon pair in this test does. (This
	# used to ask for "d", which is already covered by the next pair.)
	print topk_epsilon(k1, "e");
	print topk_count(k1, "d");
	print topk_epsilon(k1, "d");

	# Merge the large structure into a size-2 one with the pruning variant.
	local k3 = topk_init(2);
	topk_merge_prune(k3, k1);

	s = topk_get_top(k3, 3);
	print s;
	print topk_count(k3, "c");
	print topk_epsilon(k3, "c");
	print topk_count(k3, "e");
	print topk_epsilon(k3, "e");
	print topk_count(k3, "d");
	print topk_epsilon(k3, "d");

	# Merge the same data a second time.
	topk_merge_prune(k3, k1);

	s = topk_get_top(k3, 3);
	print s;
	print topk_sum(k3); # this gives a warning and a wrong result.
	print topk_count(k3, "c");
	print topk_epsilon(k3, "c");
	print topk_count(k3, "e");
	print topk_epsilon(k3, "e");
	print topk_count(k3, "d");
	print topk_epsilon(k3, "d");

	# Same two merges again, this time with the non-pruning variant.
	k3 = topk_init(2);
	topk_merge(k3, k1);

	# NOTE(review): s is not refreshed after this merge, so this prints the
	# list obtained from the previous topk_get_top() call. The recorded
	# baseline matches that; refreshing s here would require a new baseline.
	print s;
	print topk_sum(k3);
	print topk_count(k3, "c");
	print topk_epsilon(k3, "c");
	print topk_count(k3, "e");
	print topk_epsilon(k3, "e");
	print topk_count(k3, "d");
	print topk_epsilon(k3, "d");

	topk_merge(k3, k1);

	s = topk_get_top(k3, 3);
	print s;
	print topk_sum(k3);
	print topk_count(k3, "c");
	print topk_epsilon(k3, "c");
	print topk_count(k3, "e");
	print topk_epsilon(k3, "e");
	print topk_count(k3, "d");
	print topk_epsilon(k3, "d");
	}

View file

@ -0,0 +1,74 @@
# @TEST-EXEC: bro -b %INPUT runnumber=1 >out
# @TEST-EXEC: bro -b %INPUT runnumber=2 >>out
# @TEST-EXEC: bro -b %INPUT runnumber=3 >>out
# @TEST-EXEC: btest-diff out
# Set on the command line by each TEST-EXEC invocation above (runnumber=1..3).
global runnumber: count &redef; # differentiate runs
# Top-k state under test; declared &persistent so that it is meant to carry
# over between the three separate bro invocations.
global k1: opaque of topk &persistent;
# Second persistent top-k handle; only assigned in bro_init, never queried.
global k2: opaque of topk &persistent;
# Executed once per bro invocation. Statement order matters here: the counts
# and the top list are taken before the run-2 additions are applied.
event bro_init()
{
# k2 is re-created on every run; nothing else in this handler uses it.
k2 = topk_init(20);
# Run 1 only: create k1 and add a x1, b x2, c x6, d x4, e x5, f x1.
if ( runnumber == 1 )
{
k1 = topk_init(100);
topk_add(k1, "a");
topk_add(k1, "b");
topk_add(k1, "b");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "f");
}
# Runs 2 and 3 never assign k1 in this handler; presumably its value is
# restored from the &persistent state left by the previous run -- TODO confirm.
# The top-3 list is captured here and only printed at the very end.
local s = topk_get_top(k1, 3);
print topk_count(k1, "a");
print topk_count(k1, "b");
print topk_count(k1, "c");
print topk_count(k1, "d");
print topk_count(k1, "e");
print topk_count(k1, "f");
# Run 2 only: repeat the same additions after the counts above were printed,
# so their effect first becomes visible in the output of run 3.
if ( runnumber == 2 )
{
topk_add(k1, "a");
topk_add(k1, "b");
topk_add(k1, "b");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "f");
}
# Prints the list captured before the run-2 additions.
print s;
}

View file

@ -0,0 +1,110 @@
# @TEST-SERIALIZE: comm
#
# @TEST-EXEC: btest-bg-run manager-1 BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT
# @TEST-EXEC: sleep 1
# @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT
# @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-2 bro %INPUT
# @TEST-EXEC: btest-bg-wait 15
# @TEST-EXEC: btest-diff manager-1/.stdout
#
@TEST-START-FILE cluster-layout.bro
# Cluster layout for this test: one manager and two workers, all on 127.0.0.1
# with distinct TCP ports; both workers name manager-1 as their manager.
redef Cluster::nodes = {
["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")],
["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1", $interface="eth0"],
["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1", $interface="eth1"],
};
@TEST-END-FILE
# Rotation interval set to 0secs for the duration of the test.
redef Log::default_rotation_interval = 0secs;

# Registers a SumStats with a single TOPK reducer on "test.metric". When the
# epoch finishes, the top five entries of every key are printed together with
# their count and epsilon, and termination is requested.
event bro_init() &priority=5
	{
	local topk_reducer: SumStats::Reducer = [$stream="test.metric",
	                                         $apply=set(SumStats::TOPK)];

	SumStats::create([$epoch=5secs,
	                  $reducers=set(topk_reducer),
	                  $epoch_finished(data: SumStats::ResultTable) =
		{
		for ( key in data )
			{
			local res = data[key]["test.metric"];

			# Declared with an explicit type first, then assigned from the BiF.
			local entries: vector of SumStats::Observation;
			entries = topk_get_top(res$topk, 5);

			print fmt("Top entries for key %s", key$str);

			for ( idx in entries )
				print fmt("Num: %d, count: %d, epsilon: %d", entries[idx]$num, topk_count(res$topk, entries[idx]), topk_epsilon(res$topk, entries[idx]));

			# Requested from inside the per-key loop, as in the original.
			terminate();
			}
		}
	]);
	}
# Any node shuts itself down as soon as one of its peer connections closes,
# so the background processes of this test do not linger.
event remote_connection_closed(p: event_peer)
{
terminate();
}
# Start signal for the workers. The event name is added to
# Cluster::manager2worker_events (presumably so that the manager's event is
# forwarded to the workers -- confirm against the cluster framework).
global ready_for_data: event();
redef Cluster::manager2worker_events += /^ready_for_data$/;

# Feeds observations into "test.metric"; what is observed depends on which
# worker executes the handler.
event ready_for_data()
	{
	const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};

	if ( Cluster::node == "worker-1" )
		{
		# Running counter: the n-th outer iteration observes the value n
		# once for every inner index that is larger than the outer index.
		local observed = 0;

		for ( outer in loop_v )
			{
			++observed;

			for ( inner in loop_v )
				{
				if ( inner > outer )
					SumStats::observe("test.metric", [$str="counter"], [$num=observed]);
				}
			}

		SumStats::observe("test.metric", [$str="two"], [$num=1]);
		SumStats::observe("test.metric", [$str="two"], [$num=1]);
		}

	if ( Cluster::node == "worker-2" )
		{
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=1]);

		# One observation of 995 per element of loop_v.
		for ( n in loop_v )
			SumStats::observe("test.metric", [$str="counter"], [$num=995]);
		}
	}
# Manager only: count completed peer handshakes and raise ready_for_data once
# the second one has finished (the layout in this test has two workers).
@if ( Cluster::local_node_type() == Cluster::MANAGER )
global peer_count = 0;
event remote_connection_handshake_done(p: event_peer) &priority=-5
{
++peer_count;
# Fires exactly once, when the counter reaches 2.
if ( peer_count == 2 )
event ready_for_data();
}
@endif

View file

@ -0,0 +1,48 @@
# @TEST-EXEC: bro %INPUT
# @TEST-EXEC: btest-diff .stdout
# Standalone (non-cluster) SumStats TOPK check. Registers a TOPK reducer on
# "test.metric", prints the top five entries per key when the epoch finishes,
# and then feeds the observations from within the same handler.
event bro_init() &priority=5
	{
	local topk_reducer: SumStats::Reducer = [$stream="test.metric",
	                                         $apply=set(SumStats::TOPK)];

	SumStats::create([$epoch=3secs,
	                  $reducers=set(topk_reducer),
	                  $epoch_finished(data: SumStats::ResultTable) =
		{
		for ( key in data )
			{
			local res = data[key]["test.metric"];

			# Declared with an explicit type first, then assigned from the BiF.
			local entries: vector of SumStats::Observation;
			entries = topk_get_top(res$topk, 5);

			print fmt("Top entries for key %s", key$str);

			for ( idx in entries )
				print fmt("Num: %d, count: %d, epsilon: %d", entries[idx]$num, topk_count(res$topk, entries[idx]), topk_epsilon(res$topk, entries[idx]));
			}
		}
	]);

	const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};

	# Running counter: the n-th outer iteration observes the value n once
	# for every inner index that is larger than the outer index.
	local observed = 0;

	for ( outer in loop_v )
		{
		++observed;

		for ( inner in loop_v )
			{
			if ( inner > outer )
				SumStats::observe("test.metric", [$str="counter"], [$num=observed]);
			}
		}

	SumStats::observe("test.metric", [$str="two"], [$num=1]);
	SumStats::observe("test.metric", [$str="two"], [$num=1]);
	}