mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 23:58:20 +00:00
Merge remote-tracking branch 'origin/topic/bernhard/topk'
* origin/topic/bernhard/topk:
  adapt to new folder structure
  fix opaqueval-related memleak
  synchronize pruned attribute
  potentially found wrong Ref.
  add sum function that can be used to get the number of total observed elements.
  in cluster settings, the resultvals can apparently be uninitialized in some special cases
  fix memory leaks
  fix warnings
  add topk cluster test
  make size of topk-list configurable when using sumstats
  implement merging for top-k.
  add serialization for topk
  make the get function const
  topk for sumstats
  well, a test that works..
  implement topk.
This commit is contained in:
commit
81dcda3eb4
19 changed files with 1288 additions and 3 deletions
11
testing/btest/Baseline/bifs.topk/.stderr
Normal file
11
testing/btest/Baseline/bifs.topk/.stderr
Normal file
|
@ -0,0 +1,11 @@
|
|||
error: getCount for element that is not in top-k
|
||||
error: getEpsilon for element that is not in top-k
|
||||
error: getCount for element that is not in top-k
|
||||
error: getEpsilon for element that is not in top-k
|
||||
error: getCount for element that is not in top-k
|
||||
error: getEpsilon for element that is not in top-k
|
||||
error: getCount for element that is not in top-k
|
||||
error: getEpsilon for element that is not in top-k
|
||||
warning: TopkVal::getSum() was used on a pruned data structure. Result values do not represent total element count
|
||||
error: getCount for element that is not in top-k
|
||||
error: getEpsilon for element that is not in top-k
|
81
testing/btest/Baseline/bifs.topk/out
Normal file
81
testing/btest/Baseline/bifs.topk/out
Normal file
|
@ -0,0 +1,81 @@
|
|||
[b, c]
|
||||
4
|
||||
0
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
[d, c]
|
||||
5
|
||||
0
|
||||
0
|
||||
2
|
||||
1
|
||||
3
|
||||
2
|
||||
[d, e]
|
||||
6
|
||||
3
|
||||
2
|
||||
3
|
||||
2
|
||||
[f, e]
|
||||
7
|
||||
4
|
||||
3
|
||||
3
|
||||
2
|
||||
[f, e]
|
||||
8
|
||||
4
|
||||
3
|
||||
4
|
||||
2
|
||||
[g, e]
|
||||
9
|
||||
0
|
||||
0
|
||||
4
|
||||
2
|
||||
5
|
||||
4
|
||||
[c, e, d]
|
||||
19
|
||||
6
|
||||
0
|
||||
5
|
||||
0
|
||||
4
|
||||
0
|
||||
[c, e]
|
||||
6
|
||||
0
|
||||
5
|
||||
0
|
||||
0
|
||||
0
|
||||
[c, e]
|
||||
22
|
||||
12
|
||||
0
|
||||
10
|
||||
0
|
||||
0
|
||||
0
|
||||
[c, e]
|
||||
19
|
||||
6
|
||||
0
|
||||
5
|
||||
0
|
||||
4
|
||||
0
|
||||
[c, e, d]
|
||||
38
|
||||
12
|
||||
0
|
||||
10
|
||||
0
|
||||
8
|
||||
0
|
21
testing/btest/Baseline/istate.topk/out
Normal file
21
testing/btest/Baseline/istate.topk/out
Normal file
|
@ -0,0 +1,21 @@
|
|||
1
|
||||
2
|
||||
6
|
||||
4
|
||||
5
|
||||
1
|
||||
[c, e, d]
|
||||
1
|
||||
2
|
||||
6
|
||||
4
|
||||
5
|
||||
1
|
||||
[c, e, d]
|
||||
2
|
||||
4
|
||||
12
|
||||
8
|
||||
10
|
||||
2
|
||||
[c, e, d]
|
|
@ -0,0 +1,9 @@
|
|||
Top entries for key counter
|
||||
Num: 995, count: 100, epsilon: 0
|
||||
Num: 1, count: 99, epsilon: 0
|
||||
Num: 2, count: 98, epsilon: 0
|
||||
Num: 3, count: 97, epsilon: 0
|
||||
Num: 4, count: 96, epsilon: 0
|
||||
Top entries for key two
|
||||
Num: 2, count: 4, epsilon: 0
|
||||
Num: 1, count: 3, epsilon: 0
|
|
@ -0,0 +1,8 @@
|
|||
Top entries for key counter
|
||||
Num: 1, count: 99, epsilon: 0
|
||||
Num: 2, count: 98, epsilon: 0
|
||||
Num: 3, count: 97, epsilon: 0
|
||||
Num: 4, count: 96, epsilon: 0
|
||||
Num: 5, count: 95, epsilon: 0
|
||||
Top entries for key two
|
||||
Num: 1, count: 2, epsilon: 0
|
154
testing/btest/bifs/topk.bro
Normal file
154
testing/btest/bifs/topk.bro
Normal file
|
@ -0,0 +1,154 @@
|
|||
# @TEST-EXEC: bro -b %INPUT > out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
# @TEST-EXEC: btest-diff .stderr
|
||||
|
||||
# Drives the top-k BIFs directly and prints their results for comparison
# against the baseline:
#   1. a size-2 structure that receives more distinct values than it holds,
#   2. a size-100 structure filled with 19 observations of six values,
#   3. topk_merge_prune() and topk_merge() of (2) into fresh size-2 structures.
# Lookups of values that are no longer tracked print 0 and are expected to
# log "... for element that is not in top-k" on stderr (also diffed).
event bro_init()
	{
	local k1 = topk_init(2);

	# first - peculiarity check...
	topk_add(k1, "a");
	topk_add(k1, "b");
	topk_add(k1, "b");
	topk_add(k1, "c");

	local s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "a");
	print topk_epsilon(k1, "a");
	print topk_count(k1, "b");
	print topk_epsilon(k1, "b");
	print topk_count(k1, "c");
	print topk_epsilon(k1, "c");

	topk_add(k1, "d");
	s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "b");
	print topk_epsilon(k1, "b");
	print topk_count(k1, "c");
	print topk_epsilon(k1, "c");
	print topk_count(k1, "d");
	print topk_epsilon(k1, "d");

	topk_add(k1, "e");
	s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "d");
	print topk_epsilon(k1, "d");
	print topk_count(k1, "e");
	print topk_epsilon(k1, "e");

	topk_add(k1, "f");
	s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "f");
	print topk_epsilon(k1, "f");
	print topk_count(k1, "e");
	print topk_epsilon(k1, "e");

	topk_add(k1, "e");
	s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "f");
	print topk_epsilon(k1, "f");
	print topk_count(k1, "e");
	print topk_epsilon(k1, "e");

	topk_add(k1, "g");
	s = topk_get_top(k1, 5);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "f");
	print topk_epsilon(k1, "f");
	print topk_count(k1, "e");
	print topk_epsilon(k1, "e");
	print topk_count(k1, "g");
	print topk_epsilon(k1, "g");

	# Start over with room for 100 values: a x1, b x2, c x6, d x4, e x5,
	# f x1 (19 observations in total).
	k1 = topk_init(100);
	topk_add(k1, "a");
	topk_add(k1, "b");
	topk_add(k1, "b");
	topk_add(k1, "c");
	topk_add(k1, "c");
	topk_add(k1, "c");
	topk_add(k1, "c");
	topk_add(k1, "c");
	topk_add(k1, "c");
	topk_add(k1, "d");
	topk_add(k1, "d");
	topk_add(k1, "d");
	topk_add(k1, "d");
	topk_add(k1, "e");
	topk_add(k1, "e");
	topk_add(k1, "e");
	topk_add(k1, "e");
	topk_add(k1, "e");
	topk_add(k1, "f");
	s = topk_get_top(k1, 3);
	print s;
	print topk_sum(k1);
	print topk_count(k1, "c");
	print topk_epsilon(k1, "c");
	print topk_count(k1, "e");
	# Epsilon of "e", matching the count printed just above. This line
	# used to query "d", which is already covered by the pair below.
	print topk_epsilon(k1, "e");
	print topk_count(k1, "d");
	print topk_epsilon(k1, "d");

	# Pruning merge of the size-100 structure into a size-2 one.
	local k3 = topk_init(2);
	topk_merge_prune(k3, k1);

	s = topk_get_top(k3, 3);
	print s;
	print topk_count(k3, "c");
	print topk_epsilon(k3, "c");
	print topk_count(k3, "e");
	print topk_epsilon(k3, "e");
	print topk_count(k3, "d");
	print topk_epsilon(k3, "d");

	topk_merge_prune(k3, k1);

	s = topk_get_top(k3, 3);
	print s;
	print topk_sum(k3); # this gives a warning and a wrong result.
	print topk_count(k3, "c");
	print topk_epsilon(k3, "c");
	print topk_count(k3, "e");
	print topk_epsilon(k3, "e");
	print topk_count(k3, "d");
	print topk_epsilon(k3, "d");

	# Same again with the non-pruning merge.
	k3 = topk_init(2);
	topk_merge(k3, k1);
	# NOTE(review): s is not refreshed after this merge, so this prints the
	# list fetched from the pruned k3 above. The baseline ("[c, e]") was
	# recorded with that behavior; confirm whether a topk_get_top() call
	# was intended here before changing it.
	print s;
	print topk_sum(k3);
	print topk_count(k3, "c");
	print topk_epsilon(k3, "c");
	print topk_count(k3, "e");
	print topk_epsilon(k3, "e");
	print topk_count(k3, "d");
	print topk_epsilon(k3, "d");

	topk_merge(k3, k1);

	s = topk_get_top(k3, 3);
	print s;
	print topk_sum(k3);
	print topk_count(k3, "c");
	print topk_epsilon(k3, "c");
	print topk_count(k3, "e");
	print topk_epsilon(k3, "e");
	print topk_count(k3, "d");
	print topk_epsilon(k3, "d");
	}
|
74
testing/btest/istate/topk.bro
Normal file
74
testing/btest/istate/topk.bro
Normal file
|
@ -0,0 +1,74 @@
|
|||
# @TEST-EXEC: bro -b %INPUT runnumber=1 >out
|
||||
# @TEST-EXEC: bro -b %INPUT runnumber=2 >>out
|
||||
# @TEST-EXEC: bro -b %INPUT runnumber=3 >>out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
|
||||
global runnumber: count &redef; # differentiate runs

# Both structures are marked &persistent. Only k1 is read back by this
# script; k2 is re-initialized on every run and never queried.
global k1: opaque of topk &persistent;
global k2: opaque of topk &persistent;

# Adds the fixed 19-observation sample to the given top-k structure:
# a x1, b x2, c x6, d x4, e x5, f x1.
function add_test_observations(t: opaque of topk)
	{
	topk_add(t, "a");
	topk_add(t, "b");
	topk_add(t, "b");
	topk_add(t, "c");
	topk_add(t, "c");
	topk_add(t, "c");
	topk_add(t, "c");
	topk_add(t, "c");
	topk_add(t, "c");
	topk_add(t, "d");
	topk_add(t, "d");
	topk_add(t, "d");
	topk_add(t, "d");
	topk_add(t, "e");
	topk_add(t, "e");
	topk_add(t, "e");
	topk_add(t, "e");
	topk_add(t, "e");
	topk_add(t, "f");
	}

# Run 1 creates and fills k1; runs 2 and 3 rely on k1 already holding a
# value when bro_init fires. Each run prints the counts of "a".."f" and the
# top-3 list; run 2 additionally feeds the sample a second time after the
# counts have been printed.
event bro_init()
	{
	k2 = topk_init(20);

	if ( runnumber == 1 )
		{
		k1 = topk_init(100);
		add_test_observations(k1);
		}

	# Fetched before the run-2 additions below, printed at the very end.
	local s = topk_get_top(k1, 3);
	print topk_count(k1, "a");
	print topk_count(k1, "b");
	print topk_count(k1, "c");
	print topk_count(k1, "d");
	print topk_count(k1, "e");
	print topk_count(k1, "f");

	if ( runnumber == 2 )
		add_test_observations(k1);

	print s;
	}
|
110
testing/btest/scripts/base/frameworks/sumstats/topk-cluster.bro
Normal file
110
testing/btest/scripts/base/frameworks/sumstats/topk-cluster.bro
Normal file
|
@ -0,0 +1,110 @@
|
|||
# @TEST-SERIALIZE: comm
|
||||
#
|
||||
# @TEST-EXEC: btest-bg-run manager-1 BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT
|
||||
# @TEST-EXEC: sleep 1
|
||||
# @TEST-EXEC: btest-bg-run worker-1 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro %INPUT
|
||||
# @TEST-EXEC: btest-bg-run worker-2 BROPATH=$BROPATH:.. CLUSTER_NODE=worker-2 bro %INPUT
|
||||
# @TEST-EXEC: btest-bg-wait 15
|
||||
|
||||
# @TEST-EXEC: btest-diff manager-1/.stdout
|
||||
#
|
||||
@TEST-START-FILE cluster-layout.bro
|
||||
redef Cluster::nodes = {
|
||||
["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")],
|
||||
["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1", $interface="eth0"],
|
||||
["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1", $interface="eth1"],
|
||||
};
|
||||
@TEST-END-FILE
|
||||
|
||||
redef Log::default_rotation_interval = 0secs;
|
||||
|
||||
|
||||
# Registers a SumStats reducer that applies TOPK to the "test.metric"
# stream and, at the end of each 5-second epoch, prints up to five top
# entries per key together with their count and epsilon.
event bro_init() &priority=5
	{
	local reducer: SumStats::Reducer = [$stream="test.metric",
	                                    $apply=set(SumStats::TOPK)];

	SumStats::create([$epoch=5secs,
	                  $reducers=set(reducer),
	                  $epoch_finished(data: SumStats::ResultTable) =
		{
		for ( key in data )
			{
			local result = data[key]["test.metric"];

			local top: vector of SumStats::Observation;
			top = topk_get_top(result$topk, 5);

			print fmt("Top entries for key %s", key$str);
			for ( idx in top )
				{
				local obs = top[idx];
				print fmt("Num: %d, count: %d, epsilon: %d", obs$num, topk_count(result$topk, obs), topk_epsilon(result$topk, obs));
				}

			# Request shutdown; this is reached for every key printed.
			terminate();
			}
		}
	]);
	}
|
||||
|
||||
# Shut this node down as soon as any remote peer connection closes.
event remote_connection_closed(p: event_peer)
	{
	terminate();
	}
|
||||
|
||||
global ready_for_data: event();
redef Cluster::manager2worker_events += /^ready_for_data$/;

# Feeds the test observations into "test.metric". What is observed depends
# on which node handles the event:
#   worker-1: key "counter" with num 1, 2, 3, ... observed a decreasing
#             number of times, plus key "two" with num 1 twice.
#   worker-2: key "two" with num 2 four times and num 1 once, plus key
#             "counter" with num 995 once per element of loop_v.
event ready_for_data()
	{
	const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};

	if ( Cluster::node == "worker-1" )
		{
		# a numbers the outer iterations starting at 1 and is the value
		# that gets observed.
		local a = 0;

		for ( i in loop_v )
			{
			++a;

			# One observation of a for every index j greater than i.
			for ( j in loop_v )
				{
				if ( i < j )
					SumStats::observe("test.metric", [$str="counter"], [$num=a]);
				}
			}

		SumStats::observe("test.metric", [$str="two"], [$num=1]);
		SumStats::observe("test.metric", [$str="two"], [$num=1]);
		}

	if ( Cluster::node == "worker-2" )
		{
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=2]);
		SumStats::observe("test.metric", [$str="two"], [$num=1]);

		for ( n in loop_v )
			{
			SumStats::observe("test.metric", [$str="counter"], [$num=995]);
			}
		}
	}
|
||||
|
||||
@if ( Cluster::local_node_type() == Cluster::MANAGER )

# Number of peers whose handshake has completed so far.
global peer_count = 0;

# Manager only: raise ready_for_data once the second peer has completed
# its handshake.
event remote_connection_handshake_done(p: event_peer) &priority=-5
	{
	peer_count = peer_count + 1;

	if ( peer_count == 2 )
		event ready_for_data();
	}

@endif
|
||||
|
48
testing/btest/scripts/base/frameworks/sumstats/topk.bro
Normal file
48
testing/btest/scripts/base/frameworks/sumstats/topk.bro
Normal file
|
@ -0,0 +1,48 @@
|
|||
# @TEST-EXEC: bro %INPUT
|
||||
# @TEST-EXEC: btest-diff .stdout
|
||||
|
||||
# Stand-alone SumStats TOPK test: registers a TOPK reducer on
# "test.metric" with a 3-second epoch, then immediately feeds it
# observations for the keys "counter" and "two". At the end of the epoch the
# callback prints up to five top entries per key with count and epsilon.
event bro_init() &priority=5
	{
	local reducer: SumStats::Reducer = [$stream="test.metric",
	                                    $apply=set(SumStats::TOPK)];

	SumStats::create([$epoch=3secs,
	                  $reducers=set(reducer),
	                  $epoch_finished(data: SumStats::ResultTable) =
		{
		for ( key in data )
			{
			local result = data[key]["test.metric"];

			local top: vector of SumStats::Observation;
			top = topk_get_top(result$topk, 5);

			print fmt("Top entries for key %s", key$str);
			for ( idx in top )
				{
				local obs = top[idx];
				print fmt("Num: %d, count: %d, epsilon: %d", obs$num, topk_count(result$topk, obs), topk_epsilon(result$topk, obs));
				}
			}
		}
	]);

	const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};

	# a numbers the outer iterations starting at 1 and is the value that
	# gets observed under key "counter".
	local a = 0;

	for ( i in loop_v )
		{
		++a;

		# One observation of a for every index j greater than i.
		for ( j in loop_v )
			{
			if ( i < j )
				SumStats::observe("test.metric", [$str="counter"], [$num=a]);
			}
		}

	SumStats::observe("test.metric", [$str="two"], [$num=1]);
	SumStats::observe("test.metric", [$str="two"], [$num=1]);
	}
|
Loading…
Add table
Add a link
Reference in a new issue