mirror of
https://github.com/zeek/zeek.git
synced 2025-10-03 07:08:19 +00:00
Merge remote-tracking branch 'origin/topic/bernhard/hyperloglog'
* origin/topic/bernhard/hyperloglog: (32 commits) add clustered leak test for hll. No issues. make gcc happy (hopefully) fix refcounting problem in hll/bloom-filter opaque vals. Thanks Robin. re-use same hash class for all add operations get hll ready for merging and forgot a file... adapt to new structure fix opaqueval-related memleak. make it compile on case-sensitive file systems and fix warnings make error rate configurable add persistence test not using predetermined random seeds. update cluster test to also use hll persistence really works. well, with this commit synchronizing the data structure should work.. ...if we had consistent hashing. and also serialize the other things we need ok, this bug was hard to find. serialization compiles. change plugin after feedback of seth Forgot a file. Again. Like always. Basically. do away with old file. ...
This commit is contained in:
commit
4dcf8fc0db
29 changed files with 991 additions and 18 deletions
1
testing/btest/Baseline/bifs.hll_cardinality/.stderr
Normal file
1
testing/btest/Baseline/bifs.hll_cardinality/.stderr
Normal file
|
@ -0,0 +1 @@
|
|||
error: incompatible HLL data type
|
23
testing/btest/Baseline/bifs.hll_cardinality/out
Normal file
23
testing/btest/Baseline/bifs.hll_cardinality/out
Normal file
|
@ -0,0 +1,23 @@
|
|||
This value should be around 13:
|
||||
13.00129
|
||||
This value should be about 12:
|
||||
12.001099
|
||||
This value should be around 0:
|
||||
0.0
|
||||
This value should be around 13:
|
||||
13.00129
|
||||
This value should be 0:
|
||||
0.0
|
||||
This value should be true:
|
||||
T
|
||||
This value should be about 12:
|
||||
12.001099
|
||||
12.001099
|
||||
This value should be true:
|
||||
T
|
||||
This value should be about 21:
|
||||
21.003365
|
||||
This value should be about 13:
|
||||
13.00129
|
||||
This value should be about 12:
|
||||
12.001099
|
|
@ -0,0 +1,2 @@
|
|||
This value should be about 21:
|
||||
21.003365
|
|
@ -0,0 +1,2 @@
|
|||
This value should be around 13:
|
||||
13.00129
|
|
@ -0,0 +1,2 @@
|
|||
This value should be about 12:
|
||||
12.001099
|
6
testing/btest/Baseline/istate.hll/out
Normal file
6
testing/btest/Baseline/istate.hll/out
Normal file
|
@ -0,0 +1,6 @@
|
|||
1
|
||||
10.000763
|
||||
2
|
||||
10.000763
|
||||
3
|
||||
11.000923
|
|
@ -1,4 +1,4 @@
|
|||
Host: 6.5.4.3 - num:2 - sum:6.0 - avg:3.0 - max:5.0 - min:1.0 - var:8.0 - std_dev:2.8 - unique:2
|
||||
Host: 10.10.10.10 - num:1 - sum:5.0 - avg:5.0 - max:5.0 - min:5.0 - var:0.0 - std_dev:0.0 - unique:1
|
||||
Host: 1.2.3.4 - num:9 - sum:437.0 - avg:48.6 - max:95.0 - min:3.0 - var:758.8 - std_dev:27.5 - unique:8
|
||||
Host: 7.2.1.5 - num:2 - sum:145.0 - avg:72.5 - max:91.0 - min:54.0 - var:684.5 - std_dev:26.2 - unique:2
|
||||
Host: 6.5.4.3 - num:2 - sum:6.0 - avg:3.0 - max:5.0 - min:1.0 - var:8.0 - std_dev:2.8 - unique:2 - hllunique:2
|
||||
Host: 10.10.10.10 - num:1 - sum:5.0 - avg:5.0 - max:5.0 - min:5.0 - var:0.0 - std_dev:0.0 - unique:1 - hllunique:1
|
||||
Host: 1.2.3.4 - num:9 - sum:437.0 - avg:48.6 - max:95.0 - min:3.0 - var:758.8 - std_dev:27.5 - unique:8 - hllunique:8
|
||||
Host: 7.2.1.5 - num:2 - sum:145.0 - avg:72.5 - max:91.0 - min:54.0 - var:684.5 - std_dev:26.2 - unique:2 - hllunique:2
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
Host: 6.5.4.3 - num:1 - sum:2.0 - var:0.0 - avg:2.0 - max:2.0 - min:2.0 - std_dev:0.0 - unique:1
|
||||
Host: 1.2.3.4 - num:5 - sum:221.0 - var:1144.2 - avg:44.2 - max:94.0 - min:5.0 - std_dev:33.8 - unique:4
|
||||
Host: 7.2.1.5 - num:1 - sum:1.0 - var:0.0 - avg:1.0 - max:1.0 - min:1.0 - std_dev:0.0 - unique:1
|
||||
Host: 6.5.4.3 - num:1 - sum:2.0 - var:0.0 - avg:2.0 - max:2.0 - min:2.0 - std_dev:0.0 - unique:1 - hllunique:1
|
||||
Host: 1.2.3.4 - num:5 - sum:221.0 - var:1144.2 - avg:44.2 - max:94.0 - min:5.0 - std_dev:33.8 - unique:4 - hllunique:4
|
||||
Host: 7.2.1.5 - num:1 - sum:1.0 - var:0.0 - avg:1.0 - max:1.0 - min:1.0 - std_dev:0.0 - unique:1 - hllunique:1
|
||||
|
|
83
testing/btest/bifs/hll_cardinality.bro
Normal file
83
testing/btest/bifs/hll_cardinality.bro
Normal file
|
@ -0,0 +1,83 @@
|
|||
#
|
||||
# @TEST-EXEC: bro %INPUT>out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
# @TEST-EXEC: btest-diff .stderr
|
||||
|
||||
event bro_init()
|
||||
{
|
||||
local c1 = hll_cardinality_init(0.01);
|
||||
local c2 = hll_cardinality_init(0.01);
|
||||
|
||||
local add1 = 2001;
|
||||
local add2 = 2002;
|
||||
local add3 = 2003;
|
||||
|
||||
hll_cardinality_add(c1, add1);
|
||||
hll_cardinality_add(c1, add2);
|
||||
hll_cardinality_add(c1, add3);
|
||||
hll_cardinality_add(c1, 1000);
|
||||
hll_cardinality_add(c1, 1001);
|
||||
hll_cardinality_add(c1, 101);
|
||||
hll_cardinality_add(c1, 1003);
|
||||
hll_cardinality_add(c1, 1004);
|
||||
hll_cardinality_add(c1, 1005);
|
||||
hll_cardinality_add(c1, 1006);
|
||||
hll_cardinality_add(c1, 1007);
|
||||
hll_cardinality_add(c1, 1008);
|
||||
hll_cardinality_add(c1, 1009);
|
||||
|
||||
hll_cardinality_add(c2, add1);
|
||||
hll_cardinality_add(c2, add2);
|
||||
hll_cardinality_add(c2, add3);
|
||||
hll_cardinality_add(c2, 1);
|
||||
hll_cardinality_add(c2, "b");
|
||||
hll_cardinality_add(c2, 101);
|
||||
hll_cardinality_add(c2, 2);
|
||||
hll_cardinality_add(c2, 3);
|
||||
hll_cardinality_add(c2, 4);
|
||||
hll_cardinality_add(c2, 5);
|
||||
hll_cardinality_add(c2, 6);
|
||||
hll_cardinality_add(c2, 7);
|
||||
hll_cardinality_add(c2, 8);
|
||||
|
||||
print "This value should be around 13:";
|
||||
print hll_cardinality_estimate(c1);
|
||||
|
||||
print "This value should be about 12:";
|
||||
print hll_cardinality_estimate(c2);
|
||||
|
||||
local m2 = hll_cardinality_init(0.02);
|
||||
|
||||
print "This value should be around 0:";
|
||||
print hll_cardinality_estimate(m2);
|
||||
|
||||
local c3 = hll_cardinality_copy(c1);
|
||||
|
||||
print "This value should be around 13:";
|
||||
print hll_cardinality_estimate(c3);
|
||||
|
||||
c3 = hll_cardinality_init(0.01);
|
||||
print "This value should be 0:";
|
||||
print hll_cardinality_estimate(c3);
|
||||
|
||||
print "This value should be true:";
|
||||
print hll_cardinality_merge_into(c3, c2);
|
||||
|
||||
print "This value should be about 12:";
|
||||
print hll_cardinality_estimate(c2);
|
||||
print hll_cardinality_estimate(c3);
|
||||
|
||||
print "This value should be true:";
|
||||
print hll_cardinality_merge_into(c2, c1);
|
||||
|
||||
print "This value should be about 21:";
|
||||
print hll_cardinality_estimate(c2);
|
||||
|
||||
print "This value should be about 13:";
|
||||
print hll_cardinality_estimate(c1);
|
||||
|
||||
print "This value should be about 12:";
|
||||
print hll_cardinality_estimate(c3);
|
||||
|
||||
}
|
||||
|
111
testing/btest/core/leaks/hll_cluster.bro
Normal file
111
testing/btest/core/leaks/hll_cluster.bro
Normal file
|
@ -0,0 +1,111 @@
|
|||
# Needs perftools support.
|
||||
#
|
||||
# @TEST-SERIALIZE: comm
|
||||
# @TEST-GROUP: leaks
|
||||
#
|
||||
# @TEST-REQUIRES: bro --help 2>&1 | grep -q mem-leaks
|
||||
#
|
||||
# @TEST-EXEC: bro %INPUT>out
|
||||
# @TEST-EXEC: btest-bg-run manager-1 HEAP_CHECK_DUMP_DIRECTORY=. HEAPCHECK=local BROPATH=$BROPATH:.. CLUSTER_NODE=manager-1 bro %INPUT
|
||||
# @TEST-EXEC: sleep 2
|
||||
# @TEST-EXEC: btest-bg-run worker-1 HEAP_CHECK_DUMP_DIRECTORY=. HEAPCHECK=local BROPATH=$BROPATH:.. CLUSTER_NODE=worker-1 bro runnumber=1 %INPUT
|
||||
# @TEST-EXEC: btest-bg-run worker-2 HEAP_CHECK_DUMP_DIRECTORY=. HEAPCHECK=local BROPATH=$BROPATH:.. CLUSTER_NODE=worker-2 bro runnumber=2 %INPUT
|
||||
# @TEST-EXEC: btest-bg-wait -k 10
|
||||
#
|
||||
# @TEST-EXEC: btest-diff manager-1/.stdout
|
||||
# @TEST-EXEC: btest-diff worker-1/.stdout
|
||||
# @TEST-EXEC: btest-diff worker-2/.stdout
|
||||
|
||||
@TEST-START-FILE cluster-layout.bro
|
||||
redef Cluster::nodes = {
|
||||
["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=37757/tcp, $workers=set("worker-1", "worker-2")],
|
||||
["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37760/tcp, $manager="manager-1"],
|
||||
["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=37761/tcp, $manager="manager-1"],
|
||||
};
|
||||
@TEST-END-FILE
|
||||
|
||||
redef Log::default_rotation_interval = 0secs;
|
||||
|
||||
global hll_data: event(data: opaque of cardinality);
|
||||
|
||||
redef Cluster::worker2manager_events += /hll_data/;
|
||||
|
||||
@if ( Cluster::local_node_type() == Cluster::WORKER )
|
||||
|
||||
global runnumber: count &redef; # differentiate runs
|
||||
|
||||
event remote_connection_handshake_done(p: event_peer)
|
||||
{
|
||||
local c = hll_cardinality_init(0.01);
|
||||
|
||||
local add1 = 2001;
|
||||
local add2 = 2002;
|
||||
local add3 = 2003;
|
||||
|
||||
if ( runnumber == 1 )
|
||||
{
|
||||
hll_cardinality_add(c, add1);
|
||||
hll_cardinality_add(c, add2);
|
||||
hll_cardinality_add(c, add3);
|
||||
hll_cardinality_add(c, 1000);
|
||||
hll_cardinality_add(c, 1001);
|
||||
hll_cardinality_add(c, 101);
|
||||
hll_cardinality_add(c, 1003);
|
||||
hll_cardinality_add(c, 1004);
|
||||
hll_cardinality_add(c, 1005);
|
||||
hll_cardinality_add(c, 1006);
|
||||
hll_cardinality_add(c, 1007);
|
||||
hll_cardinality_add(c, 1008);
|
||||
hll_cardinality_add(c, 1009);
|
||||
print "This value should be around 13:";
|
||||
print hll_cardinality_estimate(c);
|
||||
}
|
||||
else if ( runnumber == 2 )
|
||||
{
|
||||
hll_cardinality_add(c, add1);
|
||||
hll_cardinality_add(c, add2);
|
||||
hll_cardinality_add(c, add3);
|
||||
hll_cardinality_add(c, 1);
|
||||
hll_cardinality_add(c, 101);
|
||||
hll_cardinality_add(c, 2);
|
||||
hll_cardinality_add(c, 3);
|
||||
hll_cardinality_add(c, 4);
|
||||
hll_cardinality_add(c, 5);
|
||||
hll_cardinality_add(c, 6);
|
||||
hll_cardinality_add(c, 7);
|
||||
hll_cardinality_add(c, 8);
|
||||
print "This value should be about 12:";
|
||||
print hll_cardinality_estimate(c);
|
||||
}
|
||||
|
||||
event hll_data(c);
|
||||
|
||||
terminate();
|
||||
}
|
||||
|
||||
@endif
|
||||
|
||||
@if ( Cluster::local_node_type() == Cluster::MANAGER )
|
||||
|
||||
global result_count = 0;
|
||||
global hll: opaque of cardinality;
|
||||
|
||||
event bro_init()
|
||||
{
|
||||
hll = hll_cardinality_init(0.01);
|
||||
}
|
||||
|
||||
event hll_data(data: opaque of cardinality)
|
||||
{
|
||||
hll_cardinality_merge_into(hll, data);
|
||||
++result_count;
|
||||
|
||||
if ( result_count == 2 )
|
||||
{
|
||||
print "This value should be about 21:";
|
||||
print hll_cardinality_estimate(hll);
|
||||
terminate();
|
||||
}
|
||||
}
|
||||
|
||||
@endif
|
40
testing/btest/istate/hll.bro
Normal file
40
testing/btest/istate/hll.bro
Normal file
|
@ -0,0 +1,40 @@
|
|||
# @TEST-EXEC: bro -b %INPUT runnumber=1 >out
|
||||
# @TEST-EXEC: bro -b %INPUT runnumber=2 >>out
|
||||
# @TEST-EXEC: bro -b %INPUT runnumber=3 >>out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
|
||||
global runnumber: count &redef; # differentiate the three runs (1, 2 and 3)
|
||||
|
||||
global card: opaque of cardinality &persistent;
|
||||
|
||||
event bro_init()
|
||||
{
|
||||
print runnumber;
|
||||
|
||||
if ( runnumber == 1 )
|
||||
{
|
||||
card = hll_cardinality_init(0.01);
|
||||
|
||||
hll_cardinality_add(card, "a");
|
||||
hll_cardinality_add(card, "b");
|
||||
hll_cardinality_add(card, "c");
|
||||
hll_cardinality_add(card, "d");
|
||||
hll_cardinality_add(card, "e");
|
||||
hll_cardinality_add(card, "f");
|
||||
hll_cardinality_add(card, "g");
|
||||
hll_cardinality_add(card, "h");
|
||||
hll_cardinality_add(card, "i");
|
||||
hll_cardinality_add(card, "j");
|
||||
}
|
||||
|
||||
print hll_cardinality_estimate(card);
|
||||
|
||||
if ( runnumber == 2 )
|
||||
{
|
||||
hll_cardinality_add(card, "a");
|
||||
hll_cardinality_add(card, "b");
|
||||
hll_cardinality_add(card, "c");
|
||||
hll_cardinality_add(card, "aa");
|
||||
}
|
||||
}
|
||||
|
|
@ -22,14 +22,14 @@ global n = 0;
|
|||
|
||||
event bro_init() &priority=5
|
||||
{
|
||||
local r1: SumStats::Reducer = [$stream="test", $apply=set(SumStats::SUM, SumStats::MIN, SumStats::MAX, SumStats::AVERAGE, SumStats::STD_DEV, SumStats::VARIANCE, SumStats::UNIQUE)];
|
||||
local r1: SumStats::Reducer = [$stream="test", $apply=set(SumStats::SUM, SumStats::MIN, SumStats::MAX, SumStats::AVERAGE, SumStats::STD_DEV, SumStats::VARIANCE, SumStats::UNIQUE, SumStats::HLL_UNIQUE)];
|
||||
SumStats::create([$name="test",
|
||||
$epoch=5secs,
|
||||
$reducers=set(r1),
|
||||
$epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
|
||||
{
|
||||
local r = result["test"];
|
||||
print fmt("Host: %s - num:%d - sum:%.1f - avg:%.1f - max:%.1f - min:%.1f - var:%.1f - std_dev:%.1f - unique:%d", key$host, r$num, r$sum, r$average, r$max, r$min, r$variance, r$std_dev, r$unique);
|
||||
print fmt("Host: %s - num:%d - sum:%.1f - avg:%.1f - max:%.1f - min:%.1f - var:%.1f - std_dev:%.1f - unique:%d - hllunique:%d", key$host, r$num, r$sum, r$average, r$max, r$min, r$variance, r$std_dev, r$unique, r$hllunique);
|
||||
},
|
||||
$epoch_finished(ts: time) =
|
||||
{
|
||||
|
|
|
@ -10,14 +10,15 @@ event bro_init() &priority=5
|
|||
SumStats::MAX,
|
||||
SumStats::MIN,
|
||||
SumStats::STD_DEV,
|
||||
SumStats::UNIQUE)];
|
||||
SumStats::UNIQUE,
|
||||
SumStats::HLL_UNIQUE)];
|
||||
SumStats::create([$name="test",
|
||||
$epoch=3secs,
|
||||
$reducers=set(r1),
|
||||
$epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
|
||||
{
|
||||
local r = result["test.metric"];
|
||||
print fmt("Host: %s - num:%d - sum:%.1f - var:%.1f - avg:%.1f - max:%.1f - min:%.1f - std_dev:%.1f - unique:%d", key$host, r$num, r$sum, r$variance, r$average, r$max, r$min, r$std_dev, r$unique);
|
||||
print fmt("Host: %s - num:%d - sum:%.1f - var:%.1f - avg:%.1f - max:%.1f - min:%.1f - std_dev:%.1f - unique:%d - hllunique:%d", key$host, r$num, r$sum, r$variance, r$average, r$max, r$min, r$std_dev, r$unique, r$hllunique);
|
||||
}
|
||||
]);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue