mirror of
https://github.com/zeek/zeek.git
synced 2025-10-14 12:38:20 +00:00
telemetry: Invoke Telemetry::sync() only at scrape/collection time
This stops invoking Telemetry::sync() via a scheduled event and instead only invokes it on-demand. This makes metric collection network time independent and lazier, too. With Prometheus scrape requests being processed on Zeek's main thread now, we can safely invoke the script layer Telemetry::sync() hook. Closes #3947
This commit is contained in:
parent
e118887771
commit
70872673a1
16 changed files with 260 additions and 17 deletions
|
@ -0,0 +1,48 @@
|
|||
# @TEST-DOC: Calling collect_metrics() invokes Telemetry::sync.
|
||||
# Not compilable to C++ due to globals being initialized to a record that
|
||||
# has an opaque type as a field.
|
||||
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
|
||||
#
|
||||
# @TEST-EXEC: zeek -b %INPUT >out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
|
||||
|
||||
@load base/frameworks/telemetry
|
||||
|
||||
# Counter family registered with prefix "btest" and name "connections",
# carrying a single "proto" label. The Telemetry::sync() hook in this
# file increments it.
global connections_by_proto_cf = Telemetry::register_counter_family([
	$prefix="btest", $name="connections", $unit="",
	$help_text="Total number of monitored connections",
	$label_names=vector("proto")
]);
|
||||
|
||||
# Print one line per metric in `ms`: its name, label values and value,
# in vector order.
function print_metrics(ms: vector of Telemetry::Metric)
	{
	for ( idx in ms )
		{
		local metric = ms[idx];
		print metric$opts$name, metric$label_values, metric$value;
		}
	}
|
||||
|
||||
# Collect the "btest" metrics three times, printing each snapshot, then
# collect the histogram metrics once. Per this test's @TEST-DOC, calling
# collect_metrics() invokes Telemetry::sync().
event zeek_init()
	{
	print "node up";

	local snapshot: vector of Telemetry::Metric;
	local round = 0;

	while ( round < 3 )
		{
		snapshot = Telemetry::collect_metrics("btest");
		print_metrics(snapshot);
		++round;
		}

	# NOTE(review): the histogram result is never printed; the last
	# counter snapshot is printed again instead. Presumably this call is
	# only here to trigger one more Telemetry::sync() -- confirm intent
	# against the baseline.
	local histograms = Telemetry::collect_histogram_metrics("btest");
	print_metrics(snapshot);
	}
|
||||
|
||||
|
||||
# Number of times the Telemetry::sync() hook below has run.
global sync_calls = 0;

# Count the invocation, log it, and bump the counter for one protocol
# label: "tcp" on the first call, "udp" on every later call.
hook Telemetry::sync()
	{
	++sync_calls;

	local proto = "udp";
	if ( sync_calls == 1 )
		proto = "tcp";

	print "sync", sync_calls, proto;
	Telemetry::counter_family_inc(connections_by_proto_cf, vector(proto));
	}
|
|
@ -0,0 +1,43 @@
|
|||
# @TEST-DOC: Breaking and recursive Telemetry::sync() warning
|
||||
# Not compilable to C++ due to globals being initialized to a record that
|
||||
# has an opaque type as a field.
|
||||
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
|
||||
#
|
||||
# @TEST-EXEC: zeek -b %INPUT >out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
# @TEST-EXEC: btest-diff .stderr
|
||||
|
||||
|
||||
@load base/frameworks/telemetry
|
||||
|
||||
# Counter family registered with prefix "btest" and name "connections",
# carrying a single "proto" label. zeek_init() in this file increments
# its "tcp" instance once.
global connections_by_proto_cf = Telemetry::register_counter_family([
	$prefix="btest", $name="connections", $unit="",
	$help_text="Total number of monitored connections",
	$label_names=vector("proto")
]);
|
||||
|
||||
# Bump the "tcp" counter once, then collect the "btest" metrics. The
# result is intentionally unused; the collection is what exercises the
# Telemetry::sync() hooks defined in this file.
event zeek_init()
	{
	print "node up";

	Telemetry::counter_family_inc(connections_by_proto_cf, vector("tcp"));

	local metrics = Telemetry::collect_metrics("btest");
	}
|
||||
|
||||
|
||||
hook Telemetry::sync()
	{
	# Deliberately bad: collect_metrics() from inside Telemetry::sync()
	# would re-enter Telemetry::sync(). The manager is expected to warn
	# and skip that nested invocation.
	local metrics = Telemetry::collect_metrics("btest");
	}
|
||||
|
||||
hook Telemetry::sync() &priority=-100
	{
	# Deliberately bad: `break` stops any remaining Telemetry::sync()
	# hook bodies from running, which is expected to produce a warning.
	# Script validation could catch this statically if desired.
	break;
	}
|
68
testing/btest/scripts/base/frameworks/telemetry/sync.zeek
Normal file
68
testing/btest/scripts/base/frameworks/telemetry/sync.zeek
Normal file
|
@ -0,0 +1,68 @@
|
|||
# @TEST-DOC: Verify Telemetry::sync() is invoked for metric scraping via the Prometheus HTTP endpoint.
|
||||
# Not compilable to C++ due to globals being initialized to a record that
|
||||
# has an opaque type as a field.
|
||||
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
|
||||
# @TEST-REQUIRES: which jq
|
||||
# @TEST-REQUIRES: which curl
|
||||
#
|
||||
# @TEST-PORT: METRICS_PORT
|
||||
#
|
||||
# @TEST-EXEC: chmod +x fetch-metrics.sh
|
||||
# @TEST-EXEC: zeek --parse-only %INPUT
|
||||
# @TEST-EXEC: btest-bg-run zeek ZEEKPATH=$ZEEKPATH:.. zeek -b %INPUT
|
||||
# @TEST-EXEC: $SCRIPTS/wait-for-file zeek/up 5 || (btest-bg-wait -k 1 && false)
|
||||
# @TEST-EXEC: ./fetch-metrics.sh 1.trace metrics1.txt
|
||||
# @TEST-EXEC: ./fetch-metrics.sh 2.trace metrics2.txt
|
||||
# @TEST-EXEC: ./fetch-metrics.sh 3.trace metrics3.txt
|
||||
# @TEST-EXEC: btest-bg-wait 10
|
||||
#
|
||||
# @TEST-EXEC: btest-diff zeek/.stdout
|
||||
# @TEST-EXEC: btest-diff metrics1.txt
|
||||
# @TEST-EXEC: btest-diff metrics2.txt
|
||||
# @TEST-EXEC: btest-diff metrics3.txt
|
||||
|
||||
@TEST-START-FILE fetch-metrics.sh
|
||||
#! /usr/bin/env bash
#
# Scrape the metrics endpoint once and keep only lines starting with "btest".
#
# Usage: fetch-metrics.sh <curl-trace-file> <output-file>
# Reads METRICS_PORT from the environment; only the part before the first
# '/' is used as the port number.
set -ux

trace_file="$1"
output_file="$2"

PORT="$(echo "${METRICS_PORT}" | cut -d '/' -f 1)"
URL="http://localhost:${PORT}/metrics"

# All expansions are quoted so paths or values containing whitespace or
# glob characters are passed through unchanged.
curl -m 5 --trace "$trace_file" "$URL" | grep '^btest' > "$output_file"

# Force a zero exit status regardless of the pipeline's result (grep
# returns non-zero when no line matches).
exit 0
|
||||
@TEST-END-FILE
|
||||
|
||||
@load base/frameworks/telemetry
|
||||
|
||||
# Keep the process running until terminate() is called; the
# Telemetry::sync() hook in this file does that on its third invocation.
redef exit_only_after_terminate = T;

# Serve metrics on the port provided through the METRICS_PORT environment
# variable.
redef Telemetry::metrics_port = to_port(getenv("METRICS_PORT"));
|
||||
|
||||
# Announce startup on stdout and create the marker file "up" that the
# test's wait-for-file step polls for.
event zeek_init()
	{
	print "node up";

	system("touch up");
	}
|
||||
|
||||
# Counter family registered with prefix "btest" and name "connections",
# carrying a single "proto" label. The Telemetry::sync() hook in this
# file increments it.
global connections_by_proto_cf = Telemetry::register_counter_family([
	$prefix="btest", $name="connections", $unit="",
	$help_text="Total number of monitored connections",
	$label_names=vector("proto")
]);
|
||||
|
||||
# Number of times the Telemetry::sync() hook below has run.
global sync_calls = 0;

# Count the invocation, log it, and bump the counter for one protocol
# label: "tcp" on the first call, "udp" on every later call. Zeek is
# asked to shut down once the hook has run three times.
hook Telemetry::sync()
	{
	++sync_calls;

	local proto = "udp";
	if ( sync_calls == 1 )
		proto = "tcp";

	print "sync", sync_calls, proto;
	Telemetry::counter_family_inc(connections_by_proto_cf, vector(proto));

	if ( sync_calls == 3 )
		terminate();
	}
|
Loading…
Add table
Add a link
Reference in a new issue