mirror of
https://github.com/zeek/zeek.git
synced 2025-10-08 17:48:21 +00:00
Introduce telemetry framework
Adds base/frameworks/telemetry with wrappers around telemetry.bif and updates telemetry/Manager to support collecting metrics from script land. Add policy/frameworks/telemetry/log for logging of metrics data into a new telemetry.log and telemetry_histogram.log and add into local.zeek by default.
This commit is contained in:
parent
95fba8fd29
commit
3fe930dbf2
32 changed files with 1950 additions and 27 deletions
116
testing/btest/scripts/base/frameworks/telemetry/basic.zeek
Normal file
116
testing/btest/scripts/base/frameworks/telemetry/basic.zeek
Normal file
|
@ -0,0 +1,116 @@
|
|||
# @TEST-DOC: Using and listing of counters and gauges using the telemetry module.
|
||||
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
# @TEST-EXEC-FAIL: test -f reporter.log
|
||||
|
||||
@load base/frameworks/telemetry
|
||||
|
||||
# Three counter families under the "btest" prefix, each with the label
# names ("x", "y"). They differ only in name and help text.
global btest_a_cf = Telemetry::register_counter_family([
	$prefix="btest", $name="a_test", $unit="1",
	$labels=vector("x", "y"),
	$help_text="A btest metric"
]);

global btest_b_cf = Telemetry::register_counter_family([
	$prefix="btest", $name="b_test", $unit="1",
	$labels=vector("x", "y"),
	$help_text="Another btest metric"
]);

global btest_c_cf = Telemetry::register_counter_family([
	$prefix="btest", $name="c_test", $unit="1",
	$labels=vector("x", "y"),
	$help_text="The last btest metric"
]);

# Gauge family under the "system" prefix with a single "name" label.
global system_sensor_temp_gf = Telemetry::register_gauge_family([
	$prefix="system", $name="sensor_temperature", $unit="celsius",
	$labels=vector("name"),
	$help_text="Temperatures reported by sensors in the system"
]);

# Histogram family under the "btest" prefix with a single "dim" label and
# five bucket bounds. zeek_done() below reads it back through
# Telemetry::collect_histogram_metrics().
global btest_sample_histogram_hf = Telemetry::register_histogram_family([
	$prefix="btest", $name="sample_histogram", $unit="1",
	$labels=vector("dim"),
	$bounds=vector(1.0, 2.0, 3.0, 4.0, 5.0),
	$help_text="A sample histogram that is not returned by Telemetry::collect_metrics"
]);
|
||||
|
||||
# Print a "### <what> |<size>|" header, then one line per entry in
# `metrics` with its metric type, prefix, name, label names, label values
# and value. If an entry has its count_value field set, that value is
# printed on an additional line.
function print_metrics(what: string, metrics: vector of Telemetry::Metric)
	{
	print fmt("### %s |%s|", what, |metrics|);

	for ( idx in metrics )
		{
		local metric = metrics[idx];
		local opts = metric$opts;

		print opts$metric_type, opts$prefix, opts$name, opts$labels, metric$labels, metric$value;

		if ( metric?$count_value )
			print "count_value", metric$count_value;
		}
	}
|
||||
|
||||
# Print a "### <what> |<size>|" header, then one line per histogram entry
# with its metric type, prefix, name, bounds, label names, label values,
# values, sum and observations.
function print_histogram_metrics(what: string, metrics: vector of Telemetry::HistogramMetric)
	{
	print fmt("### %s |%s|", what, |metrics|);

	for ( idx in metrics )
		{
		local hist = metrics[idx];
		local opts = hist$opts;

		print opts$metric_type, opts$prefix, opts$name, opts$bounds, opts$labels, hist$labels, hist$values, hist$sum, hist$observations;
		}
	}
|
||||
|
||||
# Registered with priority -100. Updates the counter, gauge and histogram
# families declared above, then collects metrics by prefix/name pattern and
# prints them so the output can be compared against the test baseline.
event zeek_done() &priority=-100
	{
	# btest_a: calls without an explicit amount; ("a", "c") is used twice.
	Telemetry::counter_family_inc(btest_a_cf, vector("a", "b"));
	Telemetry::counter_family_inc(btest_a_cf, vector("a", "c"));
	Telemetry::counter_family_inc(btest_a_cf, vector("a", "c"));

	# btest_b: calls with an explicit amount per label combination.
	Telemetry::counter_family_inc(btest_b_cf, vector("a", "b"), 10.0);
	Telemetry::counter_family_inc(btest_b_cf, vector("a", "c"), 20.0);

	# btest_c: the same label combination is set twice.
	Telemetry::counter_family_set(btest_c_cf, vector("a", "b"), 100.0);
	Telemetry::counter_family_set(btest_c_cf, vector("a", "b"), 200.0);

	# Gauges: set, plus an inc on "cpu1" and a dec on "cpu3".
	Telemetry::gauge_family_set(system_sensor_temp_gf, vector("cpu0"), 43.0);
	Telemetry::gauge_family_set(system_sensor_temp_gf, vector("cpu1"), 43.1);
	Telemetry::gauge_family_inc(system_sensor_temp_gf, vector("cpu1"));
	Telemetry::gauge_family_set(system_sensor_temp_gf, vector("cpu3"), 43.2);
	Telemetry::gauge_family_dec(system_sensor_temp_gf, vector("cpu3"));

	# Histogram observations for dim="a" ...
	Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("a"), 0.5);
	Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("a"), 0.9);
	Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("a"), 1.1);
	Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("a"), 2.0);
	Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("a"), 7.0);

	# ... and for dim="b".
	Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("b"), 0.5);
	Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("b"), 7.0);

	# Collect and print; each collected vector goes directly to its printer.
	print_metrics("zeek_session_metrics", Telemetry::collect_metrics("zeek", "*session*"));
	print_metrics("bt* metrics", Telemetry::collect_metrics("bt*", "*"));
	print_metrics("btest_a_metrics", Telemetry::collect_metrics("btest", "a_*"));
	print_metrics("btest_b_metrics", Telemetry::collect_metrics("btest", "b_*"));
	print_metrics("system_metrics", Telemetry::collect_metrics("system"));

	print_histogram_metrics("btest_histogram_metrics", Telemetry::collect_histogram_metrics("btest"));
	}
|
|
@ -0,0 +1,48 @@
|
|||
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
# @TEST-EXEC-FAIL: test -f reporter.log
|
||||
|
||||
@load base/frameworks/telemetry
|
||||
|
||||
# Histogram family without labels for connection durations in seconds.
global connection_duration_hf = Telemetry::register_histogram_family([
	$prefix="zeek",
	$name="connection_duration",
	$unit="seconds",
	$help_text="Monitored connection durations",
	$bounds=vector(2.0, 3.0, 4.0, 5.0, 6.0, 10.0)
]);

# Histogram family for connection durations in seconds with a single
# "proto" label. All bounds are written as double literals; the original
# list contained a bare integer `300` among doubles.
global realistic_connection_duration_hf = Telemetry::register_histogram_family([
	$prefix="zeek",
	$name="realistic_connection_duration",
	$labels=vector("proto"),
	$unit="seconds",
	$help_text="Monitored connection durations by protocol",
	$bounds=vector(0.1, 1.0, 10.0, 30.0, 60.0, 120.0, 300.0, 900.0, 1800.0)
]);

# Histogram handle obtained from the label-less family; used by
# connection_state_remove() below.
global connection_duration_h = Telemetry::histogram_with(connection_duration_hf);
|
||||
|
||||
# Record the connection's duration in both histograms: once on the
# label-less handle, and once on the "proto"-labelled family using the
# lower-cased transport protocol of the responder port as the label value.
event connection_state_remove(c: connection)
	{
	local secs = interval_to_double(c$duration);
	Telemetry::histogram_observe(connection_duration_h, secs);

	local proto_label = to_lower(cat(get_port_transport_proto(c$id$resp_p)));
	Telemetry::histogram_family_observe(realistic_connection_duration_hf, vector(proto_label), secs);
	}
|
||||
|
||||
# Registered with priority -100. Collects the histogram metrics under the
# "zeek" prefix whose name matches "*connection_duration" and prints, per
# entry: type/prefix/name, label names, label values, bounds, values, and
# finally observations together with sum.
event zeek_done() &priority=-100
	{
	local collected = Telemetry::collect_histogram_metrics("zeek", "*connection_duration");

	for ( idx in collected )
		{
		local entry = collected[idx];
		local opts = entry$opts;

		print opts$metric_type, opts$prefix, opts$name;
		print opts$labels;
		print entry$labels;
		print opts$bounds;
		print entry$values;
		print entry$observations, entry$sum;
		}
	}
|
|
@ -0,0 +1,43 @@
|
|||
# @TEST-DOC: Query some internal broker/caf related metrics as they use the int64_t versions, too.
|
||||
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
# @TEST-EXEC-FAIL: test -f reporter.log
|
||||
|
||||
@load base/frameworks/telemetry
|
||||
|
||||
# Print a "### <what> |<size>|" header, then for each histogram entry its
# metric type, prefix, name, bounds, label names and label values. If the
# entry's options have count_bounds set, those are printed on an extra line.
function print_histogram_metrics(what: string, metrics: vector of Telemetry::HistogramMetric)
	{
	print fmt("### %s |%s|", what, |metrics|);

	for ( idx in metrics )
		{
		local hist = metrics[idx];
		local opts = hist$opts;

		print opts$metric_type, opts$prefix, opts$name, opts$bounds, opts$labels, hist$labels;

		# Don't output actual values as they are runtime dependent.
		# print hist$values, hist$sum, hist$observations;

		if ( opts?$count_bounds )
			print opts$count_bounds;
		}
	}
|
||||
|
||||
# Print a "### <what> |<size>|" header, then one line per entry in
# `metrics` with its metric type, prefix, name, label names, label values
# and value. If an entry has its count_value field set, that value is
# printed on an additional line.
function print_metrics(what: string, metrics: vector of Telemetry::Metric)
	{
	print fmt("### %s |%s|", what, |metrics|);

	for ( idx in metrics )
		{
		local metric = metrics[idx];
		local opts = metric$opts;

		print opts$metric_type, opts$prefix, opts$name, opts$labels, metric$labels, metric$value;

		if ( metric?$count_value )
			print "count_value", metric$count_value;
		}
	}
|
||||
|
||||
# Registered with priority -100. Collects metrics under the "broker" prefix
# and under prefixes matching "caf*" (plain and histogram) and prints them
# with the helper functions above.
event zeek_done() &priority=-100
	{
	print_metrics("broker", Telemetry::collect_metrics("broker", "*"));
	print_metrics("caf", Telemetry::collect_metrics("caf*", "*"));
	print_histogram_metrics("caf", Telemetry::collect_histogram_metrics("caf*", "*"));
	}
|
Loading…
Add table
Add a link
Reference in a new issue