Merge remote-tracking branch 'origin/topic/awelzel/telemetry-script-land-v0'

* origin/topic/awelzel/telemetry-script-land-v0:
  Introduce telemetry framework
  Bump broker submodule to master.
This commit is contained in:
Robin Sommer 2022-08-05 16:50:18 +02:00
commit 686e740bbe
No known key found for this signature in database
GPG key ID: 6BEDA4DA6B8B23E3
36 changed files with 1972 additions and 38 deletions

View file

@ -336,6 +336,9 @@ scripts/base/init-default.zeek
scripts/base/frameworks/netcontrol/drop.zeek
scripts/base/frameworks/netcontrol/shunt.zeek
scripts/base/frameworks/netcontrol/non-cluster.zeek
scripts/base/frameworks/telemetry/__load__.zeek
scripts/base/frameworks/telemetry/main.zeek
scripts/base/misc/version.zeek
scripts/base/protocols/conn/__load__.zeek
scripts/base/protocols/conn/main.zeek
scripts/base/protocols/conn/contents.zeek
@ -445,7 +448,6 @@ scripts/base/init-default.zeek
scripts/base/misc/find-checksum-offloading.zeek
scripts/base/misc/find-filtered-trace.zeek
build/scripts/base/misc/installation.zeek
scripts/base/misc/version.zeek
build/scripts/builtin-plugins/__preload__.zeek
build/scripts/builtin-plugins/Zeek_Spicy/__preload__.zeek
build/scripts/builtin-plugins/__load__.zeek

View file

@ -57,6 +57,8 @@ ssh
ssl
stats
syslog
telemetry
telemetry_histogram
traceroute
tunnel
unified2

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,25 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
### zeek_session_metrics |2|
Telemetry::INT_GAUGE, zeek, active-sessions, [protocol], [tcp], 500.0
count_value, 500
Telemetry::INT_COUNTER, zeek, total-sessions, [protocol], [tcp], 500.0
count_value, 500
### bt* metrics |5|
Telemetry::DOUBLE_COUNTER, btest, a_test, [x, y], [a, b], 1.0
Telemetry::DOUBLE_COUNTER, btest, a_test, [x, y], [a, c], 2.0
Telemetry::DOUBLE_COUNTER, btest, b_test, [x, y], [a, b], 10.0
Telemetry::DOUBLE_COUNTER, btest, b_test, [x, y], [a, c], 20.0
Telemetry::DOUBLE_COUNTER, btest, c_test, [x, y], [a, b], 200.0
### btest_a_metrics |2|
Telemetry::DOUBLE_COUNTER, btest, a_test, [x, y], [a, b], 1.0
Telemetry::DOUBLE_COUNTER, btest, a_test, [x, y], [a, c], 2.0
### btest_b_metrics |2|
Telemetry::DOUBLE_COUNTER, btest, b_test, [x, y], [a, b], 10.0
Telemetry::DOUBLE_COUNTER, btest, b_test, [x, y], [a, c], 20.0
### system_metrics |3|
Telemetry::DOUBLE_GAUGE, system, sensor_temperature, [name], [cpu0], 43.0
Telemetry::DOUBLE_GAUGE, system, sensor_temperature, [name], [cpu1], 44.1
Telemetry::DOUBLE_GAUGE, system, sensor_temperature, [name], [cpu3], 42.2
### btest_histogram_metrics |2|
Telemetry::DOUBLE_HISTOGRAM, btest, sample_histogram, [1.0, 2.0, 3.0, 4.0, 5.0, inf], [dim], [a], [2.0, 2.0, 0.0, 0.0, 0.0, 1.0], 11.5, 5.0
Telemetry::DOUBLE_HISTOGRAM, btest, sample_histogram, [1.0, 2.0, 3.0, 4.0, 5.0, inf], [dim], [b], [1.0, 0.0, 0.0, 0.0, 0.0, 1.0], 7.5, 2.0

View file

@ -0,0 +1,13 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Telemetry::DOUBLE_HISTOGRAM, zeek, connection_duration
[]
[]
[2.0, 3.0, 4.0, 5.0, 6.0, 10.0, inf]
[0.0, 322.0, 90.0, 5.0, 76.0, 7.0, 0.0]
500.0, 1650.264644
Telemetry::DOUBLE_HISTOGRAM, zeek, realistic_connection_duration
[proto]
[tcp]
[0.1, 1.0, 10.0, 30.0, 60.0, 120.0, 300.0, 900.0, 1800.0, inf]
[0.0, 0.0, 500.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
500.0, 1650.264644

View file

@ -0,0 +1,26 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
### broker |5|
Telemetry::INT_COUNTER, broker, processed-elements, [type], [data], 0.0
count_value, 0
Telemetry::INT_COUNTER, broker, processed-elements, [type], [command], 0.0
count_value, 0
Telemetry::INT_COUNTER, broker, processed-elements, [type], [routing-update], 0.0
count_value, 0
Telemetry::INT_COUNTER, broker, processed-elements, [type], [ping], 0.0
count_value, 0
Telemetry::INT_COUNTER, broker, processed-elements, [type], [pong], 0.0
count_value, 0
### caf |5|
Telemetry::INT_COUNTER, caf.system, rejected-messages, [], [], 0.0
count_value, 0
Telemetry::INT_COUNTER, caf.system, processed-messages, [], [], 7.0
count_value, 7
Telemetry::INT_GAUGE, caf.system, running-actors, [], [], 2.0
count_value, 2
Telemetry::INT_GAUGE, caf.system, queued-messages, [], [], 0.0
count_value, 0
Telemetry::INT_GAUGE, caf.actor, mailbox-size, [name], [broker.core], 0.0
count_value, 0
### caf |2|
Telemetry::DOUBLE_HISTOGRAM, caf.actor, processing-time, [0.00001, 0.0001, 0.0005, 0.001, 0.01, 0.1, 0.5, 1.0, 5.0, inf], [name], [broker.core]
Telemetry::DOUBLE_HISTOGRAM, caf.actor, mailbox-time, [0.00001, 0.0001, 0.0005, 0.001, 0.01, 0.1, 0.5, 1.0, 5.0, inf], [name], [broker.core]

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path telemetry
#open XXXX-XX-XX-XX-XX-XX
#fields ts peer metric_type prefix name unit labels label_values value
#types time string string string string string vector[string] vector[string] double
XXXXXXXXXX.XXXXXX zeek counter btest connections - proto tcp 500.0
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,12 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path telemetry_histogram
#open XXXX-XX-XX-XX-XX-XX
#fields ts peer prefix name unit labels label_values bounds values sum observations
#types time string string string string vector[string] vector[string] vector[double] vector[double] double double
XXXXXXXXXX.XXXXXX zeek btest connection_duration seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0
XXXXXXXXXX.XXXXXX zeek btest connection_duration seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,322.0,90.0,5.0,76.0,7.0,0.0 1650.264644 500.0
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
XXXXXXXXXX.XXXXXX - _zeek connection_duration (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0
XXXXXXXXXX.XXXXXX - _zeek connection_duration (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,322.0,90.0,5.0,76.0,7.0,0.0 1650.264644 500.0

View file

@ -0,0 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
XXXXXXXXXX.XXXXXX zeek gauge zeek active-sessions - protocol tcp 1.0
XXXXXXXXXX.XXXXXX zeek counter zeek total-sessions - protocol tcp 1.0
XXXXXXXXXX.XXXXXX zeek gauge zeek active-sessions - protocol tcp 500.0
XXXXXXXXXX.XXXXXX zeek counter zeek total-sessions - protocol tcp 500.0

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
XXXXXXXXXX.XXXXXX zeek zeek connection_duration seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0
XXXXXXXXXX.XXXXXX zeek zeek connection_duration seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,322.0,90.0,5.0,76.0,7.0,0.0 1650.264644 500.0

View file

@ -1,4 +1,3 @@
#include "Plugin.h"
#include <Conn.h>
@ -8,6 +7,7 @@
#include <RunState.h>
#include <threading/Formatter.h>
#include <cstring>
#include <set>
namespace btest::plugin::Demo_Hooks
{
@ -16,6 +16,22 @@ Plugin plugin;
using namespace btest::plugin::Demo_Hooks;
// Names of functions whose arguments are replaced with "(...)" in the
// hook output. These functions receive the current version string, or
// parts of it, which would make the baseline non-deterministic.
// Both the hook-argument description and the function-call hook look
// up the described function name in this set.
static std::set<std::string> sanitized_functions = {
"Version::parse",
"gsub",
"split_string1",
"lstrip",
"to_count",
"cat",
"Telemetry::__dbl_gauge_metric_get_or_add",
"Telemetry::gauge_with",
"Telemetry::make_labels",
"Telemetry::gauge_family_set",
};
zeek::plugin::Configuration Plugin::Configure()
{
EnableHook(zeek::plugin::HOOK_LOAD_FILE);
@ -58,11 +74,7 @@ static void describe_hook_args(const zeek::plugin::HookArgumentList& args, zeek:
// For function calls we remove args for unstable arguments
// from parsing the version in `base/misc/version`.
if ( i->GetType() == zeek::plugin::HookArgument::FUNC &&
(::strcmp(d->Description(), "Version::parse") == 0 ||
::strcmp(d->Description(), "gsub") == 0 ||
::strcmp(d->Description(), "split_string1") == 0 ||
::strcmp(d->Description(), "lstrip") == 0 ||
::strcmp(d->Description(), "to_count") == 0) )
sanitized_functions.count(d->Description()) != 0 )
serialize_args = false;
continue;
@ -105,10 +117,8 @@ std::pair<bool, zeek::ValPtr> Plugin::HookFunctionCall(const zeek::Func* func,
// For function calls we remove args for unstable arguments
// from parsing the version in `base/misc/version`.
if ( ::strcmp(d.Description(), "Version::parse") == 0 ||
::strcmp(d.Description(), "gsub") == 0 ||
::strcmp(d.Description(), "split_string1") == 0 ||
::strcmp(d.Description(), "lstrip") == 0 || ::strcmp(d.Description(), "to_count") == 0 )
//
if ( sanitized_functions.count(d.Description()) != 0 )
d.Add("(...)");
else
zeek::plugin::HookArgument(args).Describe(&d);

View file

@ -6,5 +6,4 @@
# @TEST-EXEC: hexdump -C unprocessed.pcap > unprocessed.pcap.hex
# @TEST-EXEC: btest-diff unprocessed.pcap.hex
@unload base/misc/version
@load base/init-default

View file

@ -0,0 +1,116 @@
# @TEST-DOC: Using and listing of counters and gauges using the telemetry module.
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out
# @TEST-EXEC: btest-diff out
# @TEST-EXEC-FAIL: test -f reporter.log
@load base/frameworks/telemetry
# Three counter families under the "btest" prefix, each labeled by (x, y).
# zeek_done() below updates them and queries them back by prefix/name
# pattern.
global btest_a_cf = Telemetry::register_counter_family([
$prefix="btest",
$name="a_test",
$unit="1",
$help_text="A btest metric",
$labels=vector("x", "y")
]);
global btest_b_cf = Telemetry::register_counter_family([
$prefix="btest",
$name="b_test",
$unit="1",
$help_text="Another btest metric",
$labels=vector("x", "y")
]);
global btest_c_cf = Telemetry::register_counter_family([
$prefix="btest",
$name="c_test",
$unit="1",
$help_text="The last btest metric",
$labels=vector("x", "y")
]);
# Gauge family under a separate "system" prefix with a single "name" label,
# used to exercise the gauge set/inc/dec helpers.
global system_sensor_temp_gf = Telemetry::register_gauge_family([
$prefix="system",
$name="sensor_temperature",
$unit="celsius",
$help_text="Temperatures reported by sensors in the system",
$labels=vector("name")
]);
# Histogram family under the "btest" prefix with a single "dim" label and
# five explicit bucket bounds. It is queried through
# Telemetry::collect_histogram_metrics() in zeek_done() rather than through
# Telemetry::collect_metrics().
global btest_sample_histogram_hf = Telemetry::register_histogram_family([
$prefix="btest",
$name="sample_histogram",
$unit="1",
$help_text="A sample histogram that is not returned by Telemetry::collect_metrics",
$bounds=vector(1.0, 2.0, 3.0, 4.0, 5.0),
$labels=vector("dim")
]);
# Print a "### <what> |<count>|" header followed by one line per metric
# listing its type, prefix, name, label names, label values and value.
# Metrics that carry the optional count_value field get an additional
# "count_value" line.
function print_metrics(what: string, metrics: vector of Telemetry::Metric)
	{
	print fmt("### %s |%s|", what, |metrics|);

	for ( idx in metrics )
		{
		local metric = metrics[idx];
		local opts = metric$opts;

		print opts$metric_type, opts$prefix, opts$name, opts$labels, metric$labels, metric$value;

		if ( metric?$count_value )
			print "count_value", metric$count_value;
		}
	}
# Print a "### <what> |<count>|" header followed by one line per histogram
# listing its type, prefix, name, bucket bounds, label names, label values,
# per-bucket values, sum and number of observations.
function print_histogram_metrics(what: string, metrics: vector of Telemetry::HistogramMetric)
	{
	print fmt("### %s |%s|", what, |metrics|);

	for ( idx in metrics )
		{
		local hist = metrics[idx];
		local opts = hist$opts;

		print opts$metric_type, opts$prefix, opts$name, opts$bounds, opts$labels, hist$labels, hist$values, hist$sum, hist$observations;
		}
	}
# Exercise the counter, gauge and histogram family helpers, then print what
# the collect functions return for several prefix/name patterns. The output
# is compared against the test baseline, so statement order matters.
# NOTE(review): the negative priority presumably makes this handler run
# after default-priority zeek_done handlers - confirm.
event zeek_done() &priority=-100
{
# a_test: (a, b) is incremented once and (a, c) twice; the baseline expects
# 1.0 and 2.0 respectively.
Telemetry::counter_family_inc(btest_a_cf, vector("a", "b"));
Telemetry::counter_family_inc(btest_a_cf, vector("a", "c"));
Telemetry::counter_family_inc(btest_a_cf, vector("a", "c"));
# b_test: increments with an explicit amount; the baseline expects 10.0
# and 20.0.
Telemetry::counter_family_inc(btest_b_cf, vector("a", "b"), 10.0);
Telemetry::counter_family_inc(btest_b_cf, vector("a", "c"), 20.0);
# c_test: set twice on the same labels; the baseline expects the later
# value, 200.0.
Telemetry::counter_family_set(btest_c_cf, vector("a", "b"), 100.0);
Telemetry::counter_family_set(btest_c_cf, vector("a", "b"), 200.0);
# Gauges: cpu0 is only set (43.0); cpu1 is set to 43.1 and then incremented
# (baseline: 44.1); cpu3 is set to 43.2 and then decremented (baseline: 42.2).
Telemetry::gauge_family_set(system_sensor_temp_gf, vector("cpu0"), 43.0);
Telemetry::gauge_family_set(system_sensor_temp_gf, vector("cpu1"), 43.1);
Telemetry::gauge_family_inc(system_sensor_temp_gf, vector("cpu1"));
Telemetry::gauge_family_set(system_sensor_temp_gf, vector("cpu3"), 43.2);
Telemetry::gauge_family_dec(system_sensor_temp_gf, vector("cpu3"));
# Histogram, dim=a: five observations summing to 11.5. The baseline shows
# two in the first bucket, two in the second and one in the trailing "inf"
# bucket.
Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("a"), 0.5);
Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("a"), 0.9);
Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("a"), 1.1);
Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("a"), 2.0);
Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("a"), 7.0);
# Histogram, dim=b: two observations summing to 7.5, one in the first
# bucket and one in the trailing "inf" bucket.
Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("b"), 0.5);
Telemetry::histogram_family_observe(btest_sample_histogram_hf, vector("b"), 7.0);
# Session metrics under the "zeek" prefix, selected by a name glob.
local zeek_session_metrics = Telemetry::collect_metrics("zeek", "*session*");
print_metrics("zeek_session_metrics", zeek_session_metrics);
# A glob on the prefix: the baseline lists the five btest counters and no
# histogram.
local all_btest_metrics = Telemetry::collect_metrics("bt*", "*");
print_metrics("bt* metrics", all_btest_metrics);
# Exact prefix combined with a name glob.
local btest_a_metrics = Telemetry::collect_metrics("btest", "a_*");
print_metrics("btest_a_metrics", btest_a_metrics);
local btest_b_metrics = Telemetry::collect_metrics("btest", "b_*");
print_metrics("btest_b_metrics", btest_b_metrics);
# Name pattern omitted; the baseline lists all three system gauges.
local system_metrics = Telemetry::collect_metrics("system");
print_metrics("system_metrics", system_metrics);
# Histograms are collected through their own function.
local histogram_metrics = Telemetry::collect_histogram_metrics("btest");
print_histogram_metrics("btest_histogram_metrics", histogram_metrics);
}

View file

@ -0,0 +1,48 @@
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out
# @TEST-EXEC: btest-diff out
# @TEST-EXEC-FAIL: test -f reporter.log
@load base/frameworks/telemetry
# Unlabeled histogram family for connection durations in seconds, using a
# small set of bucket bounds.
global connection_duration_hf = Telemetry::register_histogram_family([
	$prefix="zeek",
	$name="connection_duration",
	$unit="seconds",
	$help_text="Monitored connection durations",
	$bounds=vector(2.0, 3.0, 4.0, 5.0, 6.0, 10.0)
]);

# Histogram family for connection durations in seconds, labeled by
# transport protocol. Every bound is written as a double literal (300.0
# rather than 300) so the element type of the bounds vector does not rely
# on implicit count-to-double promotion.
global realistic_connection_duration_hf = Telemetry::register_histogram_family([
	$prefix="zeek",
	$name="realistic_connection_duration",
	$labels=vector("proto"),
	$unit="seconds",
	$help_text="Monitored connection durations by protocol",
	$bounds=vector(0.1, 1.0, 10.0, 30.0, 60.0, 120.0, 300.0, 900.0, 1800.0)
]);

# Histogram instance of the unlabeled family; no label values are passed.
global connection_duration_h = Telemetry::histogram_with(connection_duration_hf);
# Record each removed connection's duration in both histograms: the
# unlabeled instance, and the per-protocol family keyed by the lower-cased
# transport protocol of the responder port.
event connection_state_remove(c: connection)
	{
	local duration_secs = interval_to_double(c$duration);
	Telemetry::histogram_observe(connection_duration_h, duration_secs);

	local transport = to_lower(cat(get_port_transport_proto(c$id$resp_p)));
	Telemetry::histogram_family_observe(realistic_connection_duration_hf, vector(transport), duration_secs);
	}
# At shutdown, print every histogram with prefix "zeek" whose name matches
# "*connection_duration": type/prefix/name, label names, label values,
# bucket bounds, bucket values, and finally observation count and sum.
event zeek_done() &priority=-100
	{
	local collected = Telemetry::collect_histogram_metrics("zeek", "*connection_duration");

	for ( idx in collected )
		{
		local hist = collected[idx];
		local opts = hist$opts;

		print opts$metric_type, opts$prefix, opts$name;
		print opts$labels;
		print hist$labels;
		print opts$bounds;
		print hist$values;
		print hist$observations, hist$sum;
		}
	}

View file

@ -0,0 +1,43 @@
# @TEST-DOC: Query some internal broker/caf related metrics as they use the int64_t versions, too.
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out
# @TEST-EXEC: btest-diff out
# @TEST-EXEC-FAIL: test -f reporter.log
@load base/frameworks/telemetry
# Print a "### <what> |<count>|" header followed by the static description
# of each histogram: type, prefix, name, bounds, label names and label
# values. If the options carry the optional count_bounds field, print it
# on a separate line.
function print_histogram_metrics(what: string, metrics: vector of Telemetry::HistogramMetric)
	{
	print fmt("### %s |%s|", what, |metrics|);

	for ( idx in metrics )
		{
		local hist = metrics[idx];
		local opts = hist$opts;

		print opts$metric_type, opts$prefix, opts$name, opts$bounds, opts$labels, hist$labels;

		# The observed values depend on the run, so they are deliberately
		# left out of the output:
		# print hist$values, hist$sum, hist$observations;

		if ( opts?$count_bounds )
			print opts$count_bounds;
		}
	}
# Print a "### <what> |<count>|" header followed by one line per metric
# listing its type, prefix, name, label names, label values and value.
# Metrics that carry the optional count_value field get an additional
# "count_value" line.
function print_metrics(what: string, metrics: vector of Telemetry::Metric)
	{
	print fmt("### %s |%s|", what, |metrics|);

	for ( idx in metrics )
		{
		local metric = metrics[idx];
		local opts = metric$opts;

		print opts$metric_type, opts$prefix, opts$name, opts$labels, metric$labels, metric$value;

		if ( metric?$count_value )
			print "count_value", metric$count_value;
		}
	}
# At shutdown, dump the metrics with prefix "broker", then the metrics and
# the histograms whose prefix matches "caf*". Each collection is printed
# before the next one is gathered.
event zeek_done() &priority=-100
	{
	print_metrics("broker", Telemetry::collect_metrics("broker", "*"));
	print_metrics("caf", Telemetry::collect_metrics("caf*", "*"));
	print_histogram_metrics("caf", Telemetry::collect_histogram_metrics("caf*", "*"));
	}

View file

@ -0,0 +1,34 @@
# @TEST-DOC: Tests that setting log_prefixes filters out the zeek metrics normally created.
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out
# @TEST-EXEC: btest-diff telemetry.log
# @TEST-EXEC: btest-diff telemetry_histogram.log
@load frameworks/telemetry/log
# Replace the set of logged prefixes with just "btest", so that only the
# metrics registered below are expected in telemetry.log and
# telemetry_histogram.log.
redef Telemetry::log_prefixes = {"btest"};
# Counter family counting connections, labeled by transport protocol.
global connections_by_proto_cf = Telemetry::register_counter_family([
$prefix="btest",
$name="connections",
$unit="1",
$help_text="Total number of monitored connections",
$labels=vector("proto")
]);
# Unlabeled histogram family for connection durations in seconds.
global connection_duration_hf = Telemetry::register_histogram_family([
$prefix="btest",
$name="connection_duration",
$unit="seconds",
$help_text="Monitored connection duration",
$bounds=vector(2.0, 3.0, 4.0, 5.0, 6.0, 10.0)
]);
# Histogram instance of the unlabeled family; no label values are passed.
global connection_duration_h = Telemetry::histogram_with(connection_duration_hf);
# For each removed connection, bump the per-protocol connection counter
# (keyed by the lower-cased transport protocol of the originator port) and
# record the connection's duration in the histogram.
event connection_state_remove(c: connection)
	{
	local transport = to_lower(cat(get_port_transport_proto(c$id$orig_p)));
	Telemetry::counter_family_inc(connections_by_proto_cf, vector(transport));

	local duration_secs = interval_to_double(c$duration);
	Telemetry::histogram_observe(connection_duration_h, duration_secs);
	}

View file

@ -0,0 +1,25 @@
# @TEST-DOC: Test loading of telemetry/log and smoke check the telemetry.log file
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out
# @TEST-EXEC: grep 'zeek.*sessions' telemetry.log > telemetry.log.filtered
# @TEST-EXEC: grep 'zeek.*connection_duration' telemetry_histogram.log > telemetry_histogram.log.filtered
# @TEST-EXEC: btest-diff telemetry.log.filtered
# @TEST-EXEC: btest-diff telemetry_histogram.log.filtered
@load frameworks/telemetry/log
# Unlabeled histogram family for connection durations in seconds, registered
# under the "zeek" prefix. The test greps telemetry_histogram.log for
# 'zeek.*connection_duration' to pick out its entries.
global connection_duration_hf = Telemetry::register_histogram_family([
$prefix="zeek",
$name="connection_duration",
$unit="seconds",
$help_text="Monitored connection duration",
$bounds=vector(2.0, 3.0, 4.0, 5.0, 6.0, 10.0)
]);
# Histogram instance of the unlabeled family; no label values are passed.
global connection_duration_h = Telemetry::histogram_with(connection_duration_hf);
# Record the duration of every removed connection in the histogram.
event connection_state_remove(c: connection)
	{
	local duration_secs = interval_to_double(c$duration);
	Telemetry::histogram_observe(connection_duration_h, duration_secs);
	}

View file

@ -1 +1 @@
5b2a6d78f789f1271b68123875ef66eaaba6f3e8
c57b93b0d3aa4ee69452b039055122d4bec9058f

View file

@ -1 +1 @@
d8088ba741389aa092b5fb284d0849401234809f
5b83cfbcf5eb52b28773dded8e1d02d350737ec5

View file

@ -12,3 +12,13 @@
# (json-logs.zeek activates this).
redef LogAscii::use_json = F;
@endif
# Exclude process metrics from the telemetry logs by removing the "process"
# prefix from the set of logged prefixes; their values are non-deterministic.
redef Telemetry::log_prefixes -= { "process" };
# Keep the zeek/version_info metric out of the log: its content is not
# deterministic. Breaking out of the policy hook suppresses the entry.
hook Telemetry::log_policy(rec: Telemetry::Info, id: Log::ID, filter: Log::Filter)
	{
	local is_version_info = ( rec$prefix == "zeek" && rec$name == "version_info" );

	if ( is_version_info )
		break;
	}