From 70872673a1ce56fe742c38f0d702b167e794ca9e Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Wed, 2 Oct 2024 16:53:11 +0200 Subject: [PATCH 1/4] telemetry: Invoke Telemetry::sync() only at scrape/collection time This stops invoking Telemetry::sync() via a scheduled event and instead only invokes it on-demand. This makes metric collection network time independent and lazier, too. With Prometheus scrape requests being processed on Zeek's main thread now, we can safely invoke the script layer Telemetry::sync() hook. Closes #3947 --- NEWS | 4 ++ scripts/base/frameworks/telemetry/main.zeek | 30 ++++---- scripts/base/init-bare.zeek | 15 ++++ src/telemetry/Manager.cc | 29 ++++++++ src/telemetry/Manager.h | 7 ++ .../Baseline/bifs.event-handler-stats/out | 2 +- .../out | 13 ++++ .../.stderr | 3 + .../out | 2 + .../metrics1.txt | 2 + .../metrics2.txt | 3 + .../metrics3.txt | 3 + .../zeek..stdout | 5 ++ .../frameworks/telemetry/sync-collect.zeek | 48 +++++++++++++ .../frameworks/telemetry/sync-warnings.zeek | 43 ++++++++++++ .../base/frameworks/telemetry/sync.zeek | 68 +++++++++++++++++++ 16 files changed, 260 insertions(+), 17 deletions(-) create mode 100644 testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-collect/out create mode 100644 testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/.stderr create mode 100644 testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/out create mode 100644 testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics1.txt create mode 100644 testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics2.txt create mode 100644 testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics3.txt create mode 100644 testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/zeek..stdout create mode 100644 testing/btest/scripts/base/frameworks/telemetry/sync-collect.zeek create mode 100644 testing/btest/scripts/base/frameworks/telemetry/sync-warnings.zeek create mode 100644 testing/btest/scripts/base/frameworks/telemetry/sync.zeek diff --git a/NEWS b/NEWS index 8b46a774c7..bddb781f4e 100644 --- a/NEWS +++ b/NEWS @@ -93,6 +93,10 @@ Changed Functionality * The ASCII input reader now suppresses warnings for consecutive invalid lines, producing a summary of total suppressions once a valid line is encountered. +* The `Telemetry::sync()` hook is now invoked on demand. Either when the metrics + of a node are scraped via the Prometheus HTTP endpoint, or one of the collect + methods is invoked from Zeek script. + Removed Functionality --------------------- diff --git a/scripts/base/frameworks/telemetry/main.zeek b/scripts/base/frameworks/telemetry/main.zeek index 0e5ce1b1f5..ce67c642d1 100644 --- a/scripts/base/frameworks/telemetry/main.zeek +++ b/scripts/base/frameworks/telemetry/main.zeek @@ -263,22 +263,15 @@ export { label_values: labels_vector, measurement: double): bool; - ## Telemetry sync hook. - ## - ## This hook is invoked every :zeek:see:`Telemetry::sync_interval` - ## for script writers to synchronize or mirror metrics with the - ## telemetry subsystem. For example, when tracking table or value - ## footprints with gauges, the value in question can be set on an actual - ## :zeek:see:`Telemetry::Gauge` instance during execution of this hook. - ## - ## Implementations should be lightweight, this hook may be called - ## multiple times per minute. The interval can increased by changing - ## :zeek:see:`Telemetry::sync_interval` at the cost of delaying - ## metric updates and thereby reducing granularity. - global sync: hook(); - ## Interval at which the :zeek:see:`Telemetry::sync` hook is invoked. - option sync_interval = 10sec; + ## + ## By default, the hook is invoked on demand, setting this option to + ## a positive interval allows to invoke it regularly, too. Regular + ## invocations are relative to Zeek's network time. + ## + ## Note that on-demand hook invocation will happen even if this + ## is set. + option sync_interval = 0sec &deprecated="Remove in 8.1. If you require regular sync invocation, do so explicitly in a scheduled event."; ## Collect all counter and gauge metrics matching the given *name* and *prefix*. ## @@ -493,7 +486,9 @@ function collect_histogram_metrics(prefix: string, name: string): vector of Hist event run_sync_hook() { hook Telemetry::sync(); +@pragma push ignore-deprecations schedule sync_interval { run_sync_hook() }; +@pragma pop ignore-deprecations } # Expose the Zeek version as Prometheus style info metric @@ -508,7 +503,10 @@ global version_gauge_family = Telemetry::register_gauge_family([ event zeek_init() { - schedule sync_interval { run_sync_hook() }; +@pragma push ignore-deprecations + if ( sync_interval > 0sec ) + schedule sync_interval { run_sync_hook() }; +@pragma pop ignore-deprecations local v = Version::info; local labels = vector(cat(v$version_number), diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 48643239b8..a0cdd0cd39 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -5933,6 +5933,21 @@ export { sum: double; }; + ## Telemetry sync hook. + ## + ## This hook is invoked when metrics are requested via functions + ## :zeek:see:`Telemetry::collect_metrics` and :zeek:see:`Telemetry::collect_histogram_metrics`, + ## or just before Zeek collects metrics when being scraped through + ## its Prometheus endpoint. + ## Script writers can use it to synchronize (or mirror) metrics with the + ## telemetry subsystem. For example, when tracking table or value + ## footprints with gauges, the value in question can be set on an actual + ## :zeek:see:`Telemetry::Gauge` instance during execution of this hook. + ## + ## Implementations should be lightweight, this hook may be called + ## multiple times per minute. + global sync: hook(); + type MetricVector : vector of Metric; type HistogramMetricVector : vector of HistogramMetric; diff --git a/src/telemetry/Manager.cc b/src/telemetry/Manager.cc index 3272641882..2924301dad 100644 --- a/src/telemetry/Manager.cc +++ b/src/telemetry/Manager.cc @@ -16,6 +16,7 @@ #include #include "zeek/3rdparty/doctest.h" +#include "zeek/Func.h" #include "zeek/ID.h" #include "zeek/RunState.h" #include "zeek/ZeekString.h" @@ -256,6 +257,28 @@ static bool compare_histograms(const std::optional& a, const std::optional return comparer(a, b, metric_record_type); } +void Manager::InvokeTelemetrySyncHook() { + static const auto sync_hook = zeek::id::find_func("Telemetry::sync"); + + if ( sync_hook->Flavor() != FUNC_FLAVOR_HOOK ) + reporter->InternalError("Telemetry::sync not a hook?"); + + if ( in_sync_hook ) { + reporter->Warning("Telemetry::sync() hook invoked recursively"); + return; + } + + in_sync_hook = true; + + zeek::Args empty; + auto result = sync_hook->Invoke(&empty); + + if ( ! result->IsOne() ) + reporter->Warning("Telemetry::sync() implementations skipped due to 'break' usage"); + + in_sync_hook = false; +} + ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view name_pattern) { static auto metrics_vector_type = zeek::id::find_type("Telemetry::MetricVector"); static auto string_vec_type = zeek::id::find_type("string_vec"); @@ -268,6 +291,8 @@ ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view static auto metric_opts_type = zeek::id::find_type("Telemetry::MetricOpts"); static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); + InvokeTelemetrySyncHook(); + VectorValPtr ret_val = make_intrusive(metrics_vector_type); // Due to the name containing the full information about a metric including a potential unit add an @@ -345,6 +370,8 @@ ValPtr Manager::CollectHistogramMetrics(std::string_view prefix_pattern, std::st static auto metric_opts_type = zeek::id::find_type("Telemetry::MetricOpts"); static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); + InvokeTelemetrySyncHook(); + VectorValPtr ret_val = make_intrusive(metrics_vector_type); // Due to the name containing the full information about a metric including a potential unit add an @@ -574,6 +601,8 @@ void Manager::ProcessFd(int fd, int flags) { for ( const auto& [name, f] : families ) f->RunCallbacks(); + InvokeTelemetrySyncHook(); + collector_response_idx = collector_request_idx; lk.unlock(); diff --git a/src/telemetry/Manager.h b/src/telemetry/Manager.h index 26e2403f58..9523385b77 100644 --- a/src/telemetry/Manager.h +++ b/src/telemetry/Manager.h @@ -259,6 +259,13 @@ private: RecordValPtr GetMetricOptsRecord(const prometheus::MetricFamily& metric_family); void BuildClusterJson(); + /** + * Runs the Telemetry::sync() hook in Zeek script land. + */ + void InvokeTelemetrySyncHook(); + + bool in_sync_hook = false; + std::map> families; std::map opts_records; diff --git a/testing/btest/Baseline/bifs.event-handler-stats/out b/testing/btest/Baseline/bifs.event-handler-stats/out index ae43a66216..d438e254c3 100644 --- a/testing/btest/Baseline/bifs.event-handler-stats/out +++ b/testing/btest/Baseline/bifs.event-handler-stats/out @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -[[name=Broker::log_flush, times_called=2], [name=ChecksumOffloading::check, times_called=2], [name=NetControl::init, times_called=1], [name=analyzer_confirmation_info, times_called=1], [name=connection_established, times_called=1], [name=connection_state_remove, times_called=1], [name=file_new, times_called=1], [name=file_over_new_connection, times_called=1], [name=file_sniff, times_called=1], [name=file_state_remove, times_called=1], [name=filter_change_tracking, times_called=3], [name=get_file_handle, times_called=4], [name=http_begin_entity, times_called=2], [name=http_end_entity, times_called=2], [name=http_header, times_called=13], [name=http_message_done, times_called=2], [name=http_reply, times_called=1], [name=http_request, times_called=1], [name=net_done, times_called=1], [name=new_connection, times_called=1], [name=run_sync_hook, times_called=2], [name=zeek_done, times_called=1], [name=zeek_init, times_called=1]] +[[name=Broker::log_flush, times_called=2], [name=ChecksumOffloading::check, times_called=2], [name=NetControl::init, times_called=1], [name=analyzer_confirmation_info, times_called=1], [name=connection_established, times_called=1], [name=connection_state_remove, times_called=1], [name=file_new, times_called=1], [name=file_over_new_connection, times_called=1], [name=file_sniff, times_called=1], [name=file_state_remove, times_called=1], [name=filter_change_tracking, times_called=3], [name=get_file_handle, times_called=4], [name=http_begin_entity, times_called=2], [name=http_end_entity, times_called=2], [name=http_header, times_called=13], [name=http_message_done, times_called=2], [name=http_reply, times_called=1], [name=http_request, times_called=1], [name=net_done, times_called=1], [name=new_connection, times_called=1], [name=zeek_done, times_called=1], [name=zeek_init, times_called=1]] diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-collect/out b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-collect/out new file mode 100644 index 0000000000..393c51b20d --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-collect/out @@ -0,0 +1,13 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +node up +sync, 1, tcp +btest_connections_total, [tcp], 1.0 +sync, 2, udp +btest_connections_total, [udp], 1.0 +btest_connections_total, [tcp], 1.0 +sync, 3, udp +btest_connections_total, [udp], 2.0 +btest_connections_total, [tcp], 1.0 +sync, 4, udp +btest_connections_total, [udp], 2.0 +btest_connections_total, [tcp], 1.0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/.stderr b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/.stderr new file mode 100644 index 0000000000..32c85b57ed --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/.stderr @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +warning: Telemetry::sync() hook invoked recursively +warning: Telemetry::sync() implementations skipped due to 'break' usage diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/out b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/out new file mode 100644 index 0000000000..a4801a85eb --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/out @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +node up diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics1.txt b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics1.txt new file mode 100644 index 0000000000..7df20bcb5a --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics1.txt @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +btest_connections_total{proto="tcp"} 1 diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics2.txt b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics2.txt new file mode 100644 index 0000000000..db8cfd6872 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics2.txt @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +btest_connections_total{proto="udp"} 1 +btest_connections_total{proto="tcp"} 1 diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics3.txt b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics3.txt new file mode 100644 index 0000000000..cce741488a --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics3.txt @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +btest_connections_total{proto="udp"} 2 +btest_connections_total{proto="tcp"} 1 diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/zeek..stdout b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/zeek..stdout new file mode 100644 index 0000000000..f3259c3511 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/zeek..stdout @@ -0,0 +1,5 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +node up +sync, 1, tcp +sync, 2, udp +sync, 3, udp diff --git a/testing/btest/scripts/base/frameworks/telemetry/sync-collect.zeek b/testing/btest/scripts/base/frameworks/telemetry/sync-collect.zeek new file mode 100644 index 0000000000..ca52fb77fb --- /dev/null +++ b/testing/btest/scripts/base/frameworks/telemetry/sync-collect.zeek @@ -0,0 +1,48 @@ +# @TEST-DOC: Calling collect_metrics() invokes Telemetry::sync. +# Note compilable to C++ due to globals being initialized to a record that +# has an opaque type as a field. +# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" +# +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out + + +@load base/frameworks/telemetry + +global connections_by_proto_cf = Telemetry::register_counter_family([ + $prefix="btest", + $name="connections", + $unit="", + $help_text="Total number of monitored connections", + $label_names=vector("proto") +]); + +function print_metrics(ms: vector of Telemetry::Metric) { + for (_, m in ms) { + print m$opts$name, m$label_values, m$value; + } +} + +event zeek_init() + { + print "node up"; + local ms = Telemetry::collect_metrics("btest"); + print_metrics(ms); + ms = Telemetry::collect_metrics("btest"); + print_metrics(ms); + ms = Telemetry::collect_metrics("btest"); + print_metrics(ms); + local hm = Telemetry::collect_histogram_metrics("btest"); + print_metrics(ms); + } + + +global sync_calls = 0; + +hook Telemetry::sync() + { + ++sync_calls; + local proto = sync_calls == 1 ? "tcp" : "udp"; + print "sync", sync_calls, proto; + Telemetry::counter_family_inc(connections_by_proto_cf, vector(proto)); + } diff --git a/testing/btest/scripts/base/frameworks/telemetry/sync-warnings.zeek b/testing/btest/scripts/base/frameworks/telemetry/sync-warnings.zeek new file mode 100644 index 0000000000..b542ff5ccb --- /dev/null +++ b/testing/btest/scripts/base/frameworks/telemetry/sync-warnings.zeek @@ -0,0 +1,43 @@ +# @TEST-DOC: Breaking and recursive Telemetry::sync() warning +# Note compilable to C++ due to globals being initialized to a record that +# has an opaque type as a field. +# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" +# +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out +# @TEST-EXEC: btest-diff .stderr + + +@load base/frameworks/telemetry + +global connections_by_proto_cf = Telemetry::register_counter_family([ + $prefix="btest", + $name="connections", + $unit="", + $help_text="Total number of monitored connections", + $label_names=vector("proto") +]); + +event zeek_init() + { + print "node up"; + Telemetry::counter_family_inc(connections_by_proto_cf, vector("tcp")); + local ms = Telemetry::collect_metrics("btest"); + } + + +hook Telemetry::sync() + { + # Calling collect_metrics() in Telemetry::sync() is not good as + # it would invoke Telemetry::sync() recursively. The manager will + # emit a warning and not run the second Telemetry::sync() invocation. + local ms = Telemetry::collect_metrics("btest"); + } + +hook Telemetry::sync() &priority=-100 + { + # break is not good as it prevents other Telemetry::sync() hooks + # from running. This will produce a warning. + # We could find this via script validation if we wanted to. + break; + } diff --git a/testing/btest/scripts/base/frameworks/telemetry/sync.zeek b/testing/btest/scripts/base/frameworks/telemetry/sync.zeek new file mode 100644 index 0000000000..2e20eeed14 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/telemetry/sync.zeek @@ -0,0 +1,68 @@ +# @TEST-DOC: Verify Telemetry::sync() is invoked for metric scraping via the Prometheus HTTP endpoint. +# Note compilable to C++ due to globals being initialized to a record that +# has an opaque type as a field. +# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" +# @TEST-REQUIRES: which jq +# @TEST-REQUIRES: which curl +# +# @TEST-PORT: METRICS_PORT +# +# @TEST-EXEC: chmod +x fetch-metrics.sh +# @TEST-EXEC: zeek --parse-only %INPUT +# @TEST-EXEC: btest-bg-run zeek ZEEKPATH=$ZEEKPATH:.. zeek -b %INPUT +# @TEST-EXEC: $SCRIPTS/wait-for-file zeek/up 5 || (btest-bg-wait -k 1 && false) +# @TEST-EXEC: ./fetch-metrics.sh 1.trace metrics1.txt +# @TEST-EXEC: ./fetch-metrics.sh 2.trace metrics2.txt +# @TEST-EXEC: ./fetch-metrics.sh 3.trace metrics3.txt +# @TEST-EXEC: btest-bg-wait 10 +# +# @TEST-EXEC: btest-diff zeek/.stdout +# @TEST-EXEC: btest-diff metrics1.txt +# @TEST-EXEC: btest-diff metrics2.txt +# @TEST-EXEC: btest-diff metrics3.txt + +@TEST-START-FILE fetch-metrics.sh +#! /usr/bin/env bash +set -ux +trace_file=$1 +output_file=$2 + +PORT=$(echo ${METRICS_PORT} | cut -d '/' -f 1) +URL=http://localhost:${PORT}/metrics + +curl -m 5 --trace $trace_file $URL | grep ^btest > $output_file + +exit 0 +@TEST-END-FILE + +@load base/frameworks/telemetry + +redef exit_only_after_terminate = T; +redef Telemetry::metrics_port = to_port(getenv("METRICS_PORT")); + +event zeek_init() + { + print "node up"; + system("touch up"); + } + +global connections_by_proto_cf = Telemetry::register_counter_family([ + $prefix="btest", + $name="connections", + $unit="", + $help_text="Total number of monitored connections", + $label_names=vector("proto") +]); + +global sync_calls = 0; + +hook Telemetry::sync() + { + ++sync_calls; + local proto = sync_calls == 1 ? "tcp" : "udp"; + print "sync", sync_calls, proto; + Telemetry::counter_family_inc(connections_by_proto_cf, vector(proto)); + + if ( sync_calls == 3 ) + terminate(); + } From 1822f4f6fe1d9f55dccd830fce8e6fc0ff586cdb Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Fri, 4 Oct 2024 11:10:56 +0200 Subject: [PATCH 2/4] telemetry/Manager: Remove variant include --- src/telemetry/Manager.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/telemetry/Manager.cc b/src/telemetry/Manager.cc index 2924301dad..6441807544 100644 --- a/src/telemetry/Manager.cc +++ b/src/telemetry/Manager.cc @@ -13,7 +13,6 @@ #include #include #include -#include #include "zeek/3rdparty/doctest.h" #include "zeek/Func.h" From e20aa66eae3087cd46759e399777511c5257297a Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Fri, 4 Oct 2024 11:34:02 +0200 Subject: [PATCH 3/4] misc/stats: Add zeek_net_timestamp_seconds Adding a metric for the network time value itself should make it possible to observe it stopping or growing slowly as compared to realtime when Zeek isn't able to keep up. Also, modify the telemetry/log.zeek test to include misc/stats and log at a higher frequency with a more interesting pcap. --- scripts/policy/misc/stats.zeek | 15 +++- .../telemetry.log.filtered | 76 +++++++++++++++++-- .../telemetry_histogram.log.filtered | 9 ++- .../policy/frameworks/telemetry/log.zeek | 10 ++- 4 files changed, 96 insertions(+), 14 deletions(-) diff --git a/scripts/policy/misc/stats.zeek b/scripts/policy/misc/stats.zeek index cf5bb1cbd0..c324a728be 100644 --- a/scripts/policy/misc/stats.zeek +++ b/scripts/policy/misc/stats.zeek @@ -130,9 +130,20 @@ global packet_lag_gf = Telemetry::register_gauge_family([ $help_text="Difference of network time and wallclock time in seconds.", ]); +# Gauge as motivated by: +# https://www.robustperception.io/are-increasing-timestamps-counters-or-gauges/ +global network_time_cf = Telemetry::register_gauge_family([ + $prefix="zeek", + $name="net-timestamp", + $unit="seconds", + $help_text="The current network time.", +]); + global no_labels: vector of string; -hook Telemetry::sync() { +hook Telemetry::sync() + { + Telemetry::gauge_family_set(network_time_cf, no_labels, time_to_double(network_time())); local net_stats = get_net_stats(); Telemetry::counter_family_set(bytes_received_cf, no_labels, net_stats$bytes_recvd); Telemetry::counter_family_set(packets_received_cf, no_labels, net_stats$pkts_recvd); @@ -148,7 +159,7 @@ hook Telemetry::sync() { Telemetry::gauge_family_set(packet_lag_gf, no_labels, interval_to_double(current_time() - network_time())); } -} + } event zeek_init() &priority=5 { diff --git a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered index b5c04c3f44..27fe96a750 100644 --- a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered +++ b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered @@ -1,7 +1,71 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -XXXXXXXXXX.XXXXXX zeek counter zeek_ended_sessions_total reason inactivity 0.0 -XXXXXXXXXX.XXXXXX zeek counter zeek_total_sessions_total protocol tcp 1.0 -XXXXXXXXXX.XXXXXX zeek gauge zeek_active_sessions protocol tcp 1.0 -XXXXXXXXXX.XXXXXX zeek counter zeek_ended_sessions_total reason inactivity 0.0 -XXXXXXXXXX.XXXXXX zeek counter zeek_total_sessions_total protocol tcp 500.0 -XXXXXXXXXX.XXXXXX zeek gauge zeek_active_sessions protocol tcp 500.0 +1300475167.096535 zeek counter zeek_net_received_bytes_total (empty) (empty) 87.0 +1300475167.096535 zeek counter zeek_net_received_packets_total (empty) (empty) 1.0 +1300475167.096535 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475167.096535 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475167.096535 zeek counter zeek_total_sessions_total protocol udp 1.0 +1300475167.096535 zeek counter zeek_total_sessions_total protocol tcp 0.0 +1300475167.096535 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475167.096535 +1300475167.096535 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475167.096535 zeek gauge zeek_active_sessions protocol udp 1.0 +1300475167.096535 zeek gauge zeek_active_sessions protocol tcp 0.0 +1300475168.652003 zeek counter zeek_net_received_bytes_total (empty) (empty) 1142.0 +1300475168.652003 zeek counter zeek_net_received_packets_total (empty) (empty) 6.0 +1300475168.652003 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475168.652003 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475168.652003 zeek counter zeek_total_sessions_total protocol udp 3.0 +1300475168.652003 zeek counter zeek_total_sessions_total protocol tcp 1.0 +1300475168.652003 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475168.652003 +1300475168.652003 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475168.652003 zeek gauge zeek_active_sessions protocol udp 3.0 +1300475168.652003 zeek gauge zeek_active_sessions protocol tcp 1.0 +1300475169.780331 zeek counter zeek_net_received_bytes_total (empty) (empty) 23385.0 +1300475169.780331 zeek counter zeek_net_received_packets_total (empty) (empty) 113.0 +1300475169.780331 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475169.780331 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475169.780331 zeek counter zeek_total_sessions_total protocol udp 17.0 +1300475169.780331 zeek counter zeek_total_sessions_total protocol tcp 10.0 +1300475169.780331 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475169.780331 +1300475169.780331 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475169.780331 zeek gauge zeek_active_sessions protocol udp 17.0 +1300475169.780331 zeek gauge zeek_active_sessions protocol tcp 10.0 +1300475170.811500 zeek counter zeek_net_received_bytes_total (empty) (empty) 23604.0 +1300475170.811500 zeek counter zeek_net_received_packets_total (empty) (empty) 116.0 +1300475170.811500 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475170.811500 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475170.811500 zeek counter zeek_total_sessions_total protocol udp 18.0 +1300475170.811500 zeek counter zeek_total_sessions_total protocol tcp 10.0 +1300475170.811500 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475170.8115 +1300475170.811500 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475170.811500 zeek gauge zeek_active_sessions protocol udp 18.0 +1300475170.811500 zeek gauge zeek_active_sessions protocol tcp 10.0 +1300475171.886280 zeek counter zeek_net_received_bytes_total (empty) (empty) 24248.0 +1300475171.886280 zeek counter zeek_net_received_packets_total (empty) (empty) 124.0 +1300475171.886280 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475171.886280 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475171.886280 zeek counter zeek_total_sessions_total protocol udp 21.0 +1300475171.886280 zeek counter zeek_total_sessions_total protocol tcp 10.0 +1300475171.886280 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475171.88628 +1300475171.886280 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475171.886280 zeek gauge zeek_active_sessions protocol udp 21.0 +1300475171.886280 zeek gauge zeek_active_sessions protocol tcp 10.0 +1300475173.116749 zeek counter zeek_net_received_bytes_total (empty) (empty) 24679.0 +1300475173.116749 zeek counter zeek_net_received_packets_total (empty) (empty) 129.0 +1300475173.116749 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475173.116749 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475173.116749 zeek counter zeek_total_sessions_total protocol udp 22.0 +1300475173.116749 zeek counter zeek_total_sessions_total protocol tcp 10.0 +1300475173.116749 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475173.116749 +1300475173.116749 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475173.116749 zeek gauge zeek_active_sessions protocol udp 22.0 +1300475173.116749 zeek gauge zeek_active_sessions protocol tcp 10.0 +1300475173.475401 zeek counter zeek_net_received_bytes_total (empty) (empty) 25260.0 +1300475173.475401 zeek counter zeek_net_received_packets_total (empty) (empty) 136.0 +1300475173.475401 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475173.475401 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475173.475401 zeek counter zeek_total_sessions_total protocol udp 24.0 +1300475173.475401 zeek counter zeek_total_sessions_total protocol tcp 10.0 +1300475173.475401 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475173.475401 +1300475173.475401 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475173.475401 zeek gauge zeek_active_sessions protocol udp 24.0 +1300475173.475401 zeek gauge zeek_active_sessions protocol tcp 9.0 diff --git a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered index d47ba69d07..28672af18d 100644 --- a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered +++ b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered @@ -1,3 +1,8 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 -XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,322.0,90.0,5.0,76.0,7.0,0.0 1650.264644 500.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 6.0,14.0,0.0,3.0,10.0,0.0,1.0,0.0,0.0,0.0,0.0 4.651298 34.0 diff --git a/testing/btest/scripts/policy/frameworks/telemetry/log.zeek b/testing/btest/scripts/policy/frameworks/telemetry/log.zeek index ab74478913..1faff83719 100644 --- a/testing/btest/scripts/policy/frameworks/telemetry/log.zeek +++ b/testing/btest/scripts/policy/frameworks/telemetry/log.zeek @@ -1,20 +1,22 @@ # @TEST-DOC: Test loading of telemetry/log and smoke check the telemetry.log file -# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out -# @TEST-EXEC: grep 'zeek.*sessions' telemetry.log > telemetry.log.filtered +# @TEST-EXEC: zeek -b -Cr $TRACES/wikipedia.trace %INPUT > out +# @TEST-EXEC: grep -E 'zeek_(net|.*sessions)' telemetry.log > telemetry.log.filtered # @TEST-EXEC: grep 'zeek.*connection_duration' telemetry_histogram.log > telemetry_histogram.log.filtered -# @TEST-EXEC: btest-diff telemetry.log.filtered +# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff telemetry.log.filtered # @TEST-EXEC: btest-diff telemetry_histogram.log.filtered +@load misc/stats @load frameworks/telemetry/log +redef Telemetry::log_interval = 1sec; global connection_duration_hf = Telemetry::register_histogram_family([ $prefix="zeek", $name="connection_duration", $unit="seconds", $help_text="Monitored connection duration", - $bounds=vector(2.0, 3.0, 4.0, 5.0, 6.0, 10.0) + $bounds=vector(0.0001, 0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0) ]); global connection_duration_h = Telemetry::histogram_with(connection_duration_hf); From 3d053a94f7976baeb31e4763cff28d8d1525d66c Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Tue, 22 Oct 2024 18:48:24 +0200 Subject: [PATCH 4/4] btest/telemetry: Fix "Note compilable" typo --- testing/btest/scripts/base/frameworks/telemetry/basic.zeek | 2 +- .../base/frameworks/telemetry/conn-duration-histogram.zeek | 2 +- .../base/frameworks/telemetry/event-handler-invocations.zeek | 2 +- .../scripts/base/frameworks/telemetry/internal-metrics.zeek | 2 +- .../btest/scripts/base/frameworks/telemetry/sync-collect.zeek | 2 +- .../btest/scripts/base/frameworks/telemetry/sync-warnings.zeek | 2 +- testing/btest/scripts/base/frameworks/telemetry/sync.zeek | 2 +- .../btest/scripts/policy/frameworks/telemetry/prometheus.zeek | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/testing/btest/scripts/base/frameworks/telemetry/basic.zeek b/testing/btest/scripts/base/frameworks/telemetry/basic.zeek index 0592bff684..14b7ea9904 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/basic.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/basic.zeek @@ -1,5 +1,5 @@ # @TEST-DOC: Using and listing of counters and gauges using the telemetry module. -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out diff --git a/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek b/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek index 3f01d9ddf3..c7ab5651fa 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek @@ -1,4 +1,4 @@ -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out diff --git a/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek b/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek index c0a9c73b2d..7fd72f92b3 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek @@ -1,6 +1,6 @@ # @TEST-DOC: Query for zeek event-handler-invocations metrics counting number of times handlers were called. -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out diff --git a/testing/btest/scripts/base/frameworks/telemetry/internal-metrics.zeek b/testing/btest/scripts/base/frameworks/telemetry/internal-metrics.zeek index d130a22e48..0c98d40e1c 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/internal-metrics.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/internal-metrics.zeek @@ -1,5 +1,5 @@ # @TEST-DOC: Query Broker's telemetry to verify it ends up in Zeek's registry. -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out diff --git a/testing/btest/scripts/base/frameworks/telemetry/sync-collect.zeek b/testing/btest/scripts/base/frameworks/telemetry/sync-collect.zeek index ca52fb77fb..c2d5de46fe 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/sync-collect.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/sync-collect.zeek @@ -1,5 +1,5 @@ # @TEST-DOC: Calling collect_metrics() invokes Telemetry::sync. -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # diff --git a/testing/btest/scripts/base/frameworks/telemetry/sync-warnings.zeek b/testing/btest/scripts/base/frameworks/telemetry/sync-warnings.zeek index b542ff5ccb..971cf88408 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/sync-warnings.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/sync-warnings.zeek @@ -1,5 +1,5 @@ # @TEST-DOC: Breaking and recursive Telemetry::sync() warning -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # diff --git a/testing/btest/scripts/base/frameworks/telemetry/sync.zeek b/testing/btest/scripts/base/frameworks/telemetry/sync.zeek index 2e20eeed14..8cecb911ad 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/sync.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/sync.zeek @@ -1,5 +1,5 @@ # @TEST-DOC: Verify Telemetry::sync() is invoked for metric scraping via the Prometheus HTTP endpoint. -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # @TEST-REQUIRES: which jq diff --git a/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek b/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek index 50a6fd8d2c..231f743999 100644 --- a/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek +++ b/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek @@ -1,5 +1,5 @@ # @TEST-DOC: Query the Prometheus endpoint and smoke check that zeek_version_info{...} is contained in the response for all cluster nodes. -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # @TEST-REQUIRES: which jq