diff --git a/CHANGES b/CHANGES index 44209ae9cb..cc47072613 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,27 @@ +7.1.0-dev.418 | 2024-10-22 19:04:22 +0200 + + * btest/telemetry: Fix "Note compilable" typo (Arne Welzel, Corelight) + + * misc/stats: Add zeek_net_timestamp_seconds (Arne Welzel, Corelight) + + Adding a metric for the network time value itself should make it + possible to observe it stopping or growing slowly as compared to + realtime when Zeek isn't able to keep up. + + Also, modify the telemetry/log.zeek test to include misc/stats and + log at a higher frequency with a more interesting pcap. + + * telemetry/Manager: Remove variant include (Arne Welzel, Corelight) + + * GH-3947: telemetry: Invoke Telemetry::sync() only at scrape/collection time (Arne Welzel, Corelight) + + This stops invoking Telemetry::sync() via a scheduled event and instead + only invokes it on-demand. This makes metric collection network time + independent and lazier, too. + + With Prometheus scrape requests being processed on Zeek's main thread + now, we can safely invoke the script layer Telemetry::sync() hook. + 7.1.0-dev.413 | 2024-10-22 11:24:50 +0200 * GH-3978: Bump zeekjs to 0.12.1 (Arne Welzel, Corelight) diff --git a/NEWS b/NEWS index 8b46a774c7..bddb781f4e 100644 --- a/NEWS +++ b/NEWS @@ -93,6 +93,10 @@ Changed Functionality * The ASCII input reader now suppresses warnings for consecutive invalid lines, producing a summary of total suppressions once a valid line is encountered. +* The `Telemetry::sync()` hook is now invoked on demand. Either when the metrics + of a node are scraped via the Prometheus HTTP endpoint, or one of the collect + methods is invoked from Zeek script. + Removed Functionality --------------------- diff --git a/VERSION b/VERSION index 172c4a2648..71841ca5e8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -7.1.0-dev.413 +7.1.0-dev.418 diff --git a/scripts/base/frameworks/telemetry/main.zeek b/scripts/base/frameworks/telemetry/main.zeek index 0e5ce1b1f5..ce67c642d1 100644 --- a/scripts/base/frameworks/telemetry/main.zeek +++ b/scripts/base/frameworks/telemetry/main.zeek @@ -263,22 +263,15 @@ export { label_values: labels_vector, measurement: double): bool; - ## Telemetry sync hook. - ## - ## This hook is invoked every :zeek:see:`Telemetry::sync_interval` - ## for script writers to synchronize or mirror metrics with the - ## telemetry subsystem. For example, when tracking table or value - ## footprints with gauges, the value in question can be set on an actual - ## :zeek:see:`Telemetry::Gauge` instance during execution of this hook. - ## - ## Implementations should be lightweight, this hook may be called - ## multiple times per minute. The interval can increased by changing - ## :zeek:see:`Telemetry::sync_interval` at the cost of delaying - ## metric updates and thereby reducing granularity. - global sync: hook(); - ## Interval at which the :zeek:see:`Telemetry::sync` hook is invoked. - option sync_interval = 10sec; + ## + ## By default, the hook is invoked on demand, setting this option to + ## a positive interval allows to invoke it regularly, too. Regular + ## invocations are relative to Zeek's network time. + ## + ## Note that on-demand hook invocation will happen even if this + ## is set. + option sync_interval = 0sec &deprecated="Remove in 8.1. If you require regular sync invocation, do so explicitly in a scheduled event."; ## Collect all counter and gauge metrics matching the given *name* and *prefix*. ## @@ -493,7 +486,9 @@ function collect_histogram_metrics(prefix: string, name: string): vector of Hist event run_sync_hook() { hook Telemetry::sync(); +@pragma push ignore-deprecations schedule sync_interval { run_sync_hook() }; +@pragma pop ignore-deprecations } # Expose the Zeek version as Prometheus style info metric @@ -508,7 +503,10 @@ global version_gauge_family = Telemetry::register_gauge_family([ event zeek_init() { - schedule sync_interval { run_sync_hook() }; +@pragma push ignore-deprecations + if ( sync_interval > 0sec ) + schedule sync_interval { run_sync_hook() }; +@pragma pop ignore-deprecations local v = Version::info; local labels = vector(cat(v$version_number), diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index f053cec928..846679f8b9 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -5933,6 +5933,21 @@ export { sum: double; }; + ## Telemetry sync hook. + ## + ## This hook is invoked when metrics are requested via functions + ## :zeek:see:`Telemetry::collect_metrics` and :zeek:see:`Telemetry::collect_histogram_metrics`, + ## or just before Zeek collects metrics when being scraped through + ## its Prometheus endpoint. + ## Script writers can use it to synchronize (or mirror) metrics with the + ## telemetry subsystem. For example, when tracking table or value + ## footprints with gauges, the value in question can be set on an actual + ## :zeek:see:`Telemetry::Gauge` instance during execution of this hook. + ## + ## Implementations should be lightweight, this hook may be called + ## multiple times per minute. + global sync: hook(); + type MetricVector : vector of Metric; type HistogramMetricVector : vector of HistogramMetric; diff --git a/scripts/policy/misc/stats.zeek b/scripts/policy/misc/stats.zeek index cf5bb1cbd0..c324a728be 100644 --- a/scripts/policy/misc/stats.zeek +++ b/scripts/policy/misc/stats.zeek @@ -130,9 +130,20 @@ global packet_lag_gf = Telemetry::register_gauge_family([ $help_text="Difference of network time and wallclock time in seconds.", ]); +# Gauge as motivated by: +# https://www.robustperception.io/are-increasing-timestamps-counters-or-gauges/ +global network_time_cf = Telemetry::register_gauge_family([ + $prefix="zeek", + $name="net-timestamp", + $unit="seconds", + $help_text="The current network time.", +]); + global no_labels: vector of string; -hook Telemetry::sync() { +hook Telemetry::sync() + { + Telemetry::gauge_family_set(network_time_cf, no_labels, time_to_double(network_time())); local net_stats = get_net_stats(); Telemetry::counter_family_set(bytes_received_cf, no_labels, net_stats$bytes_recvd); Telemetry::counter_family_set(packets_received_cf, no_labels, net_stats$pkts_recvd); @@ -148,7 +159,7 @@ hook Telemetry::sync() { Telemetry::gauge_family_set(packet_lag_gf, no_labels, interval_to_double(current_time() - network_time())); } -} + } event zeek_init() &priority=5 { diff --git a/src/telemetry/Manager.cc b/src/telemetry/Manager.cc index 3272641882..6441807544 100644 --- a/src/telemetry/Manager.cc +++ b/src/telemetry/Manager.cc @@ -13,9 +13,9 @@ #include #include #include -#include #include "zeek/3rdparty/doctest.h" +#include "zeek/Func.h" #include "zeek/ID.h" #include "zeek/RunState.h" #include "zeek/ZeekString.h" @@ -256,6 +256,28 @@ static bool compare_histograms(const std::optional& a, const std::optional return comparer(a, b, metric_record_type); } +void Manager::InvokeTelemetrySyncHook() { + static const auto sync_hook = zeek::id::find_func("Telemetry::sync"); + + if ( sync_hook->Flavor() != FUNC_FLAVOR_HOOK ) + reporter->InternalError("Telemetry::sync not a hook?"); + + if ( in_sync_hook ) { + reporter->Warning("Telemetry::sync() hook invoked recursively"); + return; + } + + in_sync_hook = true; + + zeek::Args empty; + auto result = sync_hook->Invoke(&empty); + + if ( ! result->IsOne() ) + reporter->Warning("Telemetry::sync() implementations skipped due to 'break' usage"); + + in_sync_hook = false; +} + ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view name_pattern) { static auto metrics_vector_type = zeek::id::find_type("Telemetry::MetricVector"); static auto string_vec_type = zeek::id::find_type("string_vec"); @@ -268,6 +290,8 @@ ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view static auto metric_opts_type = zeek::id::find_type("Telemetry::MetricOpts"); static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); + InvokeTelemetrySyncHook(); + VectorValPtr ret_val = make_intrusive(metrics_vector_type); // Due to the name containing the full information about a metric including a potential unit add an @@ -345,6 +369,8 @@ ValPtr Manager::CollectHistogramMetrics(std::string_view prefix_pattern, std::st static auto metric_opts_type = zeek::id::find_type("Telemetry::MetricOpts"); static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); + InvokeTelemetrySyncHook(); + VectorValPtr ret_val = make_intrusive(metrics_vector_type); // Due to the name containing the full information about a metric including a potential unit add an @@ -574,6 +600,8 @@ void Manager::ProcessFd(int fd, int flags) { for ( const auto& [name, f] : families ) f->RunCallbacks(); + InvokeTelemetrySyncHook(); + collector_response_idx = collector_request_idx; lk.unlock(); diff --git a/src/telemetry/Manager.h b/src/telemetry/Manager.h index 26e2403f58..9523385b77 100644 --- a/src/telemetry/Manager.h +++ b/src/telemetry/Manager.h @@ -259,6 +259,13 @@ private: RecordValPtr GetMetricOptsRecord(const prometheus::MetricFamily& metric_family); void BuildClusterJson(); + /** + * Runs the Telemetry::sync() hook in Zeek script land. + */ + void InvokeTelemetrySyncHook(); + + bool in_sync_hook = false; + std::map> families; std::map opts_records; diff --git a/testing/btest/Baseline/bifs.event-handler-stats/out b/testing/btest/Baseline/bifs.event-handler-stats/out index ae43a66216..d438e254c3 100644 --- a/testing/btest/Baseline/bifs.event-handler-stats/out +++ b/testing/btest/Baseline/bifs.event-handler-stats/out @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -[[name=Broker::log_flush, times_called=2], [name=ChecksumOffloading::check, times_called=2], [name=NetControl::init, times_called=1], [name=analyzer_confirmation_info, times_called=1], [name=connection_established, times_called=1], [name=connection_state_remove, times_called=1], [name=file_new, times_called=1], [name=file_over_new_connection, times_called=1], [name=file_sniff, times_called=1], [name=file_state_remove, times_called=1], [name=filter_change_tracking, times_called=3], [name=get_file_handle, times_called=4], [name=http_begin_entity, times_called=2], [name=http_end_entity, times_called=2], [name=http_header, times_called=13], [name=http_message_done, times_called=2], [name=http_reply, times_called=1], [name=http_request, times_called=1], [name=net_done, times_called=1], [name=new_connection, times_called=1], [name=run_sync_hook, times_called=2], [name=zeek_done, times_called=1], [name=zeek_init, times_called=1]] +[[name=Broker::log_flush, times_called=2], [name=ChecksumOffloading::check, times_called=2], [name=NetControl::init, times_called=1], [name=analyzer_confirmation_info, times_called=1], [name=connection_established, times_called=1], [name=connection_state_remove, times_called=1], [name=file_new, times_called=1], [name=file_over_new_connection, times_called=1], [name=file_sniff, times_called=1], [name=file_state_remove, times_called=1], [name=filter_change_tracking, times_called=3], [name=get_file_handle, times_called=4], [name=http_begin_entity, times_called=2], [name=http_end_entity, times_called=2], [name=http_header, times_called=13], [name=http_message_done, times_called=2], [name=http_reply, times_called=1], [name=http_request, times_called=1], [name=net_done, times_called=1], [name=new_connection, times_called=1], [name=zeek_done, times_called=1], [name=zeek_init, times_called=1]] diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-collect/out b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-collect/out new file mode 100644 index 0000000000..393c51b20d --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-collect/out @@ -0,0 +1,13 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +node up +sync, 1, tcp +btest_connections_total, [tcp], 1.0 +sync, 2, udp +btest_connections_total, [udp], 1.0 +btest_connections_total, [tcp], 1.0 +sync, 3, udp +btest_connections_total, [udp], 2.0 +btest_connections_total, [tcp], 1.0 +sync, 4, udp +btest_connections_total, [udp], 2.0 +btest_connections_total, [tcp], 1.0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/.stderr b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/.stderr new file mode 100644 index 0000000000..32c85b57ed --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/.stderr @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +warning: Telemetry::sync() hook invoked recursively +warning: Telemetry::sync() implementations skipped due to 'break' usage diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/out b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/out new file mode 100644 index 0000000000..a4801a85eb --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync-warnings/out @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +node up diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics1.txt b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics1.txt new file mode 100644 index 0000000000..7df20bcb5a --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics1.txt @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +btest_connections_total{proto="tcp"} 1 diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics2.txt b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics2.txt new file mode 100644 index 0000000000..db8cfd6872 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics2.txt @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +btest_connections_total{proto="udp"} 1 +btest_connections_total{proto="tcp"} 1 diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics3.txt b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics3.txt new file mode 100644 index 0000000000..cce741488a --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/metrics3.txt @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +btest_connections_total{proto="udp"} 2 +btest_connections_total{proto="tcp"} 1 diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/zeek..stdout b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/zeek..stdout new file mode 100644 index 0000000000..f3259c3511 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.sync/zeek..stdout @@ -0,0 +1,5 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +node up +sync, 1, tcp +sync, 2, udp +sync, 3, udp diff --git a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered index b5c04c3f44..27fe96a750 100644 --- a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered +++ b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered @@ -1,7 +1,71 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -XXXXXXXXXX.XXXXXX zeek counter zeek_ended_sessions_total reason inactivity 0.0 -XXXXXXXXXX.XXXXXX zeek counter zeek_total_sessions_total protocol tcp 1.0 -XXXXXXXXXX.XXXXXX zeek gauge zeek_active_sessions protocol tcp 1.0 -XXXXXXXXXX.XXXXXX zeek counter zeek_ended_sessions_total reason inactivity 0.0 -XXXXXXXXXX.XXXXXX zeek counter zeek_total_sessions_total protocol tcp 500.0 -XXXXXXXXXX.XXXXXX zeek gauge zeek_active_sessions protocol tcp 500.0 +1300475167.096535 zeek counter zeek_net_received_bytes_total (empty) (empty) 87.0 +1300475167.096535 zeek counter zeek_net_received_packets_total (empty) (empty) 1.0 +1300475167.096535 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475167.096535 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475167.096535 zeek counter zeek_total_sessions_total protocol udp 1.0 +1300475167.096535 zeek counter zeek_total_sessions_total protocol tcp 0.0 +1300475167.096535 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475167.096535 +1300475167.096535 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475167.096535 zeek gauge zeek_active_sessions protocol udp 1.0 +1300475167.096535 zeek gauge zeek_active_sessions protocol tcp 0.0 +1300475168.652003 zeek counter zeek_net_received_bytes_total (empty) (empty) 1142.0 +1300475168.652003 zeek counter zeek_net_received_packets_total (empty) (empty) 6.0 +1300475168.652003 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475168.652003 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475168.652003 zeek counter zeek_total_sessions_total protocol udp 3.0 +1300475168.652003 zeek counter zeek_total_sessions_total protocol tcp 1.0 +1300475168.652003 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475168.652003 +1300475168.652003 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475168.652003 zeek gauge zeek_active_sessions protocol udp 3.0 +1300475168.652003 zeek gauge zeek_active_sessions protocol tcp 1.0 +1300475169.780331 zeek counter zeek_net_received_bytes_total (empty) (empty) 23385.0 +1300475169.780331 zeek counter zeek_net_received_packets_total (empty) (empty) 113.0 +1300475169.780331 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475169.780331 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475169.780331 zeek counter zeek_total_sessions_total protocol udp 17.0 +1300475169.780331 zeek counter zeek_total_sessions_total protocol tcp 10.0 +1300475169.780331 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475169.780331 +1300475169.780331 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475169.780331 zeek gauge zeek_active_sessions protocol udp 17.0 +1300475169.780331 zeek gauge zeek_active_sessions protocol tcp 10.0 +1300475170.811500 zeek counter zeek_net_received_bytes_total (empty) (empty) 23604.0 +1300475170.811500 zeek counter zeek_net_received_packets_total (empty) (empty) 116.0 +1300475170.811500 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475170.811500 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475170.811500 zeek counter zeek_total_sessions_total protocol udp 18.0 +1300475170.811500 zeek counter zeek_total_sessions_total protocol tcp 10.0 +1300475170.811500 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475170.8115 +1300475170.811500 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475170.811500 zeek gauge zeek_active_sessions protocol udp 18.0 +1300475170.811500 zeek gauge zeek_active_sessions protocol tcp 10.0 +1300475171.886280 zeek counter zeek_net_received_bytes_total (empty) (empty) 24248.0 +1300475171.886280 zeek counter zeek_net_received_packets_total (empty) (empty) 124.0 +1300475171.886280 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475171.886280 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475171.886280 zeek counter zeek_total_sessions_total protocol udp 21.0 +1300475171.886280 zeek counter zeek_total_sessions_total protocol tcp 10.0 +1300475171.886280 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475171.88628 +1300475171.886280 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475171.886280 zeek gauge zeek_active_sessions protocol udp 21.0 +1300475171.886280 zeek gauge zeek_active_sessions protocol tcp 10.0 +1300475173.116749 zeek counter zeek_net_received_bytes_total (empty) (empty) 24679.0 +1300475173.116749 zeek counter zeek_net_received_packets_total (empty) (empty) 129.0 +1300475173.116749 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475173.116749 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475173.116749 zeek counter zeek_total_sessions_total protocol udp 22.0 +1300475173.116749 zeek counter zeek_total_sessions_total protocol tcp 10.0 +1300475173.116749 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475173.116749 +1300475173.116749 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475173.116749 zeek gauge zeek_active_sessions protocol udp 22.0 +1300475173.116749 zeek gauge zeek_active_sessions protocol tcp 10.0 +1300475173.475401 zeek counter zeek_net_received_bytes_total (empty) (empty) 25260.0 +1300475173.475401 zeek counter zeek_net_received_packets_total (empty) (empty) 136.0 +1300475173.475401 zeek counter zeek_ended_sessions_total reason inactivity 0.0 +1300475173.475401 zeek counter zeek_total_sessions_total protocol icmp 0.0 +1300475173.475401 zeek counter zeek_total_sessions_total protocol udp 24.0 +1300475173.475401 zeek counter zeek_total_sessions_total protocol tcp 10.0 +1300475173.475401 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475173.475401 +1300475173.475401 zeek gauge zeek_active_sessions protocol icmp 0.0 +1300475173.475401 zeek gauge zeek_active_sessions protocol udp 24.0 +1300475173.475401 zeek gauge zeek_active_sessions protocol tcp 9.0 diff --git a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered index d47ba69d07..28672af18d 100644 --- a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered +++ b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered @@ -1,3 +1,8 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 -XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,322.0,90.0,5.0,76.0,7.0,0.0 1650.264644 500.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 6.0,14.0,0.0,3.0,10.0,0.0,1.0,0.0,0.0,0.0,0.0 4.651298 34.0 diff --git a/testing/btest/scripts/base/frameworks/telemetry/basic.zeek b/testing/btest/scripts/base/frameworks/telemetry/basic.zeek index 0592bff684..14b7ea9904 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/basic.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/basic.zeek @@ -1,5 +1,5 @@ # @TEST-DOC: Using and listing of counters and gauges using the telemetry module. -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out diff --git a/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek b/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek index 3f01d9ddf3..c7ab5651fa 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek @@ -1,4 +1,4 @@ -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out diff --git a/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek b/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek index c0a9c73b2d..7fd72f92b3 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek @@ -1,6 +1,6 @@ # @TEST-DOC: Query for zeek event-handler-invocations metrics counting number of times handlers were called. -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out diff --git a/testing/btest/scripts/base/frameworks/telemetry/internal-metrics.zeek b/testing/btest/scripts/base/frameworks/telemetry/internal-metrics.zeek index d130a22e48..0c98d40e1c 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/internal-metrics.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/internal-metrics.zeek @@ -1,5 +1,5 @@ # @TEST-DOC: Query Broker's telemetry to verify it ends up in Zeek's registry. -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out diff --git a/testing/btest/scripts/base/frameworks/telemetry/sync-collect.zeek b/testing/btest/scripts/base/frameworks/telemetry/sync-collect.zeek new file mode 100644 index 0000000000..c2d5de46fe --- /dev/null +++ b/testing/btest/scripts/base/frameworks/telemetry/sync-collect.zeek @@ -0,0 +1,48 @@ +# @TEST-DOC: Calling collect_metrics() invokes Telemetry::sync. +# Not compilable to C++ due to globals being initialized to a record that +# has an opaque type as a field. +# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" +# +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out + + +@load base/frameworks/telemetry + +global connections_by_proto_cf = Telemetry::register_counter_family([ + $prefix="btest", + $name="connections", + $unit="", + $help_text="Total number of monitored connections", + $label_names=vector("proto") +]); + +function print_metrics(ms: vector of Telemetry::Metric) { + for (_, m in ms) { + print m$opts$name, m$label_values, m$value; + } +} + +event zeek_init() + { + print "node up"; + local ms = Telemetry::collect_metrics("btest"); + print_metrics(ms); + ms = Telemetry::collect_metrics("btest"); + print_metrics(ms); + ms = Telemetry::collect_metrics("btest"); + print_metrics(ms); + local hm = Telemetry::collect_histogram_metrics("btest"); + print_metrics(ms); + } + + +global sync_calls = 0; + +hook Telemetry::sync() + { + ++sync_calls; + local proto = sync_calls == 1 ? "tcp" : "udp"; + print "sync", sync_calls, proto; + Telemetry::counter_family_inc(connections_by_proto_cf, vector(proto)); + } diff --git a/testing/btest/scripts/base/frameworks/telemetry/sync-warnings.zeek b/testing/btest/scripts/base/frameworks/telemetry/sync-warnings.zeek new file mode 100644 index 0000000000..971cf88408 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/telemetry/sync-warnings.zeek @@ -0,0 +1,43 @@ +# @TEST-DOC: Breaking and recursive Telemetry::sync() warning +# Not compilable to C++ due to globals being initialized to a record that +# has an opaque type as a field. +# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" +# +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out +# @TEST-EXEC: btest-diff .stderr + + +@load base/frameworks/telemetry + +global connections_by_proto_cf = Telemetry::register_counter_family([ + $prefix="btest", + $name="connections", + $unit="", + $help_text="Total number of monitored connections", + $label_names=vector("proto") +]); + +event zeek_init() + { + print "node up"; + Telemetry::counter_family_inc(connections_by_proto_cf, vector("tcp")); + local ms = Telemetry::collect_metrics("btest"); + } + + +hook Telemetry::sync() + { + # Calling collect_metrics() in Telemetry::sync() is not good as + # it would invoke Telemetry::sync() recursively. The manager will + # emit a warning and not run the second Telemetry::sync() invocation. + local ms = Telemetry::collect_metrics("btest"); + } + +hook Telemetry::sync() &priority=-100 + { + # break is not good as it prevents other Telemetry::sync() hooks + # from running. This will produce a warning. + # We could find this via script validation if we wanted to. + break; + } diff --git a/testing/btest/scripts/base/frameworks/telemetry/sync.zeek b/testing/btest/scripts/base/frameworks/telemetry/sync.zeek new file mode 100644 index 0000000000..8cecb911ad --- /dev/null +++ b/testing/btest/scripts/base/frameworks/telemetry/sync.zeek @@ -0,0 +1,68 @@ +# @TEST-DOC: Verify Telemetry::sync() is invoked for metric scraping via the Prometheus HTTP endpoint. +# Not compilable to C++ due to globals being initialized to a record that +# has an opaque type as a field. +# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" +# @TEST-REQUIRES: which jq +# @TEST-REQUIRES: which curl +# +# @TEST-PORT: METRICS_PORT +# +# @TEST-EXEC: chmod +x fetch-metrics.sh +# @TEST-EXEC: zeek --parse-only %INPUT +# @TEST-EXEC: btest-bg-run zeek ZEEKPATH=$ZEEKPATH:.. zeek -b %INPUT +# @TEST-EXEC: $SCRIPTS/wait-for-file zeek/up 5 || (btest-bg-wait -k 1 && false) +# @TEST-EXEC: ./fetch-metrics.sh 1.trace metrics1.txt +# @TEST-EXEC: ./fetch-metrics.sh 2.trace metrics2.txt +# @TEST-EXEC: ./fetch-metrics.sh 3.trace metrics3.txt +# @TEST-EXEC: btest-bg-wait 10 +# +# @TEST-EXEC: btest-diff zeek/.stdout +# @TEST-EXEC: btest-diff metrics1.txt +# @TEST-EXEC: btest-diff metrics2.txt +# @TEST-EXEC: btest-diff metrics3.txt + +@TEST-START-FILE fetch-metrics.sh +#! /usr/bin/env bash +set -ux +trace_file=$1 +output_file=$2 + +PORT=$(echo ${METRICS_PORT} | cut -d '/' -f 1) +URL=http://localhost:${PORT}/metrics + +curl -m 5 --trace $trace_file $URL | grep ^btest > $output_file + +exit 0 +@TEST-END-FILE + +@load base/frameworks/telemetry + +redef exit_only_after_terminate = T; +redef Telemetry::metrics_port = to_port(getenv("METRICS_PORT")); + +event zeek_init() + { + print "node up"; + system("touch up"); + } + +global connections_by_proto_cf = Telemetry::register_counter_family([ + $prefix="btest", + $name="connections", + $unit="", + $help_text="Total number of monitored connections", + $label_names=vector("proto") +]); + +global sync_calls = 0; + +hook Telemetry::sync() + { + ++sync_calls; + local proto = sync_calls == 1 ? "tcp" : "udp"; + print "sync", sync_calls, proto; + Telemetry::counter_family_inc(connections_by_proto_cf, vector(proto)); + + if ( sync_calls == 3 ) + terminate(); + } diff --git a/testing/btest/scripts/policy/frameworks/telemetry/log.zeek b/testing/btest/scripts/policy/frameworks/telemetry/log.zeek index ab74478913..1faff83719 100644 --- a/testing/btest/scripts/policy/frameworks/telemetry/log.zeek +++ b/testing/btest/scripts/policy/frameworks/telemetry/log.zeek @@ -1,20 +1,22 @@ # @TEST-DOC: Test loading of telemetry/log and smoke check the telemetry.log file -# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out -# @TEST-EXEC: grep 'zeek.*sessions' telemetry.log > telemetry.log.filtered +# @TEST-EXEC: zeek -b -Cr $TRACES/wikipedia.trace %INPUT > out +# @TEST-EXEC: grep -E 'zeek_(net|.*sessions)' telemetry.log > telemetry.log.filtered # @TEST-EXEC: grep 'zeek.*connection_duration' telemetry_histogram.log > telemetry_histogram.log.filtered -# @TEST-EXEC: btest-diff telemetry.log.filtered +# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff telemetry.log.filtered # @TEST-EXEC: btest-diff telemetry_histogram.log.filtered +@load misc/stats @load frameworks/telemetry/log +redef Telemetry::log_interval = 1sec; global connection_duration_hf = Telemetry::register_histogram_family([ $prefix="zeek", $name="connection_duration", $unit="seconds", $help_text="Monitored connection duration", - $bounds=vector(2.0, 3.0, 4.0, 5.0, 6.0, 10.0) + $bounds=vector(0.0001, 0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0) ]); global connection_duration_h = Telemetry::histogram_with(connection_duration_hf); diff --git a/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek b/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek index 50a6fd8d2c..231f743999 100644 --- a/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek +++ b/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek @@ -1,5 +1,5 @@ # @TEST-DOC: Query the Prometheus endpoint and smoke check that zeek_version_info{...} is contained in the response for all cluster nodes. -# Note compilable to C++ due to globals being initialized to a record that +# Not compilable to C++ due to globals being initialized to a record that # has an opaque type as a field. # @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # @TEST-REQUIRES: which jq