Merge remote-tracking branch 'origin/topic/awelzel/3947-telemetry-hook-scrape'

* origin/topic/awelzel/3947-telemetry-hook-scrape:
  btest/telemetry: Fix "Note compilable" typo
  misc/stats: Add zeek_net_timestamp_seconds
  telemetry/Manager: Remove variant include
  telemetry: Invoke Telemetry::sync() only at scrape/collection time
This commit is contained in:
Arne Welzel 2024-10-22 19:04:22 +02:00
commit daa358c840
27 changed files with 386 additions and 38 deletions

24
CHANGES
View file

@ -1,3 +1,27 @@
7.1.0-dev.418 | 2024-10-22 19:04:22 +0200
* btest/telemetry: Fix "Note compilable" typo (Arne Welzel, Corelight)
* misc/stats: Add zeek_net_timestamp_seconds (Arne Welzel, Corelight)
Adding a metric for the network time value itself should make it
possible to observe it stopping or growing slowly as compared to
realtime when Zeek isn't able to keep up.
Also, modify the telemetry/log.zeek test to include misc/stats and
log at a higher frequency with a more interesting pcap.
* telemetry/Manager: Remove variant include (Arne Welzel, Corelight)
* GH-3947: telemetry: Invoke Telemetry::sync() only at scrape/collection time (Arne Welzel, Corelight)
This stops invoking Telemetry::sync() via a scheduled event and instead
only invokes it on-demand. This makes metric collection network time
independent and lazier, too.
With Prometheus scrape requests being processed on Zeek's main thread
now, we can safely invoke the script layer Telemetry::sync() hook.
7.1.0-dev.413 | 2024-10-22 11:24:50 +0200
* GH-3978: Bump zeekjs to 0.12.1 (Arne Welzel, Corelight)

4
NEWS
View file

@ -93,6 +93,10 @@ Changed Functionality
* The ASCII input reader now suppresses warnings for consecutive invalid lines,
producing a summary of total suppressions once a valid line is encountered.
* The `Telemetry::sync()` hook is now invoked on demand. Either when the metrics
of a node are scraped via the Prometheus HTTP endpoint, or one of the collect
methods is invoked from Zeek script.
Removed Functionality
---------------------

View file

@ -1 +1 @@
7.1.0-dev.413
7.1.0-dev.418

View file

@ -263,22 +263,15 @@ export {
label_values: labels_vector,
measurement: double): bool;
## Telemetry sync hook.
##
## This hook is invoked every :zeek:see:`Telemetry::sync_interval`
## for script writers to synchronize or mirror metrics with the
## telemetry subsystem. For example, when tracking table or value
## footprints with gauges, the value in question can be set on an actual
## :zeek:see:`Telemetry::Gauge` instance during execution of this hook.
##
## Implementations should be lightweight, this hook may be called
## multiple times per minute. The interval can increased by changing
## :zeek:see:`Telemetry::sync_interval` at the cost of delaying
## metric updates and thereby reducing granularity.
global sync: hook();
## Interval at which the :zeek:see:`Telemetry::sync` hook is invoked.
option sync_interval = 10sec;
##
## By default, the hook is invoked on demand, setting this option to
## a positive interval allows to invoke it regularly, too. Regular
## invocations are relative to Zeek's network time.
##
## Note that on-demand hook invocation will happen even if this
## is set.
option sync_interval = 0sec &deprecated="Remove in 8.1. If you require regular sync invocation, do so explicitly in a scheduled event.";
## Collect all counter and gauge metrics matching the given *name* and *prefix*.
##
@ -493,7 +486,9 @@ function collect_histogram_metrics(prefix: string, name: string): vector of Hist
event run_sync_hook()
{
hook Telemetry::sync();
@pragma push ignore-deprecations
schedule sync_interval { run_sync_hook() };
@pragma pop ignore-deprecations
}
# Expose the Zeek version as Prometheus style info metric
@ -508,7 +503,10 @@ global version_gauge_family = Telemetry::register_gauge_family([
event zeek_init()
{
schedule sync_interval { run_sync_hook() };
@pragma push ignore-deprecations
if ( sync_interval > 0sec )
schedule sync_interval { run_sync_hook() };
@pragma pop ignore-deprecations
local v = Version::info;
local labels = vector(cat(v$version_number),

View file

@ -5933,6 +5933,21 @@ export {
sum: double;
};
## Telemetry sync hook.
##
## This hook is invoked when metrics are requested via functions
## :zeek:see:`Telemetry::collect_metrics` and :zeek:see:`Telemetry::collect_histogram_metrics`,
## or just before Zeek collects metrics when being scraped through
## its Prometheus endpoint.
## Script writers can use it to synchronize (or mirror) metrics with the
## telemetry subsystem. For example, when tracking table or value
## footprints with gauges, the value in question can be set on an actual
## :zeek:see:`Telemetry::Gauge` instance during execution of this hook.
##
## Implementations should be lightweight, this hook may be called
## multiple times per minute.
global sync: hook();
type MetricVector : vector of Metric;
type HistogramMetricVector : vector of HistogramMetric;

View file

@ -130,9 +130,20 @@ global packet_lag_gf = Telemetry::register_gauge_family([
$help_text="Difference of network time and wallclock time in seconds.",
]);
# Gauge as motivated by:
# https://www.robustperception.io/are-increasing-timestamps-counters-or-gauges/
global network_time_cf = Telemetry::register_gauge_family([
$prefix="zeek",
$name="net-timestamp",
$unit="seconds",
$help_text="The current network time.",
]);
global no_labels: vector of string;
hook Telemetry::sync() {
hook Telemetry::sync()
{
Telemetry::gauge_family_set(network_time_cf, no_labels, time_to_double(network_time()));
local net_stats = get_net_stats();
Telemetry::counter_family_set(bytes_received_cf, no_labels, net_stats$bytes_recvd);
Telemetry::counter_family_set(packets_received_cf, no_labels, net_stats$pkts_recvd);
@ -148,7 +159,7 @@ hook Telemetry::sync() {
Telemetry::gauge_family_set(packet_lag_gf, no_labels,
interval_to_double(current_time() - network_time()));
}
}
}
event zeek_init() &priority=5
{

View file

@ -13,9 +13,9 @@
#include <rapidjson/writer.h>
#include <algorithm>
#include <thread>
#include <variant>
#include "zeek/3rdparty/doctest.h"
#include "zeek/Func.h"
#include "zeek/ID.h"
#include "zeek/RunState.h"
#include "zeek/ZeekString.h"
@ -256,6 +256,28 @@ static bool compare_histograms(const std::optional<ZVal>& a, const std::optional
return comparer(a, b, metric_record_type);
}
void Manager::InvokeTelemetrySyncHook() {
static const auto sync_hook = zeek::id::find_func("Telemetry::sync");
if ( sync_hook->Flavor() != FUNC_FLAVOR_HOOK )
reporter->InternalError("Telemetry::sync not a hook?");
if ( in_sync_hook ) {
reporter->Warning("Telemetry::sync() hook invoked recursively");
return;
}
in_sync_hook = true;
zeek::Args empty;
auto result = sync_hook->Invoke(&empty);
if ( ! result->IsOne() )
reporter->Warning("Telemetry::sync() implementations skipped due to 'break' usage");
in_sync_hook = false;
}
ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view name_pattern) {
static auto metrics_vector_type = zeek::id::find_type<VectorType>("Telemetry::MetricVector");
static auto string_vec_type = zeek::id::find_type<zeek::VectorType>("string_vec");
@ -268,6 +290,8 @@ ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view
static auto metric_opts_type = zeek::id::find_type<zeek::RecordType>("Telemetry::MetricOpts");
static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type");
InvokeTelemetrySyncHook();
VectorValPtr ret_val = make_intrusive<VectorVal>(metrics_vector_type);
// Due to the name containing the full information about a metric including a potential unit add an
@ -345,6 +369,8 @@ ValPtr Manager::CollectHistogramMetrics(std::string_view prefix_pattern, std::st
static auto metric_opts_type = zeek::id::find_type<zeek::RecordType>("Telemetry::MetricOpts");
static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type");
InvokeTelemetrySyncHook();
VectorValPtr ret_val = make_intrusive<VectorVal>(metrics_vector_type);
// Due to the name containing the full information about a metric including a potential unit add an
@ -574,6 +600,8 @@ void Manager::ProcessFd(int fd, int flags) {
for ( const auto& [name, f] : families )
f->RunCallbacks();
InvokeTelemetrySyncHook();
collector_response_idx = collector_request_idx;
lk.unlock();

View file

@ -259,6 +259,13 @@ private:
RecordValPtr GetMetricOptsRecord(const prometheus::MetricFamily& metric_family);
void BuildClusterJson();
/**
* Runs the Telemetry::sync() hook in Zeek script land.
*/
void InvokeTelemetrySyncHook();
bool in_sync_hook = false;
std::map<std::string, std::shared_ptr<MetricFamily>> families;
std::map<std::string, RecordValPtr> opts_records;

View file

@ -1,2 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[[name=Broker::log_flush, times_called=2], [name=ChecksumOffloading::check, times_called=2], [name=NetControl::init, times_called=1], [name=analyzer_confirmation_info, times_called=1], [name=connection_established, times_called=1], [name=connection_state_remove, times_called=1], [name=file_new, times_called=1], [name=file_over_new_connection, times_called=1], [name=file_sniff, times_called=1], [name=file_state_remove, times_called=1], [name=filter_change_tracking, times_called=3], [name=get_file_handle, times_called=4], [name=http_begin_entity, times_called=2], [name=http_end_entity, times_called=2], [name=http_header, times_called=13], [name=http_message_done, times_called=2], [name=http_reply, times_called=1], [name=http_request, times_called=1], [name=net_done, times_called=1], [name=new_connection, times_called=1], [name=run_sync_hook, times_called=2], [name=zeek_done, times_called=1], [name=zeek_init, times_called=1]]
[[name=Broker::log_flush, times_called=2], [name=ChecksumOffloading::check, times_called=2], [name=NetControl::init, times_called=1], [name=analyzer_confirmation_info, times_called=1], [name=connection_established, times_called=1], [name=connection_state_remove, times_called=1], [name=file_new, times_called=1], [name=file_over_new_connection, times_called=1], [name=file_sniff, times_called=1], [name=file_state_remove, times_called=1], [name=filter_change_tracking, times_called=3], [name=get_file_handle, times_called=4], [name=http_begin_entity, times_called=2], [name=http_end_entity, times_called=2], [name=http_header, times_called=13], [name=http_message_done, times_called=2], [name=http_reply, times_called=1], [name=http_request, times_called=1], [name=net_done, times_called=1], [name=new_connection, times_called=1], [name=zeek_done, times_called=1], [name=zeek_init, times_called=1]]

View file

@ -0,0 +1,13 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
node up
sync, 1, tcp
btest_connections_total, [tcp], 1.0
sync, 2, udp
btest_connections_total, [udp], 1.0
btest_connections_total, [tcp], 1.0
sync, 3, udp
btest_connections_total, [udp], 2.0
btest_connections_total, [tcp], 1.0
sync, 4, udp
btest_connections_total, [udp], 2.0
btest_connections_total, [tcp], 1.0

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
warning: Telemetry::sync() hook invoked recursively
warning: Telemetry::sync() implementations skipped due to 'break' usage

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
node up

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
btest_connections_total{proto="tcp"} 1

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
btest_connections_total{proto="udp"} 1
btest_connections_total{proto="tcp"} 1

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
btest_connections_total{proto="udp"} 2
btest_connections_total{proto="tcp"} 1

View file

@ -0,0 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
node up
sync, 1, tcp
sync, 2, udp
sync, 3, udp

View file

@ -1,7 +1,71 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
XXXXXXXXXX.XXXXXX zeek counter zeek_ended_sessions_total reason inactivity 0.0
XXXXXXXXXX.XXXXXX zeek counter zeek_total_sessions_total protocol tcp 1.0
XXXXXXXXXX.XXXXXX zeek gauge zeek_active_sessions protocol tcp 1.0
XXXXXXXXXX.XXXXXX zeek counter zeek_ended_sessions_total reason inactivity 0.0
XXXXXXXXXX.XXXXXX zeek counter zeek_total_sessions_total protocol tcp 500.0
XXXXXXXXXX.XXXXXX zeek gauge zeek_active_sessions protocol tcp 500.0
1300475167.096535 zeek counter zeek_net_received_bytes_total (empty) (empty) 87.0
1300475167.096535 zeek counter zeek_net_received_packets_total (empty) (empty) 1.0
1300475167.096535 zeek counter zeek_ended_sessions_total reason inactivity 0.0
1300475167.096535 zeek counter zeek_total_sessions_total protocol icmp 0.0
1300475167.096535 zeek counter zeek_total_sessions_total protocol udp 1.0
1300475167.096535 zeek counter zeek_total_sessions_total protocol tcp 0.0
1300475167.096535 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475167.096535
1300475167.096535 zeek gauge zeek_active_sessions protocol icmp 0.0
1300475167.096535 zeek gauge zeek_active_sessions protocol udp 1.0
1300475167.096535 zeek gauge zeek_active_sessions protocol tcp 0.0
1300475168.652003 zeek counter zeek_net_received_bytes_total (empty) (empty) 1142.0
1300475168.652003 zeek counter zeek_net_received_packets_total (empty) (empty) 6.0
1300475168.652003 zeek counter zeek_ended_sessions_total reason inactivity 0.0
1300475168.652003 zeek counter zeek_total_sessions_total protocol icmp 0.0
1300475168.652003 zeek counter zeek_total_sessions_total protocol udp 3.0
1300475168.652003 zeek counter zeek_total_sessions_total protocol tcp 1.0
1300475168.652003 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475168.652003
1300475168.652003 zeek gauge zeek_active_sessions protocol icmp 0.0
1300475168.652003 zeek gauge zeek_active_sessions protocol udp 3.0
1300475168.652003 zeek gauge zeek_active_sessions protocol tcp 1.0
1300475169.780331 zeek counter zeek_net_received_bytes_total (empty) (empty) 23385.0
1300475169.780331 zeek counter zeek_net_received_packets_total (empty) (empty) 113.0
1300475169.780331 zeek counter zeek_ended_sessions_total reason inactivity 0.0
1300475169.780331 zeek counter zeek_total_sessions_total protocol icmp 0.0
1300475169.780331 zeek counter zeek_total_sessions_total protocol udp 17.0
1300475169.780331 zeek counter zeek_total_sessions_total protocol tcp 10.0
1300475169.780331 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475169.780331
1300475169.780331 zeek gauge zeek_active_sessions protocol icmp 0.0
1300475169.780331 zeek gauge zeek_active_sessions protocol udp 17.0
1300475169.780331 zeek gauge zeek_active_sessions protocol tcp 10.0
1300475170.811500 zeek counter zeek_net_received_bytes_total (empty) (empty) 23604.0
1300475170.811500 zeek counter zeek_net_received_packets_total (empty) (empty) 116.0
1300475170.811500 zeek counter zeek_ended_sessions_total reason inactivity 0.0
1300475170.811500 zeek counter zeek_total_sessions_total protocol icmp 0.0
1300475170.811500 zeek counter zeek_total_sessions_total protocol udp 18.0
1300475170.811500 zeek counter zeek_total_sessions_total protocol tcp 10.0
1300475170.811500 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475170.8115
1300475170.811500 zeek gauge zeek_active_sessions protocol icmp 0.0
1300475170.811500 zeek gauge zeek_active_sessions protocol udp 18.0
1300475170.811500 zeek gauge zeek_active_sessions protocol tcp 10.0
1300475171.886280 zeek counter zeek_net_received_bytes_total (empty) (empty) 24248.0
1300475171.886280 zeek counter zeek_net_received_packets_total (empty) (empty) 124.0
1300475171.886280 zeek counter zeek_ended_sessions_total reason inactivity 0.0
1300475171.886280 zeek counter zeek_total_sessions_total protocol icmp 0.0
1300475171.886280 zeek counter zeek_total_sessions_total protocol udp 21.0
1300475171.886280 zeek counter zeek_total_sessions_total protocol tcp 10.0
1300475171.886280 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475171.88628
1300475171.886280 zeek gauge zeek_active_sessions protocol icmp 0.0
1300475171.886280 zeek gauge zeek_active_sessions protocol udp 21.0
1300475171.886280 zeek gauge zeek_active_sessions protocol tcp 10.0
1300475173.116749 zeek counter zeek_net_received_bytes_total (empty) (empty) 24679.0
1300475173.116749 zeek counter zeek_net_received_packets_total (empty) (empty) 129.0
1300475173.116749 zeek counter zeek_ended_sessions_total reason inactivity 0.0
1300475173.116749 zeek counter zeek_total_sessions_total protocol icmp 0.0
1300475173.116749 zeek counter zeek_total_sessions_total protocol udp 22.0
1300475173.116749 zeek counter zeek_total_sessions_total protocol tcp 10.0
1300475173.116749 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475173.116749
1300475173.116749 zeek gauge zeek_active_sessions protocol icmp 0.0
1300475173.116749 zeek gauge zeek_active_sessions protocol udp 22.0
1300475173.116749 zeek gauge zeek_active_sessions protocol tcp 10.0
1300475173.475401 zeek counter zeek_net_received_bytes_total (empty) (empty) 25260.0
1300475173.475401 zeek counter zeek_net_received_packets_total (empty) (empty) 136.0
1300475173.475401 zeek counter zeek_ended_sessions_total reason inactivity 0.0
1300475173.475401 zeek counter zeek_total_sessions_total protocol icmp 0.0
1300475173.475401 zeek counter zeek_total_sessions_total protocol udp 24.0
1300475173.475401 zeek counter zeek_total_sessions_total protocol tcp 10.0
1300475173.475401 zeek gauge zeek_net_timestamp_seconds (empty) (empty) 1300475173.475401
1300475173.475401 zeek gauge zeek_active_sessions protocol icmp 0.0
1300475173.475401 zeek gauge zeek_active_sessions protocol udp 24.0
1300475173.475401 zeek gauge zeek_active_sessions protocol tcp 9.0

View file

@ -1,3 +1,8 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0
XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,322.0,90.0,5.0,76.0,7.0,0.0 1650.264644 500.0
XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0
XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0
XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0
XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0
XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0
XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0
XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 0.0001,0.001,0.01,0.1,0.5,1.0,5.0,10.0,30.0,60.0,inf 6.0,14.0,0.0,3.0,10.0,0.0,1.0,0.0,0.0,0.0,0.0 4.651298 34.0

View file

@ -1,5 +1,5 @@
# @TEST-DOC: Using and listing of counters and gauges using the telemetry module.
# Note compilable to C++ due to globals being initialized to a record that
# Not compilable to C++ due to globals being initialized to a record that
# has an opaque type as a field.
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out

View file

@ -1,4 +1,4 @@
# Note compilable to C++ due to globals being initialized to a record that
# Not compilable to C++ due to globals being initialized to a record that
# has an opaque type as a field.
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out

View file

@ -1,6 +1,6 @@
# @TEST-DOC: Query for zeek event-handler-invocations metrics counting number of times handlers were called.
# Note compilable to C++ due to globals being initialized to a record that
# Not compilable to C++ due to globals being initialized to a record that
# has an opaque type as a field.
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out

View file

@ -1,5 +1,5 @@
# @TEST-DOC: Query Broker's telemetry to verify it ends up in Zeek's registry.
# Note compilable to C++ due to globals being initialized to a record that
# Not compilable to C++ due to globals being initialized to a record that
# has an opaque type as a field.
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out

View file

@ -0,0 +1,48 @@
# @TEST-DOC: Calling collect_metrics() invokes Telemetry::sync.
# Not compilable to C++ due to globals being initialized to a record that
# has an opaque type as a field.
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
#
# @TEST-EXEC: zeek -b %INPUT >out
# @TEST-EXEC: btest-diff out
@load base/frameworks/telemetry
global connections_by_proto_cf = Telemetry::register_counter_family([
$prefix="btest",
$name="connections",
$unit="",
$help_text="Total number of monitored connections",
$label_names=vector("proto")
]);
function print_metrics(ms: vector of Telemetry::Metric) {
for (_, m in ms) {
print m$opts$name, m$label_values, m$value;
}
}
event zeek_init()
{
print "node up";
local ms = Telemetry::collect_metrics("btest");
print_metrics(ms);
ms = Telemetry::collect_metrics("btest");
print_metrics(ms);
ms = Telemetry::collect_metrics("btest");
print_metrics(ms);
local hm = Telemetry::collect_histogram_metrics("btest");
print_metrics(ms);
}
global sync_calls = 0;
hook Telemetry::sync()
{
++sync_calls;
local proto = sync_calls == 1 ? "tcp" : "udp";
print "sync", sync_calls, proto;
Telemetry::counter_family_inc(connections_by_proto_cf, vector(proto));
}

View file

@ -0,0 +1,43 @@
# @TEST-DOC: Breaking and recursive Telemetry::sync() warning
# Not compilable to C++ due to globals being initialized to a record that
# has an opaque type as a field.
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
#
# @TEST-EXEC: zeek -b %INPUT >out
# @TEST-EXEC: btest-diff out
# @TEST-EXEC: btest-diff .stderr
@load base/frameworks/telemetry
global connections_by_proto_cf = Telemetry::register_counter_family([
$prefix="btest",
$name="connections",
$unit="",
$help_text="Total number of monitored connections",
$label_names=vector("proto")
]);
event zeek_init()
{
print "node up";
Telemetry::counter_family_inc(connections_by_proto_cf, vector("tcp"));
local ms = Telemetry::collect_metrics("btest");
}
hook Telemetry::sync()
{
# Calling collect_metrics() in Telemetry::sync() is not good as
# it would invoke Telemetry::sync() recursively. The manager will
# emit a warning and not run the second Telemetry::sync() invocation.
local ms = Telemetry::collect_metrics("btest");
}
hook Telemetry::sync() &priority=-100
{
# break is not good as it prevents other Telemetry::sync() hooks
# from running. This will produce a warning.
# We could find this via script validation if we wanted to.
break;
}

View file

@ -0,0 +1,68 @@
# @TEST-DOC: Verify Telemetry::sync() is invoked for metric scraping via the Prometheus HTTP endpoint.
# Not compilable to C++ due to globals being initialized to a record that
# has an opaque type as a field.
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
# @TEST-REQUIRES: which jq
# @TEST-REQUIRES: which curl
#
# @TEST-PORT: METRICS_PORT
#
# @TEST-EXEC: chmod +x fetch-metrics.sh
# @TEST-EXEC: zeek --parse-only %INPUT
# @TEST-EXEC: btest-bg-run zeek ZEEKPATH=$ZEEKPATH:.. zeek -b %INPUT
# @TEST-EXEC: $SCRIPTS/wait-for-file zeek/up 5 || (btest-bg-wait -k 1 && false)
# @TEST-EXEC: ./fetch-metrics.sh 1.trace metrics1.txt
# @TEST-EXEC: ./fetch-metrics.sh 2.trace metrics2.txt
# @TEST-EXEC: ./fetch-metrics.sh 3.trace metrics3.txt
# @TEST-EXEC: btest-bg-wait 10
#
# @TEST-EXEC: btest-diff zeek/.stdout
# @TEST-EXEC: btest-diff metrics1.txt
# @TEST-EXEC: btest-diff metrics2.txt
# @TEST-EXEC: btest-diff metrics3.txt
@TEST-START-FILE fetch-metrics.sh
#! /usr/bin/env bash
set -ux
trace_file=$1
output_file=$2
PORT=$(echo ${METRICS_PORT} | cut -d '/' -f 1)
URL=http://localhost:${PORT}/metrics
curl -m 5 --trace $trace_file $URL | grep ^btest > $output_file
exit 0
@TEST-END-FILE
@load base/frameworks/telemetry
redef exit_only_after_terminate = T;
redef Telemetry::metrics_port = to_port(getenv("METRICS_PORT"));
event zeek_init()
{
print "node up";
system("touch up");
}
global connections_by_proto_cf = Telemetry::register_counter_family([
$prefix="btest",
$name="connections",
$unit="",
$help_text="Total number of monitored connections",
$label_names=vector("proto")
]);
global sync_calls = 0;
hook Telemetry::sync()
{
++sync_calls;
local proto = sync_calls == 1 ? "tcp" : "udp";
print "sync", sync_calls, proto;
Telemetry::counter_family_inc(connections_by_proto_cf, vector(proto));
if ( sync_calls == 3 )
terminate();
}

View file

@ -1,20 +1,22 @@
# @TEST-DOC: Test loading of telemetry/log and smoke check the telemetry.log file
# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek -b -Cr - %INPUT > out
# @TEST-EXEC: grep 'zeek.*sessions' telemetry.log > telemetry.log.filtered
# @TEST-EXEC: zeek -b -Cr $TRACES/wikipedia.trace %INPUT > out
# @TEST-EXEC: grep -E 'zeek_(net|.*sessions)' telemetry.log > telemetry.log.filtered
# @TEST-EXEC: grep 'zeek.*connection_duration' telemetry_histogram.log > telemetry_histogram.log.filtered
# @TEST-EXEC: btest-diff telemetry.log.filtered
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff telemetry.log.filtered
# @TEST-EXEC: btest-diff telemetry_histogram.log.filtered
@load misc/stats
@load frameworks/telemetry/log
redef Telemetry::log_interval = 1sec;
global connection_duration_hf = Telemetry::register_histogram_family([
$prefix="zeek",
$name="connection_duration",
$unit="seconds",
$help_text="Monitored connection duration",
$bounds=vector(2.0, 3.0, 4.0, 5.0, 6.0, 10.0)
$bounds=vector(0.0001, 0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0)
]);
global connection_duration_h = Telemetry::histogram_with(connection_duration_hf);

View file

@ -1,5 +1,5 @@
# @TEST-DOC: Query the Prometheus endpoint and smoke check that zeek_version_info{...} is contained in the response for all cluster nodes.
# Note compilable to C++ due to globals being initialized to a record that
# Not compilable to C++ due to globals being initialized to a record that
# has an opaque type as a field.
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
# @TEST-REQUIRES: which jq