zeek/scripts/policy/frameworks/telemetry/log.zeek
2024-05-31 13:36:37 -07:00

208 lines
5.5 KiB
Text

##! Implementation of a telemetry.log and telemetry_histogram.log file
##! using metrics accessible via the Telemetry module.
@load base/frameworks/telemetry
module Telemetry;
export {
redef enum Log::ID += { LOG, LOG_HISTOGRAM };
## How often metrics are reported.
option log_interval = 60sec;
## Only metrics with prefixes in this set will be included in the
## `telemetry.log` and `telemetry_histogram.log` files by default.
## Setting this option to an empty set includes all prefixes.
##
## For more fine-grained customization, setting this option to an
## empty set and implementing the :zeek:see:`Telemetry::log_policy`
## and :zeek:see:`Telemetry::log_policy_histogram` hooks to filter
## individual records is recommended.
option log_prefixes: set[string] = {"process", "zeek"};
## Record type used for logging counter and gauge metrics.
type Info: record {
## Timestamp of reporting.
ts: time &log;
## Peer that generated this log.
peer: string &log;
## Contains the value "counter" or "gauge" depending on
## the underlying metric type.
metric_type: string &log;
## The prefix (namespace) of the metric.
prefix: string &log;
## The name of the metric.
name: string &log;
## The names of the individual labels.
labels: vector of string &log;
## The values of the labels as listed in ``labels``.
label_values: vector of string &log;
## The value of this metric.
value: double &log;
};
## Record type used for logging histogram metrics.
type HistogramInfo: record {
## Timestamp of reporting.
ts: time &log;
## Peer that generated this log.
peer: string &log;
## The prefix (namespace) of the metric.
prefix: string &log;
## The name of the metric.
name: string &log;
## The names of the individual labels.
labels: vector of string &log;
## The values of the labels as listed in ``labels``.
label_values: vector of string &log;
## The bounds of the individual buckets
bounds: vector of double &log;
## The number of observations within each individual bucket.
values: vector of double &log;
## The sum over all observations
sum: double &log;
## The total number of observations.
observations: double &log;
};
## A default logging policy hook for the stream.
global log_policy: Log::PolicyHook;
## A default logging policy hook for the histogram stream.
global log_policy_histogram: Log::PolicyHook;
## Event triggered for every record in the stream.
global log_telemetry: event(rec: Info);
## Event triggered for every record in the histogram stream.
global log_telemetry_histogram: event(rec: HistogramInfo);
}
function do_log()
{
local ts = network_time();
## TODO: this is potentially slow, since it requires looping over all of the metrics for each
## prefix, and then doing it again for all of the histograms multiple times.
local metrics : vector of Telemetry::Metric;
if ( |log_prefixes| > 0 )
{
for ( prefix in log_prefixes )
{
metrics += Telemetry::collect_metrics(prefix, "*");
}
}
else
{
metrics = Telemetry::collect_metrics();
}
for ( i in metrics )
{
local m = metrics[i];
# Histograms don't have single values, skip over them.
if ( m$opts$metric_type == HISTOGRAM )
next;
# Render the metric_type as a short string. Unknown
# shouldn't really happen, but lets have a fallback.
local metric_type = "unknown";
switch ( m$opts$metric_type ) {
case COUNTER:
metric_type = "counter";
break;
case GAUGE:
metric_type = "gauge";
break;
}
local rec = Info($ts=ts,
$peer=peer_description,
$metric_type=metric_type,
$prefix=m$opts$prefix,
$name=m$opts$name,
$labels=m$opts$labels,
$label_values=m$labels,
$value=m$value);
Log::write(LOG, rec);
}
# Logging of histograms.
ts = network_time();
local histogram_metrics : vector of Telemetry::HistogramMetric;
if ( |log_prefixes| > 0 )
{
for ( prefix in log_prefixes )
{
histogram_metrics += Telemetry::collect_histogram_metrics(prefix, "*");
}
}
else
{
histogram_metrics = Telemetry::collect_histogram_metrics();
}
for ( i in histogram_metrics )
{
local hm = histogram_metrics[i];
local hrec = HistogramInfo($ts=ts,
$peer=peer_description,
$prefix=hm$opts$prefix,
$name=hm$opts$name,
$labels=hm$opts$labels,
$label_values=hm$labels,
$bounds=hm$opts$bounds,
$values=hm$values,
$sum=hm$sum,
$observations=hm$observations);
Log::write(LOG_HISTOGRAM, hrec);
}
}
event Telemetry::log()
{
# We explicitly log once during zeek_done(), so short-circuit
# here when we're already in the process of shutting down.
if ( zeek_is_terminating() )
return;
do_log();
schedule log_interval { Telemetry::log() };
}
event zeek_init() &priority=5
{
Log::create_stream(LOG, [$columns=Info, $ev=log_telemetry, $path="telemetry", $policy=log_policy]);
Log::create_stream(LOG_HISTOGRAM, [$columns=HistogramInfo, $ev=log_telemetry_histogram, $path="telemetry_histogram", $policy=log_policy_histogram]);
schedule log_interval { Telemetry::log() };
}
# Log late during zeek_done() once more. Any metric updates
# afterwards won't be visible in the log.
event zeek_done() &priority=-1000
{
do_log();
}