mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
Introduce telemetry framework
Adds base/frameworks/telemetry with wrappers around telemetry.bif and updates telemetry/Manager to support collecting metrics from script land. Add policy/frameworks/telemetry/log for logging of metrics data into a new telemetry.log and telemetry_histogram.log and add into local.zeek by default.
This commit is contained in:
parent
95fba8fd29
commit
3fe930dbf2
32 changed files with 1950 additions and 27 deletions
199
scripts/policy/frameworks/telemetry/log.zeek
Normal file
199
scripts/policy/frameworks/telemetry/log.zeek
Normal file
|
@ -0,0 +1,199 @@
|
|||
##! Implements the telemetry.log and telemetry_histogram.log files
##! using metrics accessible via the Telemetry module.
|
||||
|
||||
@load base/frameworks/telemetry
|
||||
|
||||
module Telemetry;
|
||||
|
||||
export {
	## Log stream identifiers: LOG is the stream for counter and gauge
	## metrics, LOG_HISTOGRAM the stream for histogram metrics.
	redef enum Log::ID += { LOG, LOG_HISTOGRAM };

	## Interval between two consecutive rounds of metric reporting.
	option log_interval: interval = 60sec;

	## Prefixes (namespaces) of the metrics that are written to the
	## `telemetry.log` and `telemetry_histogram.log` files by default.
	## An empty set disables prefix filtering, so all prefixes are
	## included.
	##
	## For more fine-grained customization, the recommended approach
	## is to set this option to an empty set and filter individual
	## records by implementing the :zeek:see:`Telemetry::log_policy`
	## and :zeek:see:`Telemetry::log_policy_histogram` hooks.
	option log_prefixes: set[string] = {"process", "zeek"};

	## Columns of a telemetry.log entry describing one counter or
	## gauge metric.
	type Info: record {
		## Time at which the metric was reported.
		ts: time &log;

		## Description of the peer that produced this entry.
		peer: string &log;

		## Either "counter" or "gauge", reflecting the type of the
		## underlying metric.
		metric_type: string &log;

		## Prefix (namespace) the metric belongs to.
		prefix: string &log;

		## Name of the metric.
		name: string &log;

		## Unit of the metric. Left unset for unit-less metrics,
		## i.e. those whose unit is "1".
		unit: string &log &optional;

		## Names of the metric's labels.
		labels: vector of string &log;

		## Values of the labels, in the same order as ``labels``.
		label_values: vector of string &log;

		## Value of the metric at reporting time.
		value: double &log;
	};

	## Columns of a telemetry_histogram.log entry describing one
	## histogram metric.
	type HistogramInfo: record {
		## Time at which the metric was reported.
		ts: time &log;

		## Description of the peer that produced this entry.
		peer: string &log;

		## Prefix (namespace) the metric belongs to.
		prefix: string &log;

		## Name of the metric.
		name: string &log;

		## Unit of the metric. Left unset for unit-less metrics,
		## i.e. those whose unit is "1".
		unit: string &log &optional;

		## Names of the metric's labels.
		labels: vector of string &log;

		## Values of the labels, in the same order as ``labels``.
		label_values: vector of string &log;

		## Bounds of the individual buckets.
		bounds: vector of double &log;

		## Number of observations within each individual bucket.
		values: vector of double &log;

		## Sum over all observations.
		sum: double &log;

		## Total number of observations.
		observations: double &log;
	};

	## Default logging policy hook for the LOG stream.
	global log_policy: Log::PolicyHook;

	## Default logging policy hook for the LOG_HISTOGRAM stream.
	global log_policy_histogram: Log::PolicyHook;

	## Event raised for every record written to the LOG stream.
	global log_telemetry: event(rec: Info);

	## Event raised for every record written to the LOG_HISTOGRAM stream.
	global log_telemetry_histogram: event(rec: HistogramInfo);
}
|
||||
|
||||
# Collects all metrics and writes them out: counters and gauges go to the
# LOG stream, histograms to the LOG_HISTOGRAM stream. When log_prefixes is
# non-empty, metrics whose prefix is not in that set are skipped.
function do_log()
	{
	local now = network_time();
	local scalar_metrics = Telemetry::collect_metrics();

	for ( idx in scalar_metrics )
		{
		local metric = scalar_metrics[idx];
		local mtype = metric$opts$metric_type;

		# A histogram has no single value to report; histograms are
		# handled by the second loop further down.
		if ( mtype == DOUBLE_HISTOGRAM || mtype == INT_HISTOGRAM )
			next;

		# An empty log_prefixes set means "no prefix filtering".
		if ( |log_prefixes| > 0 && metric$opts$prefix !in log_prefixes )
			next;

		# Short textual form of the metric type. "unknown" is only a
		# fallback for types that are neither counter nor gauge.
		local type_label = "unknown";

		if ( mtype == DOUBLE_COUNTER || mtype == INT_COUNTER )
			type_label = "counter";
		else if ( mtype == DOUBLE_GAUGE || mtype == INT_GAUGE )
			type_label = "gauge";

		local entry: Info = [
			$ts=now,
			$peer=peer_description,
			$metric_type=type_label,
			$prefix=metric$opts$prefix,
			$name=metric$opts$name,
			$labels=metric$opts$labels,
			$label_values=metric$labels,
			$value=metric$value
		];

		# A unit of "1" marks a unit-less metric; leave the column unset.
		if ( metric$opts$unit != "1" )
			entry$unit = metric$opts$unit;

		Log::write(LOG, entry);
		}

	# Second pass: histogram metrics.
	now = network_time();
	local histograms = Telemetry::collect_histogram_metrics();

	for ( hidx in histograms )
		{
		local histogram = histograms[hidx];

		# Same prefix filter as for counters and gauges.
		if ( |log_prefixes| > 0 && histogram$opts$prefix !in log_prefixes )
			next;

		local hentry: HistogramInfo = [
			$ts=now,
			$peer=peer_description,
			$prefix=histogram$opts$prefix,
			$name=histogram$opts$name,
			$labels=histogram$opts$labels,
			$label_values=histogram$labels,
			$bounds=histogram$opts$bounds,
			$values=histogram$values,
			$sum=histogram$sum,
			$observations=histogram$observations
		];

		# A unit of "1" marks a unit-less metric; leave the column unset.
		if ( histogram$opts$unit != "1" )
			hentry$unit = histogram$opts$unit;

		Log::write(LOG_HISTOGRAM, hentry);
		}
	}
|
||||
|
||||
# Periodic reporting: writes the current metrics and re-schedules itself
# to run again after log_interval.
event Telemetry::log()
	{
	# zeek_done() writes the metrics once more on its own, so when Zeek
	# is already terminating this handler neither logs nor re-arms.
	if ( ! zeek_is_terminating() )
		{
		do_log();
		schedule log_interval { Telemetry::log() };
		}
	}
|
||||
|
||||
# Creates both log streams and schedules the first periodic report.
event zeek_init() &priority=5
	{
	# Stream for counter and gauge metrics, written to telemetry.log.
	Log::create_stream(LOG, [
		$columns=Info,
		$ev=log_telemetry,
		$path="telemetry",
		$policy=log_policy
	]);

	# Stream for histogram metrics, written to telemetry_histogram.log.
	Log::create_stream(LOG_HISTOGRAM, [
		$columns=HistogramInfo,
		$ev=log_telemetry_histogram,
		$path="telemetry_histogram",
		$policy=log_policy_histogram
	]);

	# First report happens one log_interval from now; the handler
	# re-schedules itself afterwards.
	schedule log_interval { Telemetry::log() };
	}
|
||||
|
||||
# Write the metrics one final time at shutdown. The very low priority makes
# this handler run late during zeek_done(); metric updates that happen after
# it has run will not show up in the logs.
event zeek_done() &priority=-1000
	{
	do_log();
	}
|
Loading…
Add table
Add a link
Reference in a new issue