Convert telemetry code to use prometheus-cpp

This commit is contained in:
Tim Wojtulewicz 2024-03-01 13:43:37 -07:00
parent 97a35011a7
commit a0ae06b3cd
22 changed files with 1517 additions and 1195 deletions

View file

@ -2,11 +2,11 @@
#pragma once
#include <condition_variable>
#include <cstdint>
#include <initializer_list>
#include <memory>
#include <string_view>
#include <unordered_map>
#include <variant>
#include <vector>
#include "zeek/IntrusivePtr.h"
@ -14,149 +14,52 @@
#include "zeek/telemetry/Counter.h"
#include "zeek/telemetry/Gauge.h"
#include "zeek/telemetry/Histogram.h"
#include "zeek/telemetry/ProcessStats.h"
#include "broker/telemetry/fwd.hh"
namespace broker {
class endpoint;
}
#include "prometheus/exposer.h"
#include "prometheus/registry.h"
namespace zeek {
class RecordVal;
using RecordValPtr = IntrusivePtr<RecordVal>;
} // namespace zeek
namespace zeek::Broker {
class Manager;
}
namespace zeek::telemetry {
class OtelReader;
/**
* Manages a collection of metric families.
*/
class Manager {
class Manager final {
public:
friend class Broker::Manager;
Manager();
Manager(const Manager&) = delete;
Manager& operator=(const Manager&) = delete;
virtual ~Manager() = default;
~Manager() = default;
/**
* Initialization of the manager. This is called late during Zeek's
* initialization after any scripts are processed.
*/
virtual void InitPostScript();
void InitPostScript();
/**
* Supported metric types.
*/
enum class MetricType { Counter, Gauge, Histogram };
/**
* Captures information about counter and gauge metrics.
*/
// Value snapshot of a single counter or gauge time series, tied to a Broker family handle.
// NOTE(review): this listing shows a commit diff without +/- markers; this struct appears to be on
// the removed side (the CollectMetrics() declaration that returns a vector of it is followed by one
// returning ValPtr) -- confirm against the repository before relying on it.
struct CollectedValueMetric {
/**
* Constructor.
* @param metric_type The type of this metric.
* @param family Broker layer family handle for this metric.
* @param label_values The string values for each of the metric's labels.
* @param value The metric's current value.
*/
CollectedValueMetric(MetricType metric_type, const broker::telemetry::metric_family_hdl* family,
std::vector<std::string_view> label_values, std::variant<double, int64_t> value)
: metric_type(metric_type), family(family), label_values(std::move(label_values)), value(value) {}
/**
* @return A script layer Telemetry::Metric record for this metric.
*/
zeek::RecordValPtr AsMetricRecord() const;
// The type of this metric, as passed to the constructor.
enum MetricType metric_type;
// Raw, non-owning pointer to the Broker family handle; who owns the handle is not visible here.
const broker::telemetry::metric_family_hdl* family;
// One view per label value. These are views only, so the viewed strings must outlive this
// object -- presumably they are owned by the family; TODO confirm.
std::vector<std::string_view> label_values;
// Current value, held as either double or int64_t.
std::variant<double, int64_t> value;
};
/**
* Captures information about histogram metrics.
*/
// Snapshot of a single histogram time series (sum plus buckets), tied to a Broker family handle.
// NOTE(review): this listing shows a commit diff without +/- markers; this struct appears to be on
// the removed side (the CollectHistogramMetrics() declaration that returns a vector of it is
// followed by one returning ValPtr) -- confirm against the repository.
struct CollectedHistogramMetric {
/**
* Helper struct representing a single bucket of a histogram.
* @tparam T The data type used by the histogram (double or int64_t).
*/
template<class T>
struct Bucket {
Bucket(T count, T upper_bound) : count(count), upper_bound(upper_bound) {}
// Both members use the histogram's value type T. Whether `count` is cumulative across
// buckets cannot be determined from this listing.
T count;
T upper_bound;
};
/**
* Helper struct representing a histogram as sum and buckets.
* @tparam T The data type used by the histogram (double or int64_t).
*/
template<class T>
struct HistogramData {
T sum;
std::vector<Bucket<T>> buckets;
};
// Concrete instantiations for the two value types named in the @tparam notes.
using DblHistogramData = HistogramData<double>;
using IntHistogramData = HistogramData<int64_t>;
/**
* Constructor.
* @param family Broker layer family handle for this metric.
* @param label_values The string values for each of the metric's labels.
* @param histogram The histogram's data (sum and individual buckets).
*/
CollectedHistogramMetric(const broker::telemetry::metric_family_hdl* family,
std::vector<std::string_view> label_values,
std::variant<DblHistogramData, IntHistogramData> histogram)
: family(family), label_values(std::move(label_values)), histogram(std::move(histogram)) {}
// Raw, non-owning pointer to the Broker family handle; who owns the handle is not visible here.
const broker::telemetry::metric_family_hdl* family;
// Views only; the viewed strings must outlive this object -- TODO confirm who owns them.
std::vector<std::string_view> label_values;
// Histogram payload, held as either the double or the int64_t variant.
std::variant<DblHistogramData, IntHistogramData> histogram;
/**
* @return A script layer Telemetry::HistogramMetric record for this histogram.
*/
zeek::RecordValPtr AsHistogramMetricRecord() const;
};
/**
* @return A script layer Telemetry::MetricOpts record for the given metric family.
* @param metric_type The type of metric.
* @param family Broker layer family handle for the family.
* @tparam T The underlying data type (double or int64_t)
*/
template<typename T>
zeek::RecordValPtr GetMetricOptsRecord(MetricType metric_type, const broker::telemetry::metric_family_hdl* family);
/**
* @return All counter and gauge metrics and their values matching prefix and name.
* @return A VectorVal containing all counter and gauge metrics and their values matching prefix and name.
* @param prefix The prefix pattern to use for filtering. Supports globbing.
* @param name The name pattern to use for filtering. Supports globbing.
*/
std::vector<CollectedValueMetric> CollectMetrics(std::string_view prefix, std::string_view name);
ValPtr CollectMetrics(std::string_view prefix, std::string_view name);
/**
* @return All histogram metrics and their data matching prefix and name.
* @return A VectorVal containing all histogram metrics and their values matching prefix and name.
* @param prefix The prefix pattern to use for filtering. Supports globbing.
* @param name The name pattern to use for filtering. Supports globbing.
*/
std::vector<CollectedHistogramMetric> CollectHistogramMetrics(std::string_view prefix, std::string_view name);
ValPtr CollectHistogramMetrics(std::string_view prefix, std::string_view name);
/**
* @return A counter metric family. Creates the family lazily if necessary.
@ -165,27 +68,41 @@ public:
* @param labels Names for all label dimensions of the metric.
* @param helptext Short explanation of the metric.
* @param unit Unit of measurement.
* @param is_sum Indicates whether this metric accumulates something, where
* only the total value is of interest.
* @param is_sum Indicates whether this metric accumulates something, where only the total value is of interest.
* @param callback Passing a callback method will enable asynchronous mode. The callback method will be called by
* the metrics subsystem whenever data is requested.
*/
// Returns the counter family for (prefix, name), creating and registering it on first use.
// NOTE(review): this listing interleaves the old and new versions of the function without +/-
// markers. The `unit = "1"` signature line and the `int_counter_fam(...)` / `dbl_counter_fam(...)`
// lines with their `...Family{fam}` returns appear to be the removed Broker-based version; the
// `LookupFamily` / `std::make_shared` lines appear to be the replacement -- confirm against the repo.
// NOTE(review): the doc comment preceding this function lists a `callback` parameter that this
// signature does not declare.
template<class ValueType = int64_t>
auto CounterFamily(std::string_view prefix, std::string_view name, Span<const std::string_view> labels,
std::string_view helptext, std::string_view unit = "1", bool is_sum = false) {
std::string_view helptext, std::string_view unit = "", bool is_sum = false) {
// Look for an already registered family first; the result is a base-class pointer.
auto fam = LookupFamily(prefix, name);
if constexpr ( std::is_same<ValueType, int64_t>::value ) {
auto fam = int_counter_fam(Ptr(), prefix, name, labels, helptext, unit, is_sum);
return IntCounterFamily{fam};
// NOTE(review): static_pointer_cast performs no runtime type check. If (prefix, name) was
// registered earlier as a different value type or metric kind, this returns a wrongly typed
// pointer -- verify that callers cannot collide.
if ( fam )
return std::static_pointer_cast<IntCounterFamily>(fam);
auto int_fam =
std::make_shared<IntCounterFamily>(prefix, name, labels, helptext, prometheus_registry, unit, is_sum);
// The new family is stored under its FullName(); presumably LookupFamily() derives the same
// key from prefix and name -- TODO confirm.
families.insert_or_assign(int_fam->FullName(), int_fam);
return int_fam;
}
else {
// Any ValueType other than int64_t or double is rejected at compile time.
static_assert(std::is_same<ValueType, double>::value, "metrics only support int64_t and double values");
auto fam = dbl_counter_fam(Ptr(), prefix, name, labels, helptext, unit, is_sum);
return DblCounterFamily{fam};
// Same unchecked downcast caveat as in the int64_t branch.
if ( fam )
return std::static_pointer_cast<DblCounterFamily>(fam);
auto dbl_fam =
std::make_shared<DblCounterFamily>(prefix, name, labels, helptext, prometheus_registry, unit, is_sum);
families.insert_or_assign(dbl_fam->FullName(), dbl_fam);
return dbl_fam;
}
}
/// @copydoc CounterFamily
// Convenience overload: accepts the label names as a braced list and forwards to the Span overload.
// NOTE(review): the two consecutive default-argument lines are the old (`unit = "1"`) and new
// (`unit = ""`) versions of the same signature line in this diff listing.
template<class ValueType = int64_t>
auto CounterFamily(std::string_view prefix, std::string_view name, std::initializer_list<std::string_view> labels,
std::string_view helptext, std::string_view unit = "1", bool is_sum = false) {
std::string_view helptext, std::string_view unit = "", bool is_sum = false) {
// View over the initializer_list's elements; no copy of the label names is made here.
auto lbl_span = Span{labels.begin(), labels.size()};
return CounterFamily<ValueType>(prefix, name, lbl_span, helptext, unit, is_sum);
}
@ -198,25 +115,28 @@ public:
* @param labels Values for all label dimensions of the metric.
* @param helptext Short explanation of the metric.
* @param unit Unit of measurement.
* @param is_sum Indicates whether this metric accumulates something, where
* only the total value is of interest.
* @param is_sum Indicates whether this metric accumulates something, where only the total value is of interest.
* @param callback Passing a callback method will enable asynchronous mode. The callback method will be called by
* the metrics subsystem whenever data is requested.
*/
// Returns the counter for the given label name/value pairs, going through the family with the
// matching label names.
// NOTE(review): old and new lines are interleaved. The `Counter<ValueType>` by-value signature with
// `unit = "1"` and the `family.getOrAdd(labels)` return appear to be the removed version; the
// `std::shared_ptr<Counter<ValueType>>` signature and `family->GetOrAdd(labels)` appear to be the
// replacement -- confirm against the repo.
// NOTE(review): the doc comment preceding this function lists a `callback` parameter that this
// signature does not declare.
template<class ValueType = int64_t>
Counter<ValueType> CounterInstance(std::string_view prefix, std::string_view name, Span<const LabelView> labels,
std::string_view helptext, std::string_view unit = "1", bool is_sum = false) {
std::shared_ptr<Counter<ValueType>> CounterInstance(std::string_view prefix, std::string_view name,
Span<const LabelView> labels, std::string_view helptext,
std::string_view unit = "", bool is_sum = false) {
// WithLabelNames passes a span of names built from `labels` to the lambda and returns its result.
return WithLabelNames(labels, [&, this](auto labelNames) {
auto family = CounterFamily<ValueType>(prefix, name, labelNames, helptext, unit, is_sum);
return family.getOrAdd(labels);
return family->GetOrAdd(labels);
});
}
/// @copydoc CounterInstance
// Convenience overload: accepts the label pairs as a braced list and forwards to the Span overload.
// NOTE(review): old and new lines are interleaved; the first signature (by-value return,
// `unit = "1"`) and the forward without `<ValueType>` appear to be the removed version.
template<class ValueType = int64_t>
Counter<ValueType> CounterInstance(std::string_view prefix, std::string_view name,
std::initializer_list<LabelView> labels, std::string_view helptext,
std::string_view unit = "1", bool is_sum = false) {
std::shared_ptr<Counter<ValueType>> CounterInstance(std::string_view prefix, std::string_view name,
std::initializer_list<LabelView> labels,
std::string_view helptext, std::string_view unit = "",
bool is_sum = false) {
auto lbl_span = Span{labels.begin(), labels.size()};
return CounterInstance(prefix, name, lbl_span, helptext, unit, is_sum);
// ValueType is named explicitly because it appears in none of the forwarded arguments and so
// cannot be deduced; without it the call would select the default int64_t instantiation.
return CounterInstance<ValueType>(prefix, name, lbl_span, helptext, unit, is_sum);
}
/**
@ -226,27 +146,40 @@ public:
* @param labels Names for all label dimensions of the metric.
* @param helptext Short explanation of the metric.
* @param unit Unit of measurement.
* @param is_sum Indicates whether this metric accumulates something, where
* only the total value is of interest.
* @param is_sum Indicates whether this metric accumulates something, where only the total value is of interest.
* @param callback Passing a callback method will enable asynchronous mode. The callback method will be called by
* the metrics subsystem whenever data is requested.
*/
// Returns the gauge family for (prefix, name), creating and registering it on first use.
// NOTE(review): this listing interleaves the old and new versions of the function without +/-
// markers. The `unit = "1"` signature line and the `int_gauge_fam(...)` / `dbl_gauge_fam(...)` lines
// with their `...Family{fam}` returns appear to be the removed Broker-based version; the
// `LookupFamily` / `std::make_shared` lines appear to be the replacement -- confirm against the repo.
// NOTE(review): the doc comment preceding this function lists a `callback` parameter that this
// signature does not declare.
template<class ValueType = int64_t>
auto GaugeFamily(std::string_view prefix, std::string_view name, Span<const std::string_view> labels,
std::string_view helptext, std::string_view unit = "1", bool is_sum = false) {
std::string_view helptext, std::string_view unit = "", bool is_sum = false) {
// Look for an already registered family first; the result is a base-class pointer.
auto fam = LookupFamily(prefix, name);
if constexpr ( std::is_same<ValueType, int64_t>::value ) {
auto fam = int_gauge_fam(Ptr(), prefix, name, labels, helptext, unit, is_sum);
return IntGaugeFamily{fam};
// NOTE(review): static_pointer_cast performs no runtime type check. If (prefix, name) was
// registered earlier as a different value type or metric kind, this returns a wrongly typed
// pointer -- verify that callers cannot collide.
if ( fam )
return std::static_pointer_cast<IntGaugeFamily>(fam);
auto int_fam =
std::make_shared<IntGaugeFamily>(prefix, name, labels, helptext, prometheus_registry, unit, is_sum);
// Stored under FullName(); presumably LookupFamily() derives the same key -- TODO confirm.
families.insert_or_assign(int_fam->FullName(), int_fam);
return int_fam;
}
else {
// Any ValueType other than int64_t or double is rejected at compile time.
static_assert(std::is_same<ValueType, double>::value, "metrics only support int64_t and double values");
auto fam = dbl_gauge_fam(Ptr(), prefix, name, labels, helptext, unit, is_sum);
return DblGaugeFamily{fam};
// Same unchecked downcast caveat as in the int64_t branch.
if ( fam )
return std::static_pointer_cast<DblGaugeFamily>(fam);
auto dbl_fam =
std::make_shared<DblGaugeFamily>(prefix, name, labels, helptext, prometheus_registry, unit, is_sum);
families.insert_or_assign(dbl_fam->FullName(), dbl_fam);
return dbl_fam;
}
}
/// @copydoc GaugeFamily
// Convenience overload: accepts the label names as a braced list and forwards to the Span overload.
// NOTE(review): the two consecutive default-argument lines are the old (`unit = "1"`) and new
// (`unit = ""`) versions of the same signature line in this diff listing.
template<class ValueType = int64_t>
auto GaugeFamily(std::string_view prefix, std::string_view name, std::initializer_list<std::string_view> labels,
std::string_view helptext, std::string_view unit = "1", bool is_sum = false) {
std::string_view helptext, std::string_view unit = "", bool is_sum = false) {
// View over the initializer_list's elements; no copy of the label names is made here.
auto lbl_span = Span{labels.begin(), labels.size()};
return GaugeFamily<ValueType>(prefix, name, lbl_span, helptext, unit, is_sum);
}
@ -259,25 +192,27 @@ public:
* @param labels Values for all label dimensions of the metric.
* @param helptext Short explanation of the metric.
* @param unit Unit of measurement.
* @param is_sum Indicates whether this metric accumulates something, where
* only the total value is of interest.
* @param is_sum Indicates whether this metric accumulates something, where only the total value is of interest.
* @param callback Passing a callback method will enable asynchronous mode. The callback method will be called by
* the metrics subsystem whenever data is requested.
*/
// Returns the gauge for the given label name/value pairs, going through the family with the
// matching label names.
// NOTE(review): old and new lines are interleaved. The `Gauge<ValueType>` by-value signature with
// `unit = "1"` and the `family.getOrAdd(labels)` return appear to be the removed version; the
// `std::shared_ptr<Gauge<ValueType>>` signature and `family->GetOrAdd(labels)` appear to be the
// replacement -- confirm against the repo.
// NOTE(review): the doc comment preceding this function lists a `callback` parameter that this
// signature does not declare.
template<class ValueType = int64_t>
Gauge<ValueType> GaugeInstance(std::string_view prefix, std::string_view name, Span<const LabelView> labels,
std::string_view helptext, std::string_view unit = "1", bool is_sum = false) {
std::shared_ptr<Gauge<ValueType>> GaugeInstance(std::string_view prefix, std::string_view name,
Span<const LabelView> labels, std::string_view helptext,
std::string_view unit = "", bool is_sum = false) {
// WithLabelNames passes a span of names built from `labels` to the lambda and returns its result.
return WithLabelNames(labels, [&, this](auto labelNames) {
auto family = GaugeFamily<ValueType>(prefix, name, labelNames, helptext, unit, is_sum);
return family.getOrAdd(labels);
return family->GetOrAdd(labels);
});
}
/// @copydoc GaugeInstance
// Convenience overload: accepts the label pairs as a braced list and forwards to the Span overload.
// NOTE(review): old and new lines are interleaved; the first signature (by-value return,
// `unit = "1"`) and the forward without `<ValueType>` appear to be the removed version.
template<class ValueType = int64_t>
Gauge<ValueType> GaugeInstance(std::string_view prefix, std::string_view name,
std::initializer_list<LabelView> labels, std::string_view helptext,
std::string_view unit = "1", bool is_sum = false) {
std::shared_ptr<Gauge<ValueType>> GaugeInstance(std::string_view prefix, std::string_view name,
std::initializer_list<LabelView> labels, std::string_view helptext,
std::string_view unit = "", bool is_sum = false) {
auto lbl_span = Span{labels.begin(), labels.size()};
return GaugeInstance(prefix, name, lbl_span, helptext, unit, is_sum);
// ValueType is named explicitly because it appears in none of the forwarded arguments and so
// cannot be deduced; without it the call would select the default int64_t instantiation.
return GaugeInstance<ValueType>(prefix, name, lbl_span, helptext, unit, is_sum);
}
// Forces the compiler to use the type `Span<const T>` instead of trying to
@ -315,15 +250,27 @@ public:
// Returns the histogram family for (prefix, name), creating and registering it on first use.
// NOTE(review): this listing interleaves the old and new versions of the function without +/-
// markers. The signature line ending in `unit = "1", bool is_sum = false` and the
// `int_histogram_fam(...)` / `dbl_histogram_fam(...)` lines with their `...Family{fam}` returns
// appear to be the removed Broker-based version; the replacement drops the `is_sum` parameter --
// confirm against the repo.
template<class ValueType = int64_t>
auto HistogramFamily(std::string_view prefix, std::string_view name, Span<const std::string_view> labels,
ConstSpan<ValueType> default_upper_bounds, std::string_view helptext,
std::string_view unit = "1", bool is_sum = false) {
std::string_view unit = "") {
// Look for an already registered family first; the result is a base-class pointer.
auto fam = LookupFamily(prefix, name);
if constexpr ( std::is_same<ValueType, int64_t>::value ) {
auto fam = int_histogram_fam(Ptr(), prefix, name, labels, default_upper_bounds, helptext, unit, is_sum);
return IntHistogramFamily{fam};
// NOTE(review): static_pointer_cast performs no runtime type check. If (prefix, name) was
// registered earlier as a different value type or metric kind, this returns a wrongly typed
// pointer. An existing family is also returned as-is, so `default_upper_bounds` is not
// consulted on this path.
if ( fam )
return std::static_pointer_cast<IntHistogramFamily>(fam);
auto int_fam = std::make_shared<IntHistogramFamily>(prefix, name, labels, default_upper_bounds, helptext,
prometheus_registry, unit);
// Stored under FullName(); presumably LookupFamily() derives the same key -- TODO confirm.
families.insert_or_assign(int_fam->FullName(), int_fam);
return int_fam;
}
else {
// Any ValueType other than int64_t or double is rejected at compile time.
static_assert(std::is_same<ValueType, double>::value, "metrics only support int64_t and double values");
auto fam = dbl_histogram_fam(Ptr(), prefix, name, labels, default_upper_bounds, helptext, unit, is_sum);
return DblHistogramFamily{fam};
// Same unchecked downcast caveat as in the int64_t branch.
if ( fam )
return std::static_pointer_cast<DblHistogramFamily>(fam);
auto dbl_fam = std::make_shared<DblHistogramFamily>(prefix, name, labels, default_upper_bounds, helptext,
prometheus_registry, unit);
families.insert_or_assign(dbl_fam->FullName(), dbl_fam);
return dbl_fam;
}
}
@ -331,9 +278,9 @@ public:
// Convenience overload: accepts the label names as a braced list and forwards to the Span overload.
// NOTE(review): old and new lines are interleaved; the signature line with `bool is_sum = false`
// and the forward that passes `is_sum` appear to be the removed version.
template<class ValueType = int64_t>
auto HistogramFamily(std::string_view prefix, std::string_view name, std::initializer_list<std::string_view> labels,
ConstSpan<ValueType> default_upper_bounds, std::string_view helptext,
std::string_view unit = "1", bool is_sum = false) {
std::string_view unit = "") {
// View over the initializer_list's elements; no copy of the label names is made here.
auto lbl_span = Span{labels.begin(), labels.size()};
return HistogramFamily<ValueType>(prefix, name, lbl_span, default_upper_bounds, helptext, unit, is_sum);
return HistogramFamily<ValueType>(prefix, name, lbl_span, default_upper_bounds, helptext, unit);
}
/**
@ -357,29 +304,76 @@ public:
* @p default_upper_bounds via run-time configuration.
*/
// Returns the histogram for the given label name/value pairs, going through the family with the
// matching label names.
// NOTE(review): old and new lines are interleaved. The `Histogram<ValueType>` by-value signature
// with `unit = "1", bool is_sum = false`, the `HistogramFamily` call that passes `is_sum`, and
// `family.getOrAdd(labels)` appear to be the removed version; the shared_ptr signature and
// `family->GetOrAdd(labels)` appear to be the replacement -- confirm against the repo.
template<class ValueType = int64_t>
Histogram<ValueType> HistogramInstance(std::string_view prefix, std::string_view name, Span<const LabelView> labels,
ConstSpan<ValueType> default_upper_bounds, std::string_view helptext,
std::string_view unit = "1", bool is_sum = false) {
std::shared_ptr<Histogram<ValueType>> HistogramInstance(std::string_view prefix, std::string_view name,
Span<const LabelView> labels,
ConstSpan<ValueType> default_upper_bounds,
std::string_view helptext, std::string_view unit = "") {
// WithLabelNames passes a span of names built from `labels` to the lambda and returns its result.
return WithLabelNames(labels, [&, this](auto labelNames) {
auto family =
HistogramFamily<ValueType>(prefix, name, labelNames, default_upper_bounds, helptext, unit, is_sum);
return family.getOrAdd(labels);
auto family = HistogramFamily<ValueType>(prefix, name, labelNames, default_upper_bounds, helptext, unit);
return family->GetOrAdd(labels);
});
}
/// @copydoc HistogramInstance
// Convenience overload: accepts labels (and, in the newer signature, the bucket bounds) as braced
// lists and forwards to the Span overload.
// NOTE(review): old and new lines are interleaved. The first signature takes
// `ConstSpan<ValueType> default_upper_bounds` and an `is_sum` flag; the second takes
// `std::initializer_list<ValueType> default_upper_bounds` and no `is_sum`. That is a
// caller-visible signature change -- confirm which side is current.
template<class ValueType = int64_t>
Histogram<ValueType> HistogramInstance(std::string_view prefix, std::string_view name,
std::initializer_list<LabelView> labels,
ConstSpan<ValueType> default_upper_bounds, std::string_view helptext,
std::string_view unit = "1", bool is_sum = false) {
std::shared_ptr<Histogram<ValueType>> HistogramInstance(std::string_view prefix, std::string_view name,
std::initializer_list<LabelView> labels,
std::initializer_list<ValueType> default_upper_bounds,
std::string_view helptext, std::string_view unit = "") {
auto lbls = Span{labels.begin(), labels.size()};
return HistogramInstance(prefix, name, lbls, default_upper_bounds, helptext, unit, is_sum);
// Wrap the bounds list in a span so the Span overload can take it without copying.
auto bounds = Span{default_upper_bounds.begin(), default_upper_bounds.size()};
// ValueType is named explicitly so the forward does not depend on template argument deduction.
return HistogramInstance<ValueType>(prefix, name, lbls, bounds, helptext, unit);
}
/**
* Changes the frequency for publishing scraped metrics to the target topic.
* Passing a zero-length interval has no effect.
* @param value Interval between two scrapes in seconds.
*/
void SetMetricsExportInterval(double value);
/**
* Sets a new target topic for the metrics. Passing an empty string has no
* effect.
* @param value The new topic for publishing local metrics to.
*/
void SetMetricsExportTopic(std::string value);
/**
* Sets the import topics for a node importing metrics.
*
* @param topics List of topics from which to import metrics.
*/
void SetMetricsImportTopics(std::vector<std::string> topics);
/**
* Sets a new ID for the metrics exporter. Passing an empty string has no
* effect.
* @param value The new ID of the exporter in published metrics.
*/
void SetMetricsExportEndpointName(std::string value);
/**
* Sets a prefix selection for the metrics exporter. An empty vector selects
* *all* metrics.
* @param filter List of selected metric prefixes or an empty vector for
* selecting all metrics.
*/
void SetMetricsExportPrefixes(std::vector<std::string> filter);
/**
 * @return True only if both the export topic and the export endpoint name are non-empty.
 */
bool IsExporting() const {
    if ( export_topic.empty() )
        return false;

    return ! export_endpoint.empty();
}
/**
 * @return A const reference to the manager's metrics_schema string.
 */
const std::string& MetricsSchema() const {
    return metrics_schema;
}
/**
 * Looks up a registered metric family by its full name.
 * @param full_name Key to search for in the family map.
 * @return The stored family, or nullptr if no family is stored under that name.
 */
std::shared_ptr<MetricFamily> GetFamilyByFullName(const std::string& full_name) const {
    const auto pos = families.find(full_name);
    if ( pos == families.end() )
        return nullptr;

    return pos->second;
}
protected:
template<class F>
static void WithLabelNames(Span<const LabelView> xs, F continuation) {
static auto WithLabelNames(Span<const LabelView> xs, F continuation) {
if ( xs.size() <= 10 ) {
std::string_view buf[10];
for ( size_t index = 0; index < xs.size(); ++index )
@ -390,29 +384,41 @@ protected:
else {
std::vector<std::string_view> buf;
for ( auto x : xs )
buf.emplace_back(x.first, x.second);
buf.emplace_back(x.first);
return continuation(Span{buf});
}
}
broker::telemetry::metric_registry_impl* Ptr() { return pimpl.get(); }
// Connects all the dots after the Broker Manager constructed the endpoint
// for this Zeek instance. Called from Broker::Manager::InitPostScript().
void InitPostBrokerSetup(broker::endpoint&);
IntrusivePtr<broker::telemetry::metric_registry_impl> pimpl;
private:
// Caching of metric_family_hdl instances to their Zeek record representation.
std::unordered_map<const broker::telemetry::metric_family_hdl*, zeek::RecordValPtr> metric_opts_cache;
std::shared_ptr<MetricFamily> LookupFamily(std::string_view prefix, std::string_view name) const;
std::string metrics_schema;
std::shared_ptr<OtelReader> otel_reader;
std::map<std::string, std::shared_ptr<MetricFamily>> families;
detail::process_stats current_process_stats;
double process_stats_last_updated = 0.0;
std::shared_ptr<IntGauge> rss_gauge;
std::shared_ptr<IntGauge> vms_gauge;
std::shared_ptr<DblGauge> cpu_gauge;
std::shared_ptr<IntGauge> fds_gauge;
std::string export_topic;
std::vector<std::string> import_topics;
std::string export_endpoint;
std::vector<std::string> export_prefixes;
double export_interval = 0.0;
std::shared_ptr<prometheus::Registry> prometheus_registry;
std::unique_ptr<prometheus::Exposer> prometheus_exposer;
};
} // namespace zeek::telemetry
namespace zeek {
extern telemetry::Manager* telemetry_mgr;
} // namespace zeek