threading/Manager: "lt" to "le" and do not break

The buckets are now specified as lower-or-equal ("le"; previously lower-than,
"lt"), which means we shouldn't break out of the loop at the first matching
bucket: each larger "le" bucket contains everything counted in the previous
buckets, too. The "inf" bucket represents the current number of threads.

For example, with a total of 10 threads, 5 threads with 0 messages pending,
another 4 threads with 50 messages, and one with 2000 messages, the metrics
would end up as follows:

    pending_buckets{le=1}      = 5
    pending_buckets{le=10}     = 5
    pending_buckets{le=100}    = 9
    pending_buckets{le=1000}   = 9
    pending_buckets{le=10000}  = 10
    pending_buckets{le=inf}    = 10

This might seem strange initially, but it aligns with the Prometheus
histogram approach (though we're using gauges here).
This commit is contained in:
Arne Welzel 2024-08-06 09:30:29 +02:00
parent 25f65a705f
commit 98480cf339

View file

@ -61,16 +61,11 @@ void Manager::InitPostScript() {
thread_mgr->current_bucketed_messages.pending_out_total += thread_stats.pending_out; thread_mgr->current_bucketed_messages.pending_out_total += thread_stats.pending_out;
for ( auto upper_limit : pending_bucket_brackets ) { for ( auto upper_limit : pending_bucket_brackets ) {
if ( thread_stats.pending_in < upper_limit ) { if ( thread_stats.pending_in <= upper_limit )
thread_mgr->current_bucketed_messages.pending_in[upper_limit]++; thread_mgr->current_bucketed_messages.pending_in[upper_limit]++;
break;
} if ( thread_stats.pending_out <= upper_limit )
}
for ( auto upper_limit : pending_bucket_brackets ) {
if ( thread_stats.pending_out < upper_limit ) {
thread_mgr->current_bucketed_messages.pending_out[upper_limit]++; thread_mgr->current_bucketed_messages.pending_out[upper_limit]++;
break;
}
} }
} }
@ -127,10 +122,10 @@ void Manager::InitPostScript() {
}); });
pending_message_in_buckets_fam = pending_message_in_buckets_fam =
telemetry_mgr->GaugeFamily("zeek", "msgthread_pending_messages_in_buckets", {"lt"}, telemetry_mgr->GaugeFamily("zeek", "msgthread_pending_messages_in_buckets", {"le"},
"Number of threads with pending inbound messages split into buckets"); "Number of threads with pending inbound messages split into buckets");
pending_message_out_buckets_fam = pending_message_out_buckets_fam =
telemetry_mgr->GaugeFamily("zeek", "msgthread_pending_messages_out_buckets", {"lt"}, telemetry_mgr->GaugeFamily("zeek", "msgthread_pending_messages_out_buckets", {"le"},
"Number of threads with pending outbound messages split into buckets"); "Number of threads with pending outbound messages split into buckets");
for ( auto upper_limit : pending_bucket_brackets ) { for ( auto upper_limit : pending_bucket_brackets ) {
@ -144,7 +139,7 @@ void Manager::InitPostScript() {
current_bucketed_messages.pending_out[upper_limit] = 0; current_bucketed_messages.pending_out[upper_limit] = 0;
pending_message_in_buckets[upper_limit] = pending_message_in_buckets[upper_limit] =
pending_message_in_buckets_fam->GetOrAdd({{"lt", upper_limit_str}}, pending_message_in_buckets_fam->GetOrAdd({{"le", upper_limit_str}},
[upper_limit]() -> prometheus::ClientMetric { [upper_limit]() -> prometheus::ClientMetric {
auto* s = get_message_thread_stats(); auto* s = get_message_thread_stats();
prometheus::ClientMetric metric; prometheus::ClientMetric metric;
@ -153,7 +148,7 @@ void Manager::InitPostScript() {
return metric; return metric;
}); });
pending_message_out_buckets[upper_limit] = pending_message_out_buckets[upper_limit] =
pending_message_out_buckets_fam->GetOrAdd({{"lt", upper_limit_str}}, pending_message_out_buckets_fam->GetOrAdd({{"le", upper_limit_str}},
[upper_limit]() -> prometheus::ClientMetric { [upper_limit]() -> prometheus::ClientMetric {
auto* s = get_message_thread_stats(); auto* s = get_message_thread_stats();
prometheus::ClientMetric metric; prometheus::ClientMetric metric;