zeek.bif: Implement table_pattern_matcher_stats() bif for introspection

Provide a script-accessible way to introspect the DFA underlying a
table[pattern] value, so that runtime statistics about it can be gathered.
This re-uses the existing MatcherStats record used by ``get_matcher_stats()``.
This commit is contained in:
Arne Welzel 2023-11-08 20:30:04 +01:00
parent 3f240e0f0a
commit e39f280e3d
6 changed files with 116 additions and 0 deletions

View file

@ -20,6 +20,7 @@
#include "zeek/Attr.h" #include "zeek/Attr.h"
#include "zeek/CompHash.h" #include "zeek/CompHash.h"
#include "zeek/Conn.h" #include "zeek/Conn.h"
#include "zeek/DFA.h"
#include "zeek/Desc.h" #include "zeek/Desc.h"
#include "zeek/Dict.h" #include "zeek/Dict.h"
#include "zeek/Expr.h" #include "zeek/Expr.h"
@ -1442,6 +1443,13 @@ public:
VectorValPtr Lookup(const StringValPtr& s); VectorValPtr Lookup(const StringValPtr& s);
// Fills *stats with the state cache statistics of this matcher's DFA.
// If there is no matcher, or the matcher has no DFA, *stats is set
// to {0} instead.
void GetStats(detail::DFA_State_Cache_Stats* stats) const {
    if ( ! matcher || ! matcher->DFA() ) {
        *stats = {0};
        return;
    }

    matcher->DFA()->Cache()->GetStats(stats);
}
private: private:
void Build(); void Build();
@ -2030,6 +2038,13 @@ VectorValPtr TableVal::LookupPattern(const StringValPtr& s) {
return pattern_matcher->Lookup(s); return pattern_matcher->Lookup(s);
} }
// Forwards to the table's pattern matcher to fill in stats. Reports an
// internal error if this table has no pattern matcher.
void TableVal::GetPatternMatcherStats(detail::DFA_State_Cache_Stats* stats) const {
    if ( ! pattern_matcher ) {
        reporter->InternalError("GetPatternMatcherStats called on wrong table type");
    }

    pattern_matcher->GetStats(stats);
}
bool TableVal::UpdateTimestamp(Val* index) { bool TableVal::UpdateTimestamp(Val* index) {
TableEntryVal* v; TableEntryVal* v;

View file

@ -53,6 +53,8 @@ class CompositeHash;
class HashKey; class HashKey;
class TablePatternMatcher; class TablePatternMatcher;
struct DFA_State_Cache_Stats;
class ValTrace; class ValTrace;
class ZBody; class ZBody;
class CPPRuntime; class CPPRuntime;
@ -869,6 +871,10 @@ public:
// Causes an internal error if called for any other kind of table. // Causes an internal error if called for any other kind of table.
VectorValPtr LookupPattern(const StringValPtr& s); VectorValPtr LookupPattern(const StringValPtr& s);
// For a table[pattern], fill stats with information about
// the DFA's state for introspection.
// Causes an internal error if called for any other kind of table.
void GetPatternMatcherStats(detail::DFA_State_Cache_Stats* stats) const;
// Sets the timestamp for the given index to network time. // Sets the timestamp for the given index to network time.
// Returns false if index does not exist. // Returns false if index does not exist.
bool UpdateTimestamp(Val* index); bool UpdateTimestamp(Val* index);

View file

@ -5795,3 +5795,53 @@ function have_spicy_analyzers%(%) : bool
%{ %{
return zeek::val_mgr->Bool(USE_SPICY_ANALYZERS); return zeek::val_mgr->Bool(USE_SPICY_ANALYZERS);
%} %}
%%{
#include "zeek/DFA.h"
%%}
## Return MatcherStats for a table[pattern] or set[pattern] value.
##
## This returns a MatcherStats object that can be used for introspection
## of the DFA used for such a table. Statistics reset whenever elements are
## added to or removed from the table as these operations result in the
## underlying DFA being rebuilt.
##
## This function iterates over all states of the DFA. Calling it at a high
## frequency is likely detrimental to performance.
##
## tbl: The table to get stats for.
##
## Returns: A record with matcher statistics.
function table_pattern_matcher_stats%(tbl: any%) : MatcherStats
	%{
	static auto matcher_stats_type = zeek::id::find_type<zeek::RecordType>("MatcherStats");
	const auto& type = tbl->GetType();

	// The argument is declared as "any", so validate it at runtime.
	if ( type->Tag() != zeek::TYPE_TABLE )
		{
		zeek::emit_builtin_error("table_pattern_matcher_stats() requires a table argument");
		return nullptr;
		}

	if ( ! type->AsTableType()->IsPatternIndex() )
		{
		zeek::emit_builtin_error("table_pattern_matcher_stats() requires a single index of type pattern");
		return nullptr;
		}

	// Same stats type that TableVal::GetPatternMatcherStats() takes.
	zeek::detail::DFA_State_Cache_Stats stats;
	tbl->AsTableVal()->GetPatternMatcherStats(&stats);

	auto result = zeek::make_intrusive<zeek::RecordVal>(matcher_stats_type);

	// Fields are assigned by position: matchers, nfa_states, dfa_states,
	// computed, mem, hits, misses.
	int n = 0;
	result->Assign(n++, 1); // matchers: a pattern table has a single matcher
	result->Assign(n++, stats.nfa_states);
	result->Assign(n++, stats.dfa_states);
	result->Assign(n++, stats.computed);
	result->Assign(n++, stats.mem);
	result->Assign(n++, stats.hits);
	result->Assign(n++, stats.misses);

	return result;
	%}

View file

@ -0,0 +1 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -0,0 +1,12 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
initial stats, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0]
populated stats, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0]
[1], [], T, F
after lookup stats, [matchers=1, nfa_states=10, dfa_states=6, computed=6, mem=2368, hits=0, misses=6]
reset stats, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0]
[], [3], [1, 3], T, F
after more lookup stats, [matchers=1, nfa_states=34, dfa_states=13, computed=13, mem=7720, hits=0, misses=13]
reset stats after delete, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0]
[], [3], [1, 3]
after even more lookup stats, [matchers=1, nfa_states=29, dfa_states=13, computed=13, mem=7056, hits=0, misses=13]
reset after reassignment, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0]

View file

@ -0,0 +1,32 @@
# @TEST-DOC: Test table_pattern_matcher_stats()
# @TEST-EXEC: zeek -b %INPUT >out
# @TEST-EXEC: btest-diff out
# @TEST-EXEC: btest-diff .stderr
# Pattern-indexed table whose matcher statistics are printed at each stage below.
global pt: table[pattern] of count;
event zeek_init()
{
# Stats of the still-empty table.
print "initial stats", table_pattern_matcher_stats(pt);
pt[/foo/] = 1;
# The expected baseline still shows zeroed counters here, before any lookup.
print "populated stats", table_pattern_matcher_stats(pt);
# Perform string lookups and membership tests against the table.
print pt["foo"], pt["foox"], "foo" in pt, "foox" in pt;
print "after lookup stats", table_pattern_matcher_stats(pt);
# Adding elements is expected to reset the stats.
pt[/bar/] = 2;
pt[/(foo|bletch)/] = 3;
print "reset stats", table_pattern_matcher_stats(pt);
# sort() is applied where a lookup can yield more than one value.
print pt["x"], pt["bletch"], sort(pt["foo"]), "foo" in pt, "x" in pt;
print "after more lookup stats", table_pattern_matcher_stats(pt);
# Removing an element is expected to reset the stats as well.
delete pt[/bar/];
print "reset stats after delete", table_pattern_matcher_stats(pt);
print pt["x"], pt["bletch"], sort(pt["foo"]);
print "after even more lookup stats", table_pattern_matcher_stats(pt);
# Replace the table with a fresh, empty one and print its stats.
pt = table();
print "reset after reassignment", table_pattern_matcher_stats(pt);
}