From e39f280e3d32b1a0380233cfb172ccebbfc70903 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Wed, 8 Nov 2023 20:30:04 +0100 Subject: [PATCH] zeek.bif: Implement table_pattern_matcher_stats() bif for introspection Provide a script-accessible way to introspect the DFA stats that can be leveraged to gather runtime statistics of the underlying DFA. This re-uses the existing MatcherStats used by ``get_matcher_stats()``. --- src/Val.cc | 15 ++++++ src/Val.h | 6 +++ src/zeek.bif | 50 +++++++++++++++++++ .../language.pattern-tables-stats/.stderr | 1 + .../language.pattern-tables-stats/out | 12 +++++ .../btest/language/pattern-tables-stats.zeek | 32 ++++++++++++ 6 files changed, 116 insertions(+) create mode 100644 testing/btest/Baseline/language.pattern-tables-stats/.stderr create mode 100644 testing/btest/Baseline/language.pattern-tables-stats/out create mode 100644 testing/btest/language/pattern-tables-stats.zeek diff --git a/src/Val.cc b/src/Val.cc index 4304570df7..83324c3ae2 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -20,6 +20,7 @@ #include "zeek/Attr.h" #include "zeek/CompHash.h" #include "zeek/Conn.h" +#include "zeek/DFA.h" #include "zeek/Desc.h" #include "zeek/Dict.h" #include "zeek/Expr.h" @@ -1442,6 +1443,13 @@ public: VectorValPtr Lookup(const StringValPtr& s); + void GetStats(detail::DFA_State_Cache_Stats* stats) const { + if ( matcher && matcher->DFA() ) + matcher->DFA()->Cache()->GetStats(stats); + else + *stats = {0}; + } + private: void Build(); @@ -2030,6 +2038,13 @@ VectorValPtr TableVal::LookupPattern(const StringValPtr& s) { return pattern_matcher->Lookup(s); } +void TableVal::GetPatternMatcherStats(detail::DFA_State_Cache_Stats* stats) const { + if ( ! 
pattern_matcher ) + reporter->InternalError("GetPatternMatcherStats called on wrong table type"); + + pattern_matcher->GetStats(stats); +} + bool TableVal::UpdateTimestamp(Val* index) { TableEntryVal* v; diff --git a/src/Val.h b/src/Val.h index 9f201be1b2..9fc026dc6f 100644 --- a/src/Val.h +++ b/src/Val.h @@ -53,6 +53,8 @@ class CompositeHash; class HashKey; class TablePatternMatcher; +struct DFA_State_Cache_Stats; + class ValTrace; class ZBody; class CPPRuntime; @@ -869,6 +871,10 @@ public: // Causes an internal error if called for any other kind of table. VectorValPtr LookupPattern(const StringValPtr& s); + // For a table[pattern], fill stats with information about the DFA's state + // for introspection. Causes an internal error if called for any other kind of table. + void GetPatternMatcherStats(detail::DFA_State_Cache_Stats* stats) const; + // Sets the timestamp for the given index to network time. // Returns false if index does not exist. bool UpdateTimestamp(Val* index); diff --git a/src/zeek.bif b/src/zeek.bif index 4c70a6fcbf..f8358fec8c 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -5795,3 +5795,53 @@ function have_spicy_analyzers%(%) : bool %{ return zeek::val_mgr->Bool(USE_SPICY_ANALYZERS); %} + +%%{ +#include "zeek/DFA.h" +%%} + +## Return MatcherStats for a table[pattern] or set[pattern] value. +## +## This returns a MatcherStats record that can be used for introspection +## of the DFA used for such a table. Statistics reset whenever elements are +## added to or removed from the table as these operations result in the underlying +## DFA being rebuilt. +## +## This function iterates over all states of the DFA. Calling it at a high +## frequency is likely detrimental to performance. +## +## tbl: The table to get stats for. +## +## Returns: A record with matcher statistics. 
+function table_pattern_matcher_stats%(tbl: any%) : MatcherStats + %{ + static auto matcher_stats_type = zeek::id::find_type<zeek::RecordType>("MatcherStats"); + + const auto& type = tbl->GetType(); + if ( type->Tag() != zeek::TYPE_TABLE ) + { + zeek::emit_builtin_error("table_pattern_matcher_stats() requires a table argument"); + return nullptr; + } + + if ( ! type->AsTableType()->IsPatternIndex() ) + { + zeek::emit_builtin_error("table_pattern_matcher_stats() requires a single index of type pattern"); + return nullptr; + } + + zeek::detail::DFA_State_Cache_Stats stats; + tbl->AsTableVal()->GetPatternMatcherStats(&stats); + + auto result = zeek::make_intrusive<zeek::RecordVal>(matcher_stats_type); + int n = 0; + result->Assign(n++, 1); // matchers + result->Assign(n++, stats.nfa_states); + result->Assign(n++, stats.dfa_states); + result->Assign(n++, stats.computed); + result->Assign(n++, stats.mem); + result->Assign(n++, stats.hits); + result->Assign(n++, stats.misses); + + return result; + %} diff --git a/testing/btest/Baseline/language.pattern-tables-stats/.stderr b/testing/btest/Baseline/language.pattern-tables-stats/.stderr new file mode 100644 index 0000000000..49d861c74c --- /dev/null +++ b/testing/btest/Baseline/language.pattern-tables-stats/.stderr @@ -0,0 +1 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. diff --git a/testing/btest/Baseline/language.pattern-tables-stats/out b/testing/btest/Baseline/language.pattern-tables-stats/out new file mode 100644 index 0000000000..c7646c68bc --- /dev/null +++ b/testing/btest/Baseline/language.pattern-tables-stats/out @@ -0,0 +1,12 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+initial stats, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0] +populated stats, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0] +[1], [], T, F +after lookup stats, [matchers=1, nfa_states=10, dfa_states=6, computed=6, mem=2368, hits=0, misses=6] +reset stats, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0] +[], [3], [1, 3], T, F +after more lookup stats, [matchers=1, nfa_states=34, dfa_states=13, computed=13, mem=7720, hits=0, misses=13] +reset stats after delete, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0] +[], [3], [1, 3] +after even more lookup stats, [matchers=1, nfa_states=29, dfa_states=13, computed=13, mem=7056, hits=0, misses=13] +reset after reassignment, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0] diff --git a/testing/btest/language/pattern-tables-stats.zeek b/testing/btest/language/pattern-tables-stats.zeek new file mode 100644 index 0000000000..1670dee86c --- /dev/null +++ b/testing/btest/language/pattern-tables-stats.zeek @@ -0,0 +1,32 @@ +# @TEST-DOC: Test that table_pattern_matcher_stats() reports DFA stats after lookups and that they reset when the table is modified or reassigned +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out +# @TEST-EXEC: btest-diff .stderr + +global pt: table[pattern] of count; + +event zeek_init() + { + print "initial stats", table_pattern_matcher_stats(pt); + pt[/foo/] = 1; + print "populated stats", table_pattern_matcher_stats(pt); + + print pt["foo"], pt["foox"], "foo" in pt, "foox" in pt; + print "after lookup stats", table_pattern_matcher_stats(pt); + + pt[/bar/] = 2; + pt[/(foo|bletch)/] = 3; + print "reset stats", table_pattern_matcher_stats(pt); + + print pt["x"], pt["bletch"], sort(pt["foo"]), "foo" in pt, "x" in pt; + print "after more lookup stats", table_pattern_matcher_stats(pt); + + delete pt[/bar/]; + print "reset stats after delete", table_pattern_matcher_stats(pt); + + print pt["x"], pt["bletch"], sort(pt["foo"]); + print "after even 
more lookup stats", table_pattern_matcher_stats(pt); + + pt = table(); + print "reset after reassignment", table_pattern_matcher_stats(pt); + }