Expr/Val: Add support for in set[pattern]

This commit is contained in:
Arne Welzel 2023-11-14 11:33:29 +01:00
parent e39f280e3d
commit c113b9b297
8 changed files with 135 additions and 7 deletions

View file

@ -3815,12 +3815,12 @@ InExpr::InExpr(ExprPtr arg_op1, ExprPtr arg_op2) : BinaryExpr(EXPR_IN, std::move
} }
} }
// Support <string> in table[pattern] of X // Support <string> in table[pattern] / set[pattern]
if ( op1->GetType()->Tag() == TYPE_STRING ) { if ( op1->GetType()->Tag() == TYPE_STRING ) {
if ( op2->GetType()->Tag() == TYPE_TABLE ) { if ( op2->GetType()->Tag() == TYPE_TABLE ) {
const auto& table_type = op2->GetType()->AsTableType(); const auto& table_type = op2->GetType()->AsTableType();
if ( table_type->IsPatternIndex() && table_type->Yield() ) { if ( table_type->IsPatternIndex() ) {
SetType(base_type(TYPE_BOOL)); SetType(base_type(TYPE_BOOL));
return; return;
} }
@ -3868,8 +3868,9 @@ ValPtr InExpr::Fold(Val* v1, Val* v2) const {
else { else {
const auto& table_val = v2->AsTableVal(); const auto& table_val = v2->AsTableVal();
const auto& table_type = table_val->GetType<zeek::TableType>(); const auto& table_type = table_val->GetType<zeek::TableType>();
if ( table_type->IsPatternIndex() && table_type->Yield() && v1->GetType()->Tag() == TYPE_STRING ) // Special table[pattern] / set[pattern] in expression.
res = table_val->LookupPattern({NewRef{}, v1->AsStringVal()})->Size() > 0; if ( table_type->IsPatternIndex() && v1->GetType()->Tag() == TYPE_STRING )
res = table_val->MatchPattern({NewRef{}, v1->AsStringVal()});
else else
res = (bool)v2->AsTableVal()->Find({NewRef{}, v1}); res = (bool)v2->AsTableVal()->Find({NewRef{}, v1});
} }

View file

@ -1443,6 +1443,9 @@ public:
VectorValPtr Lookup(const StringValPtr& s); VectorValPtr Lookup(const StringValPtr& s);
// Returns whether any pattern in the table's index matches s.
// Delegates to matcher->MatchAll(); the matcher is built on first use,
// and an empty table returns false without building one.
bool MatchAll(const StringValPtr& s);
void GetStats(detail::DFA_State_Cache_Stats* stats) const { void GetStats(detail::DFA_State_Cache_Stats* stats) const {
if ( matcher && matcher->DFA() ) if ( matcher && matcher->DFA() )
matcher->DFA()->Cache()->GetStats(stats); matcher->DFA()->Cache()->GetStats(stats);
@ -1490,6 +1493,17 @@ VectorValPtr detail::TablePatternMatcher::Lookup(const StringValPtr& s) {
return results; return results;
} }
// Reports whether s is matched by the table's pattern index.
// The matcher is built on demand; when no matcher exists yet and the
// table is empty, there is nothing to match and false is returned
// without building one.
bool detail::TablePatternMatcher::MatchAll(const StringValPtr& s) {
    const bool needs_build = ! matcher;

    // Only consult the table when a matcher still has to be built.
    if ( needs_build && tbl->Get()->Length() == 0 )
        return false;

    if ( needs_build )
        Build();

    return matcher->MatchAll(s->AsString());
}
void detail::TablePatternMatcher::Build() { void detail::TablePatternMatcher::Build() {
matcher_yields.clear(); matcher_yields.clear();
matcher_yields.push_back(nullptr); matcher_yields.push_back(nullptr);
@ -1556,7 +1570,7 @@ void TableVal::Init(TableTypePtr t, bool ordered) {
else else
subnets = nullptr; subnets = nullptr;
if ( table_type->IsPatternIndex() && table_type->Yield() ) if ( table_type->IsPatternIndex() )
pattern_matcher = new detail::TablePatternMatcher(this, table_type->Yield()); pattern_matcher = new detail::TablePatternMatcher(this, table_type->Yield());
table_hash = new detail::CompositeHash(table_type->GetIndices()); table_hash = new detail::CompositeHash(table_type->GetIndices());
@ -1674,7 +1688,7 @@ bool TableVal::Assign(ValPtr index, ValPtr new_val, bool broker_forward, bool* i
} }
if ( pattern_matcher ) if ( pattern_matcher )
pattern_matcher->Insert(index->AsListVal()->Idx(0), new_val); pattern_matcher->Insert(index, new_val);
return Assign(std::move(index), std::move(k), std::move(new_val), broker_forward, iterators_invalidated); return Assign(std::move(index), std::move(k), std::move(new_val), broker_forward, iterators_invalidated);
} }
@ -2032,12 +2046,19 @@ TableValPtr TableVal::LookupSubnetValues(const SubNetVal* search) {
} }
VectorValPtr TableVal::LookupPattern(const StringValPtr& s) { VectorValPtr TableVal::LookupPattern(const StringValPtr& s) {
if ( ! pattern_matcher ) if ( ! pattern_matcher || ! GetType()->Yield() )
reporter->InternalError("LookupPattern called on wrong table type"); reporter->InternalError("LookupPattern called on wrong table type");
return pattern_matcher->Lookup(s); return pattern_matcher->Lookup(s);
} }
// For a table[pattern] or set[pattern], reports whether any pattern in
// the index matches s. Raises an internal error if this table has no
// pattern matcher, i.e. it is not indexed by a pattern.
bool TableVal::MatchPattern(const StringValPtr& s) {
    // The message names this function (not LookupPattern) so that the
    // internal error points at the call that was actually misused.
    if ( ! pattern_matcher )
        reporter->InternalError("MatchPattern called on wrong table type");

    return pattern_matcher->MatchAll(s);
}
void TableVal::GetPatternMatcherStats(detail::DFA_State_Cache_Stats* stats) const { void TableVal::GetPatternMatcherStats(detail::DFA_State_Cache_Stats* stats) const {
if ( ! pattern_matcher ) if ( ! pattern_matcher )
reporter->InternalError("GetPatternMatcherStats called on wrong table type"); reporter->InternalError("GetPatternMatcherStats called on wrong table type");

View file

@ -871,6 +871,11 @@ public:
// Causes an internal error if called for any other kind of table. // Causes an internal error if called for any other kind of table.
VectorValPtr LookupPattern(const StringValPtr& s); VectorValPtr LookupPattern(const StringValPtr& s);
// For a table[pattern] or set[pattern], returns true if any of the
// patterns in the index matches the given string, else false.
// Unlike LookupPattern(), no yield type is required, so this also
// works for sets.
// Causes an internal error if called for any other kind of table.
bool MatchPattern(const StringValPtr& s);
// For a table[pattern], fill stats with information about // For a table[pattern], fill stats with information about
// the DFA's state for introspection. // the DFA's state for introspection.
void GetPatternMatcherStats(detail::DFA_State_Cache_Stats* stats) const; void GetPatternMatcherStats(detail::DFA_State_Cache_Stats* stats) const;

View file

@ -0,0 +1 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -0,0 +1,14 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
in empty, F
single insert, foo in, T
single insert, foox not-in, T
multiple inserts, x not-in, T
multiple insert, foo in, T
multiple insert, bletch in, T
multiple insert, foobletch not-in, T
single delete, bletch in, T
single delete, foo in, T
two deletes, bletch not-in, T
two deletes, foo not-in, T
two deletes, bar in, T
clear_table, bar not-in, T

View file

@ -10,3 +10,12 @@ reset stats after delete, [matchers=1, nfa_states=0, dfa_states=0, computed=0, m
[], [3], [1, 3] [], [3], [1, 3]
after even more lookup stats, [matchers=1, nfa_states=29, dfa_states=13, computed=13, mem=7056, hits=0, misses=13] after even more lookup stats, [matchers=1, nfa_states=29, dfa_states=13, computed=13, mem=7056, hits=0, misses=13]
reset after reassignment, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0] reset after reassignment, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0]
set initial stats, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0]
set populated stats, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0]
T, F
set after lookup stats, [matchers=1, nfa_states=10, dfa_states=6, computed=6, mem=2368, hits=0, misses=6]
set reset stats, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0]
F, T
set after more lookup stats, [matchers=1, nfa_states=24, dfa_states=9, computed=9, mem=5336, hits=0, misses=9]
set reset stats after delete, [matchers=1, nfa_states=24, dfa_states=9, computed=9, mem=5336, hits=0, misses=9]
set reset after reassignment, [matchers=1, nfa_states=0, dfa_states=0, computed=0, mem=0, hits=0, misses=0]

View file

@ -0,0 +1,53 @@
# @TEST-DOC: set[pattern] also supports parallel RE matching using in expression
# @TEST-EXEC: zeek -b %INPUT >out
# @TEST-EXEC: btest-diff out
# @TEST-EXEC: btest-diff .stderr
global ps: set[pattern];
# Exercises "<string> in set[pattern]" across inserts, deletes and
# clear_table(). Every check is asserted and also printed; the printed
# lines are compared against the btest baseline, so their order matters.
event zeek_init()
{
# Empty set: nothing is a member.
assert "foo" !in ps;
print "in empty", "foo" in ps;
# Single pattern: "foo" is a member, the longer "foox" is not.
add ps[/foo/];
assert "foo" in ps;
assert "foox" !in ps;
print "single insert, foo in", "foo" in ps;
print "single insert, foox not-in", "foox" !in ps;
# Several patterns, two of which overlap on "foo".
add ps[/bar/];
add ps[/(foo|bletch)/];
assert "x" !in ps;
assert "bar" in ps;
assert "foo" in ps;
assert "bletch" in ps;
# The concatenation "foobletch" is expected not to be a member.
assert "foobletch" !in ps;
print "multiple inserts, x not-in", "x" !in ps;
print "multiple insert, foo in", "foo" in ps;
print "multiple insert, bletch in", "bletch" in ps;
print "multiple insert, foobletch not-in", "foobletch" !in ps;
# After delete of /foo/, still matches "foo" due to /(foo|bletch)/
delete ps[/foo/];
assert "foo" in ps;
assert "bletch" in ps;
print "single delete, bletch in", "bletch" in ps;
print "single delete, foo in", "foo" in ps;
# With /(foo|bletch)/ gone as well, only /bar/ remains.
delete ps[/(foo|bletch)/];
assert "foo" !in ps;
assert "bar" in ps;
assert "bletch" !in ps;
print "two deletes, bletch not-in", "bletch" !in ps;
print "two deletes, foo not-in", "foo" !in ps;
print "two deletes, bar in", "bar" in ps;
# Clearing the set must drop the remaining pattern too.
clear_table(ps);
assert "bar" !in ps;
print "clear_table, bar not-in", "bar" !in ps;
}

View file

@ -4,6 +4,7 @@
# @TEST-EXEC: btest-diff .stderr # @TEST-EXEC: btest-diff .stderr
global pt: table[pattern] of count; global pt: table[pattern] of count;
global ps: set[pattern];
event zeek_init() event zeek_init()
{ {
@ -30,3 +31,26 @@ event zeek_init()
pt = table(); pt = table();
print "reset after reassignment", table_pattern_matcher_stats(pt); print "reset after reassignment", table_pattern_matcher_stats(pt);
} }
# Same stats walk-through as the table[pattern] handler in this file, but
# for the set[pattern] global ps. The negative priority is presumably there
# so this handler runs after the default-priority one - TODO confirm.
event zeek_init() &priority=-10
{
print "set initial stats", table_pattern_matcher_stats(ps);
add ps[/foo/];
# Adding a pattern alone is expected to leave the stats at zero; the
# baseline only shows non-zero counters after the first "in" lookup.
print "set populated stats", table_pattern_matcher_stats(ps);
print "foo" in ps, "foox" in ps;
print "set after lookup stats", table_pattern_matcher_stats(ps);
add ps[/bar/];
add ps[/(foo|bletch)/];
# Further inserts are expected to reset the stats again.
print "set reset stats", table_pattern_matcher_stats(ps);
print "x" in ps, "bletch" in ps;
print "set after more lookup stats", table_pattern_matcher_stats(ps);
# NOTE(review): this deletes from the table pt, not the set ps, which
# looks like a copy/paste slip. It is left as-is because the recorded
# baseline for the next line (stats unchanged) depends on it; switching
# to ps requires regenerating the baseline.
delete pt[/bar/];
print "set reset stats after delete", table_pattern_matcher_stats(ps);
ps = set();
# Report the stats of the freshly assigned set itself, not of pt.
print "set reset after reassignment", table_pattern_matcher_stats(ps);
}