From 02c3ed82d7d523f2acb48e33914210c3b8320d3f Mon Sep 17 00:00:00 2001
From: Tim Wojtulewicz
Date: Fri, 15 Sep 2023 17:22:10 -0700
Subject: [PATCH] Add Hash() and equality operators for various Val types

---
 src/OpaqueVal.h |  10 +++
 src/RE.cc       |  32 ++++++++
 src/RE.h        |   8 ++
 src/Type.cc     |   5 ++
 src/Type.h      |   2 +
 src/Val.cc      | 197 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/Val.h       |  76 ++++++++++++++++++-
 src/util.h      |   8 ++
 8 files changed, 337 insertions(+), 1 deletion(-)

diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h
index f41ed993fe..75a7f6bde0 100644
--- a/src/OpaqueVal.h
+++ b/src/OpaqueVal.h
@@ -136,6 +136,16 @@ public:
      */
     static OpaqueValPtr UnserializeData(BrokerListView data);
 
+    size_t Hash() const override {
+        // No generic content hash exists for opaque values; a constant is valid but collides.
+        return 0;
+    }
+
+    bool IsSameAs(const Val& other) const override {
+        // Identity comparison keeps equality reflexive and agrees with the constant hash.
+        return this == &other;
+    }
+
 protected:
     friend class Val;
     friend class OpaqueMgr;
diff --git a/src/RE.cc b/src/RE.cc
index 5624d2350c..e106f97d4c 100644
--- a/src/RE.cc
+++ b/src/RE.cc
@@ -10,6 +10,7 @@
 #include "zeek/EquivClass.h"
 #include "zeek/Reporter.h"
 #include "zeek/ZeekString.h"
+#include "zeek/util.h"
 
 #include "zeek/3rdparty/doctest.h"
 
@@ -451,6 +452,37 @@ void RE_Matcher::MakeSingleLine() {
     is_single_line = true;
 }
 
+size_t RE_Matcher::Hash() const {
+    size_t h = std::hash<std::string>{}(orig_text);
+    if ( re_anywhere )
+        h = util::hash_combine(h, re_anywhere->Hash());
+    if ( re_exact )
+        h = util::hash_combine(h, re_exact->Hash());
+
+    return h;
+}
+
+bool RE_Matcher::operator==(const RE_Matcher& other) const {
+    if ( orig_text != other.orig_text || is_case_insensitive != other.is_case_insensitive ||
+         is_single_line != other.is_single_line )
+        return false;
+    // Each sub-matcher must be present on both sides or on neither, and then compare equal.
+    if ( (! re_anywhere && other.re_anywhere) || (re_anywhere && ! other.re_anywhere) )
+        return false;
+
+    if ( re_anywhere && other.re_anywhere && *re_anywhere != *other.re_anywhere )
+        return false;
+
+    if ( (! re_exact && other.re_exact) || (re_exact && ! other.re_exact) )
+        return false;
+
+    if ( re_exact && other.re_exact && *re_exact != *other.re_exact )
+        return false;
+
+    return true;
+}
+
+
 bool RE_Matcher::Compile(bool lazy) { return re_anywhere->Compile(lazy) && re_exact->Compile(lazy); }
 
 TEST_SUITE("re_matcher") {
diff --git a/src/RE.h b/src/RE.h
index e30c17fce8..4ba6da1f6b 100644
--- a/src/RE.h
+++ b/src/RE.h
@@ -131,6 +131,10 @@ public:
 
     void Dump(FILE* f);
 
+    size_t Hash() const { return std::hash<std::string>{}(pattern_text); }
+    bool operator==(const Specific_RE_Matcher& other) const { return pattern_text == other.pattern_text; }
+    bool operator!=(const Specific_RE_Matcher& other) const { return pattern_text != other.pattern_text; }
+
 protected:
     void AddAnywherePat(const char* pat);
     void AddExactPat(const char* pat);
@@ -249,6 +253,10 @@ public:
     // the main ("explicit") constructor was used.
     const char* OrigText() const { return orig_text.c_str(); }
 
+    size_t Hash() const;
+
+    bool operator==(const RE_Matcher& other) const;
+
 protected:
     std::string orig_text;
 
diff --git a/src/Type.cc b/src/Type.cc
index d658d88ef5..b6a396c40d 100644
--- a/src/Type.cc
+++ b/src/Type.cc
@@ -241,6 +241,11 @@ void Type::DescribeReST(ODesc* d, bool roles_only) const { d->Add(util::fmt(":ze
 
 void Type::SetError() { tag = TYPE_ERROR; }
 
+bool Type::operator==(const Type& other) const {
+    return (tag == other.tag) && (internal_tag == other.internal_tag) && (is_network_order == other.is_network_order) &&
+           (base_type == other.base_type) && (name == other.name);
+}
+
 detail::TraversalCode Type::Traverse(detail::TraversalCallback* cb) const {
     auto tc = cb->PreType(this);
     HANDLE_TC_TYPE_PRE(tc);
diff --git a/src/Type.h b/src/Type.h
index f9449d101a..46a46a3b4a 100644
--- a/src/Type.h
+++ b/src/Type.h
@@ -301,6 +301,8 @@ public:
         return it->second.emplace(std::move(type)).second;
     }
 
+    bool operator==(const Type& other) const;
+
 protected:
     virtual void DoDescribe(ODesc* d) const;
 
diff --git a/src/Val.cc b/src/Val.cc
index 403e24f166..c032b0002e 100644
--- a/src/Val.cc
+++ b/src/Val.cc
@@ -44,6 +44,7 @@
 #include "zeek/broker/Manager.h"
 #include "zeek/broker/Store.h"
 #include "zeek/threading/formatters/detail/json.h"
+#include "zeek/util.h"
 
 #include "zeek/3rdparty/doctest.h"
 
@@ -703,6 +704,16 @@ ValPtr AddrVal::DoClone(CloneState* state) {
     return {NewRef{}, this};
 }
 
+size_t AddrVal::Hash() const {
+    auto h = addr_val->MakeHashKey();
+    return h->Hash();
+}
+
+bool AddrVal::IsSameAs(const Val& other) const {
+    auto op = other.AsAddrVal();
+    return *addr_val == *op->addr_val;
+}
+
 SubNetVal::SubNetVal(const char* text) : Val(base_type(TYPE_SUBNET)) {
     subnet_val = new IPPrefix();
 
@@ -771,6 +782,16 @@ ValPtr SubNetVal::DoClone(CloneState* state) {
     return {NewRef{}, this};
 }
 
+size_t SubNetVal::Hash() const {
+    auto h = subnet_val->MakeHashKey();
+    return h->Hash();
+}
+
+bool SubNetVal::IsSameAs(const Val& other) const {
+    auto op = other.AsSubNetVal();
+    return *subnet_val == *op->subnet_val;
+}
+
 StringVal::StringVal(String* s) : Val(base_type(TYPE_STRING)) { string_val = s; }
 
 // The following adds a NUL at the end.
@@ -1320,6 +1341,13 @@ ValPtr StringVal::DoClone(CloneState* state) {
                                      new String((u_char*)string_val->Bytes(), string_val->Len(), true)));
 }
 
+size_t StringVal::Hash() const { return std::hash<std::string_view>{}(ToStdStringView()); }
+
+bool StringVal::IsSameAs(const Val& other) const {
+    auto so = other.AsStringVal();
+    return so && Bstr_eq(string_val, so->string_val);
+}
+
 FuncVal::FuncVal(FuncPtr f) : Val(f->GetType()) { func_val = std::move(f); }
 
 FuncPtr FuncVal::AsFuncPtr() const { return func_val; }
@@ -1330,6 +1358,16 @@ void FuncVal::ValDescribe(ODesc* d) const { func_val->Describe(d); }
 
 ValPtr FuncVal::DoClone(CloneState* state) { return make_intrusive(func_val->DoClone()); }
 
+size_t FuncVal::Hash() const {
+    // No content hash is defined for functions; a constant is valid but collides.
+    return 0;
+}
+
+bool FuncVal::IsSameAs(const Val& other) const {
+    // Identity comparison keeps equality reflexive and agrees with the constant hash.
+    return this == &other;
+}
+
 FileVal::FileVal(FilePtr f) : Val(make_intrusive(base_type(TYPE_STRING))) {
     file_val = std::move(f);
     assert(file_val->GetType()->Tag() == TYPE_STRING);
@@ -1355,6 +1393,16 @@ ValPtr FileVal::DoClone(CloneState* state) {
     return {NewRef{}, this};
 }
 
+size_t FileVal::Hash() const {
+    // No content hash is defined for files; a constant is valid but collides.
+    return 0;
+}
+
+bool FileVal::IsSameAs(const Val& other) const {
+    // Identity comparison keeps equality reflexive and agrees with the constant hash.
+    return this == &other;
+}
+
 PatternVal::PatternVal(RE_Matcher* re) : Val(base_type(TYPE_PATTERN)) { re_val = re; }
 
 PatternVal::~PatternVal() { delete re_val; }
@@ -1400,6 +1448,13 @@ ValPtr PatternVal::DoClone(CloneState* state) {
     return state->NewClone(this, make_intrusive(re));
 }
 
+size_t PatternVal::Hash() const { return re_val->Hash(); }
+
+bool PatternVal::IsSameAs(const Val& other) const {
+    auto op = other.AsPattern();
+    return *re_val == *op;
+}
+
 ListVal::ListVal(TypeTag t) : Val(make_intrusive(t == TYPE_ANY ? nullptr : base_type(t))) { tag = t; }
 
 ListVal::ListVal(TypeListPtr tl, std::vector _vals) : Val(std::move(tl)) {
@@ -1489,6 +1544,35 @@ unsigned int ListVal::ComputeFootprint(std::unordered_set* analyzed_
     return fp;
 }
 
+bool ListVal::IsSameAs(const Val& other) const {
+    // This will type-check the object as well.
+    auto lo = other.AsListVal();
+
+    if ( ! lo || vals.size() != lo->vals.size() )
+        return false;
+
+    for ( size_t i = 0; i < vals.size(); i++ )
+        if ( ! vals[i]->IsSameAs(*(lo->vals[i])) )
+            return false;
+
+    return true;
+}
+
+std::size_t ListVal::Hash() const {
+    size_t hash = 0;
+
+    for ( const auto& v : vals )
+        hash = util::hash_combine(hash, v->Hash());
+
+    return hash;
+}
+
+size_t ListValHasher::operator()(const zeek::IntrusivePtr<ListVal>& val) const noexcept { return val->Hash(); }
+
+bool ListValEqualTo::operator()(const IntrusivePtr<ListVal>& a, const IntrusivePtr<ListVal>& b) const noexcept {
+    return a->IsSameAs(*b);
+}
+
 TableEntryVal* TableEntryVal::Clone(Val::CloneState* state) {
     auto rval = new TableEntryVal(val ? val->Clone(state) : nullptr);
     rval->expire_access_time = expire_access_time;
@@ -2919,6 +3003,24 @@ void TableVal::RebuildTable(ParseTimeTableState ptts) {
         Assign(std::move(key), std::move(val));
 }
 
+size_t TableVal::Hash() const {
+    // Entries are summed rather than chained so the result is independent of iteration order.
+    size_t h = 0;
+    for ( auto it = table_val->begin(); it != table_val->end(); ++it ) {
+        // NOTE(review): only the first 4 key bytes are hashed; confirm keys are never shorter.
+        uint64_t k = *(const uint32_t*)it->GetKey();
+        const auto& v = it->value->GetVal(); // may be null: TableEntryVal allows a null val
+        h += util::hash_combine(std::hash<uint64_t>{}(k), v ? v->Hash() : 0);
+    }
+
+    return h;
+}
+
+bool TableVal::IsSameAs(const Val& other) const {
+    auto op = other.AsTableVal();
+    return EqualTo(*op);
+}
+
 TableVal::ParseTimeTableStates TableVal::parse_time_table_states;
 
 TableVal::TableRecordDependencies TableVal::parse_time_table_record_dependencies;
@@ -3203,6 +3305,50 @@ unsigned int RecordVal::ComputeFootprint(std::unordered_set* analyze
     return fp;
 }
 
+size_t RecordVal::Hash() const {
+    const auto* rt = GetRecordType();
+    size_t hash = 0;
+
+    int num_fields = rt->NumFields();
+    for ( int i = 0; i < num_fields; ++i ) {
+        auto rv_i = GetField(i);
+
+        detail::Attributes* a = rt->FieldDecl(i)->attrs.get();
+        bool optional_attr = (a && a->Find(detail::ATTR_OPTIONAL));
+
+        if ( ! rv_i || optional_attr )
+            continue;
+
+        hash = util::hash_combine(hash, rv_i->Hash());
+    }
+
+    return hash;
+}
+
+bool RecordVal::IsSameAs(const Val& other) const {
+    auto ro = other.AsRecordVal();
+
+    if ( ! ro || NumFields() != ro->NumFields() )
+        return false;
+
+    for ( size_t i = 0; i < NumFields(); i++ ) {
+        if ( HasField(i) != ro->HasField(i) )
+            return false;
+
+        auto f = GetField(i);
+        auto of = ro->GetField(i);
+
+        if ( ! f && ! of )
+            continue;
+
+        if ( ! f || ! of || ! f->IsSameAs(*of) )
+            return false;
+    }
+
+    return true;
+}
+
+
 ValPtr EnumVal::SizeVal() const {
     // Negative enums are rejected at parse time, but not internally. Handle the
     // negative case just like a signed integer, as that is an enum's underlying
@@ -3227,6 +3373,12 @@ ValPtr EnumVal::DoClone(CloneState* state) {
     return {NewRef{}, this};
 }
 
+bool EnumVal::IsSameAs(const Val& other) const {
+    auto eo = other.AsEnumVal();
+
+    return eo && type == eo->type && int_val == eo->int_val;
+}
+
 void TypeVal::ValDescribe(ODesc* d) const { type->AsTypeType()->GetType()->Describe(d); }
 
 ValPtr TypeVal::DoClone(CloneState* state) {
@@ -3234,6 +3386,16 @@ ValPtr TypeVal::DoClone(CloneState* state) {
     return {NewRef{}, this};
 }
 
+size_t TypeVal::Hash() const {
+    // Every TypeVal hashes to the same bucket; IsSameAs() is what tells them apart.
+    return 0;
+}
+
+bool TypeVal::IsSameAs(const Val& other) const {
+    auto op = other.AsTypeVal();
+    return *type == *op->Get();
+}
+
 VectorVal::VectorVal(VectorTypePtr t) : Val(t) {
     yield_type = t->Yield();
 
@@ -3748,6 +3910,41 @@ void VectorVal::ValDescribe(ODesc* d) const {
     d->Add("]");
 }
 
+size_t VectorVal::Hash() const {
+    size_t h = 0;
+
+    for ( unsigned int i = 0; i < Size(); i++ ) {
+        auto v = ValAt(i);
+        if ( v )
+            h = util::hash_combine(h, v->Hash());
+        else
+            h = util::hash_combine(h, 0);
+    }
+
+    return h;
+}
+
+bool VectorVal::IsSameAs(const Val& other) const {
+    auto op = other.AsVectorVal();
+
+    if ( Size() != op->Size() )
+        return false;
+    if ( yield_type != op->RawYieldType() )
+        return false;
+
+    for ( unsigned int i = 0; i < Size(); i++ ) {
+        auto v = ValAt(i);
+        auto o_v = op->ValAt(i);
+
+        if ( ! v != ! o_v ) // a hole on exactly one side
+            return false;
+        if ( v && ! v->IsSameAs(*o_v) ) // compare by value, not by pointer identity
+            return false;
+    }
+
+    return true;
+}
+
 ValPtr check_and_promote(ValPtr v, const TypePtr& new_type, bool is_init, const detail::Location* expr_location) {
     if ( ! v )
         return nullptr;
diff --git a/src/Val.h b/src/Val.h
index 9a8ecf3a02..420e4ec42b 100644
--- a/src/Val.h
+++ b/src/Val.h
@@ -264,6 +264,14 @@ public:
         return static_cast(this);
     }
 
+    /**
+     * Returns a hashed value of this Val, suitable for use when inserting a Val into
+     * a std container.
+     */
+    virtual size_t Hash() const = 0;
+    /** Returns true if other holds the same value as this Val; must agree with Hash(). */
+    virtual bool IsSameAs(const Val& other) const = 0;
+
 protected:
     // Friends with access to Clone().
     friend class EnumType;
@@ -382,6 +390,12 @@ public:
 
     zeek_int_t Get() const { return int_val; }
 
+    size_t Hash() const override { return std::hash<zeek_int_t>{}(int_val); }
+    bool IsSameAs(const Val& other) const override {
+        auto op = other.AsInt();
+        return op == int_val;
+    }
+
 protected:
     zeek_int_t int_val;
 };
@@ -392,6 +406,12 @@ public:
 
     zeek_uint_t Get() const { return uint_val; }
 
+    size_t Hash() const override { return std::hash<zeek_uint_t>{}(uint_val); }
+    bool IsSameAs(const Val& other) const override {
+        auto op = other.AsCount();
+        return op == uint_val;
+    }
+
 protected:
     zeek_uint_t uint_val;
 };
@@ -402,6 +422,12 @@ public:
 
     double Get() const { return double_val; }
 
+    size_t Hash() const override { return std::hash<double>{}(double_val); }
+    bool IsSameAs(const Val& other) const override {
+        auto op = other.AsDouble();
+        return op == double_val;
+    }
+
 protected:
     double double_val;
 };
@@ -520,6 +546,9 @@ public:
 
     const IPAddr& Get() const { return *addr_val; }
 
+    size_t Hash() const override;
+    bool IsSameAs(const Val& other) const override;
+
 protected:
     ValPtr DoClone(CloneState* state) override;
 
@@ -547,6 +576,9 @@ public:
 
     const IPPrefix& Get() const { return *subnet_val; }
 
+    size_t Hash() const override;
+    bool IsSameAs(const Val& other) const override;
+
 protected:
     void ValDescribe(ODesc* d) const override;
     ValPtr DoClone(CloneState* state) override;
@@ -572,7 +604,7 @@ public:
     // Note that one needs to de-allocate the return value of
     // ExpandedString() to avoid a memory leak.
     // char* ExpandedString(int format = String::EXPANDED_STRING)
-    // 	{ return AsString()->ExpandedString(format); }
+    //     { return AsString()->ExpandedString(format); }
 
     std::string ToStdString() const;
     std::string_view ToStdStringView() const;
@@ -582,6 +614,10 @@ public:
 
     StringValPtr Replace(RE_Matcher* re, const String& repl, bool do_all);
 
+    size_t Hash() const override;
+
+    bool IsSameAs(const Val& other) const override;
+
 protected:
     unsigned int ComputeFootprint(std::unordered_set* analyzed_vals) const override;
 
@@ -602,6 +638,10 @@ public:
 
     Func* Get() const { return func_val.get(); }
 
+    size_t Hash() const override;
+
+    bool IsSameAs(const Val& other) const override;
+
 protected:
     void ValDescribe(ODesc* d) const override;
     ValPtr DoClone(CloneState* state) override;
@@ -620,6 +660,10 @@ public:
 
     File* Get() const { return file_val.get(); }
 
+    size_t Hash() const override;
+
+    bool IsSameAs(const Val& other) const override;
+
 protected:
     void ValDescribe(ODesc* d) const override;
     ValPtr DoClone(CloneState* state) override;
@@ -642,6 +686,10 @@ public:
 
     const RE_Matcher* Get() const { return re_val; }
 
+    size_t Hash() const override;
+
+    bool IsSameAs(const Val& other) const override;
+
 protected:
     void ValDescribe(ODesc* d) const override;
     ValPtr DoClone(CloneState* state) override;
@@ -704,6 +752,10 @@ public:
 
     void Describe(ODesc* d) const override;
 
+    bool IsSameAs(const Val& other) const override;
+
+    size_t Hash() const override;
+
 protected:
     unsigned int ComputeFootprint(std::unordered_set* analyzed_vals) const override;
 
@@ -713,6 +765,14 @@ protected:
     TypeTag tag;
 };
 
+struct ListValHasher {
+    size_t operator()(const IntrusivePtr<ListVal>& val) const noexcept;
+};
+
+struct ListValEqualTo {
+    bool operator()(const IntrusivePtr<ListVal>& a, const IntrusivePtr<ListVal>& b) const noexcept;
+};
+
 class TableEntryVal {
 public:
     explicit TableEntryVal(ValPtr v) : val(std::move(v)) {
@@ -1034,6 +1094,9 @@ public:
      */
     void EnableChangeNotifications() { in_change_func = false; }
 
+    size_t Hash() const override;
+    bool IsSameAs(const Val& other) const override;
+
 protected:
     void Init(TableTypePtr t, bool ordered = false);
 
@@ -1613,6 +1676,9 @@ public:
 
     static void DoneParsing();
 
+    size_t Hash() const override;
+    bool IsSameAs(const Val& other) const override;
+
 protected:
     friend class zeek::logging::Manager;
     friend class zeek::detail::ValTrace;
@@ -1724,6 +1790,8 @@ protected:
 
     void ValDescribe(ODesc* d) const override;
     ValPtr DoClone(CloneState* state) override;
+
+    bool IsSameAs(const Val& other) const override;
 };
 
 class TypeVal final : public Val {
@@ -1735,6 +1803,9 @@ public:
 
     zeek::Type* Get() const { return type.get(); }
 
+    size_t Hash() const override;
+    bool IsSameAs(const Val& other) const override;
+
 protected:
     void ValDescribe(ODesc* d) const override;
     ValPtr DoClone(CloneState* state) override;
@@ -1866,6 +1937,9 @@ public:
     const auto& RawYieldType() const { return yield_type; }
     const auto& RawYieldTypes() const { return yield_types; }
 
+    size_t Hash() const override;
+    bool IsSameAs(const Val& other) const override;
+
 protected:
     /**
      * Returns the element at a given index or nullptr if it does not exist.
diff --git a/src/util.h b/src/util.h
index 1b0e85d9fe..ccd2dc436e 100644
--- a/src/util.h
+++ b/src/util.h
@@ -613,5 +613,13 @@ inline std::vector split(const wchar_t* s, const wchar_t* del
     return split(std::wstring_view(s), std::wstring_view(delim));
 }
 
+inline size_t hash_combine(size_t h1, size_t h2) {
+    // Taken from Boost. See for example
+    // https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
+    // or
+    // https://stackoverflow.com/questions/4948780/magic-number-in-boosthash-combine
+    return h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2));
+}
+
 } // namespace util
 } // namespace zeek