From 4d58b98c7051546c4b94dcf4671c8c4fe642f7b9 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Thu, 16 Sep 2021 17:19:09 -0700 Subject: [PATCH] Refactor CompHash class to use new HashKey buffering features This preserves the previous hash key buffer layout (so the testsuite still passes) and overall approach but gets rid of the codepath for writing singleton serializations. This code path required a fourth switch block over all types (besides reads, writes, and size computation) and was inconsistent with the one for writing non-atomic types. --- src/CompHash.cc | 1364 +++++++++++++++++++++-------------------------- src/CompHash.h | 55 +- 2 files changed, 611 insertions(+), 808 deletions(-) diff --git a/src/CompHash.cc b/src/CompHash.cc index 837fb15e8d..50e8f446f6 100644 --- a/src/CompHash.cc +++ b/src/CompHash.cc @@ -46,7 +46,13 @@ const HashkeyMapPtr ordered_hashkeys(const TableVal* tv) for ( const auto& entry : *tbl ) { auto k = entry.GetHashKey(); - // Do we have to recreate here? Could we reuse the existing key? + // Potential optimization: we could do without the following if + // the caller uses k directly to determine key length & + // content. But: the way k got serialized might differ somewhat + // from how we'll end up doing it (e.g. singleton vs + // non-singleton), and looking up a table value with the hashkey + // is tricky in case of subnets (consider the special-casing in + // TableVal::Find()). auto lv = tv->RecreateIndex(*k); res->insert_or_assign(std::move(k), lv); } @@ -56,720 +62,95 @@ const HashkeyMapPtr ordered_hashkeys(const TableVal* tv) CompositeHash::CompositeHash(TypeListPtr composite_type) : type(std::move(composite_type)) { - singleton_tag = TYPE_INTERNAL_ERROR; - - // If the only element is a record or vector, don't treat it as a - // singleton, since it needs to be evaluated specially. 
- if ( type->GetTypes().size() == 1 ) - { - if ( type->GetTypes()[0]->Tag() == TYPE_RECORD || - type->GetTypes()[0]->Tag() == TYPE_VECTOR ) - { - is_complex_type = true; - is_singleton = false; - } - else - { - is_complex_type = false; - is_singleton = true; - } - } - - else - { - is_singleton = false; - is_complex_type = false; - } - - if ( is_singleton ) - { - // Don't do any further key computations - we'll do them - // via the singleton later. - singleton_tag = type->GetTypes()[0]->InternalType(); - size = 0; - key = nullptr; - } - - else - { - size = ComputeKeySize(nullptr, true, true); - - if ( size > 0 ) - // Fixed size. Make sure what we get is fully aligned. - key = reinterpret_cast(new double[size / sizeof(double) + 1]); - else - key = nullptr; - } - } - -CompositeHash::~CompositeHash() - { - delete[] key; - } - -// Computes the piece of the hash for Val*, returning the new kp. -char* CompositeHash::SingleValHash(bool type_check, char* kp0, Type* bt, Val* v, - bool optional) const - { - char* kp1 = nullptr; - InternalTypeTag t = bt->InternalType(); - - if ( optional ) - { - // Add a marker saying whether the optional field is set. - char* kp = AlignAndPadType(kp0); - *kp = (v ? 1 : 0); - kp0 = reinterpret_cast(kp + 1); - - if ( ! 
v ) - return kp0; - } - - if ( type_check ) - { - InternalTypeTag vt = v->GetType()->InternalType(); - if ( vt != t ) - return nullptr; - } - - switch ( t ) - { - case TYPE_INTERNAL_INT: - { - bro_int_t* kp = AlignAndPadType(kp0); - *kp = v->AsInt(); - kp1 = reinterpret_cast(kp + 1); - } - break; - - case TYPE_INTERNAL_UNSIGNED: - { - bro_uint_t* kp = AlignAndPadType(kp0); - *kp = v->AsCount(); - kp1 = reinterpret_cast(kp + 1); - } - break; - - case TYPE_INTERNAL_ADDR: - { - uint32_t* kp = AlignAndPadType(kp0); - v->AsAddr().CopyIPv6(kp); - kp1 = reinterpret_cast(kp + 4); - } - break; - - case TYPE_INTERNAL_SUBNET: - { - uint32_t* kp = AlignAndPadType(kp0); - v->AsSubNet().Prefix().CopyIPv6(kp); - kp[4] = v->AsSubNet().Length(); - kp1 = reinterpret_cast(kp + 5); - } - break; - - case TYPE_INTERNAL_DOUBLE: - { - double* kp = AlignAndPadType(kp0); - *kp = v->InternalDouble(); - kp1 = reinterpret_cast(kp + 1); - } - break; - - case TYPE_INTERNAL_VOID: - case TYPE_INTERNAL_OTHER: - { - switch ( v->GetType()->Tag() ) - { - case TYPE_FUNC: - { - uint32_t* kp = AlignAndPadType(kp0); - *kp = v->AsFunc()->GetUniqueFuncID(); - kp1 = reinterpret_cast(kp + 1); - break; - } - - case TYPE_PATTERN: - { - const char* texts[2] = {v->AsPattern()->PatternText(), - v->AsPattern()->AnywherePatternText()}; - - uint64_t* kp; - for ( int i = 0; i < 2; i++ ) - { - kp = AlignAndPadType(kp0 + i); - *kp = strlen(texts[i]) + 1; - } - - kp1 = reinterpret_cast(kp + 1); - for ( int i = 0; i < 2; i++ ) - { - memcpy(kp1, texts[i], strlen(texts[i]) + 1); - kp1 += strlen(texts[i]) + 1; - } - - break; - } - - case TYPE_RECORD: - { - char* kp = kp0; - RecordVal* rv = v->AsRecordVal(); - RecordType* rt = bt->AsRecordType(); - int num_fields = rt->NumFields(); - - for ( int i = 0; i < num_fields; ++i ) - { - auto rv_i = rv->GetField(i); - - Attributes* a = rt->FieldDecl(i)->attrs.get(); - bool optional_attr = (a && a->Find(ATTR_OPTIONAL)); - - if ( ! (rv_i || optional_attr) ) - return nullptr; - - if ( ! 
(kp = - SingleValHash(type_check, kp, rt->GetFieldType(i).get(), - rv_i.get(), optional_attr)) ) - return nullptr; - } - - kp1 = kp; - break; - } - - case TYPE_TABLE: - { - int* kp = AlignAndPadType(kp0); - TableVal* tv = v->AsTableVal(); - *kp = tv->Size(); - kp1 = reinterpret_cast(kp + 1); - - auto hashkeys = ordered_hashkeys(tv); - - for ( auto& kv : *hashkeys ) - { - auto key = kv.second; - - if ( ! (kp1 = SingleValHash(type_check, kp1, key->GetType().get(), - key.get(), false)) ) - return nullptr; - - if ( ! v->GetType()->IsSet() ) - { - auto val = tv->FindOrDefault(key); - - if ( ! (kp1 = - SingleValHash(type_check, kp1, val->GetType().get(), - val.get(), false)) ) - return nullptr; - } - } - } - break; - - case TYPE_VECTOR: - { - unsigned int* kp = AlignAndPadType(kp0); - VectorVal* vv = v->AsVectorVal(); - VectorType* vt = v->GetType()->AsVectorType(); - *kp = vv->Size(); - kp1 = reinterpret_cast(kp + 1); - for ( unsigned int i = 0; i < vv->Size(); ++i ) - { - auto val = vv->ValAt(i); - unsigned int* kp = AlignAndPadType(kp1); - *kp = i; - kp1 = reinterpret_cast(kp + 1); - kp = AlignAndPadType(kp1); - *kp = val ? 1 : 0; - kp1 = reinterpret_cast(kp + 1); - - if ( val ) - { - if ( ! (kp1 = SingleValHash(type_check, kp1, vt->Yield().get(), - val.get(), false)) ) - return nullptr; - } - } - } - break; - - case TYPE_LIST: - { - int* kp = AlignAndPadType(kp0); - ListVal* lv = v->AsListVal(); - *kp = lv->Length(); - kp1 = reinterpret_cast(kp + 1); - for ( int i = 0; i < lv->Length(); ++i ) - { - Val* entry_val = lv->Idx(i).get(); - if ( ! (kp1 = SingleValHash(type_check, kp1, - entry_val->GetType().get(), entry_val, - false)) ) - return nullptr; - } - } - break; - - default: - { - reporter->InternalError( - "bad index type in CompositeHash::SingleValHash"); - return nullptr; - } - } - - break; // case TYPE_INTERNAL_VOID/OTHER - } - - case TYPE_INTERNAL_STRING: - { - // Align to int for the length field. 
- int* kp = AlignAndPadType(kp0); - const String* sval = v->AsString(); - - *kp = sval->Len(); // so we can recover the value - - kp1 = reinterpret_cast(kp + 1); - - memcpy(kp1, sval->Bytes(), sval->Len()); - kp1 += sval->Len(); - } - break; - - case TYPE_INTERNAL_ERROR: - return nullptr; - } - - return kp1; + is_singleton = true; } std::unique_ptr CompositeHash::MakeHashKey(const Val& argv, bool type_check) const { - auto v = &argv; + auto res = std::make_unique(); + const auto& tl = type->GetTypes(); if ( is_singleton ) - return ComputeSingletonHash(v, type_check); - - if ( is_complex_type && v->GetType()->Tag() != TYPE_LIST ) { - ListVal lv(TYPE_ANY); + const Val* v = &argv; - // Cast away const to use ListVal - but since we - // re-introduce const on the recursive call, it should - // be okay; the only thing is that the ListVal unref's it. - Val* ncv = (Val*)v; - lv.Append({NewRef{}, ncv}); - return MakeHashKey(lv, type_check); + // This is the "singleton" case -- actually just a single value + // that may come bundled in a list. If so, unwrap it. + if ( v->GetType()->Tag() == TYPE_LIST ) + { + auto lv = v->AsListVal(); + + if ( type_check && lv->Length() != 1 ) + return nullptr; + + v = lv->Idx(0).get(); + } + + if ( SingleValHash(*res, v, tl[0].get(), type_check, false, true) ) + return res; + + return nullptr; } - char* k = key; - - if ( ! k ) - { - int sz = ComputeKeySize(v, type_check, false); - if ( sz == 0 ) - return nullptr; - - k = reinterpret_cast(new double[sz / sizeof(double) + 1]); - type_check = false; // no need to type-check again. - } - - const auto& tl = type->GetTypes(); - - if ( type_check && v->GetType()->Tag() != TYPE_LIST ) + if ( type_check && argv.GetType()->Tag() != TYPE_LIST ) return nullptr; - auto lv = v->AsListVal(); - - if ( type_check && lv->Length() != static_cast(tl.size()) ) + if ( ! 
ReserveKeySize(*res, &argv, type_check, false) ) return nullptr; - char* kp = k; + // Size computation has done requested type-checking, no further need + type_check = false; + + // The size computation resulted in a requested buffer size; allocate it. + res->Allocate(); + for ( auto i = 0u; i < tl.size(); ++i ) { - kp = SingleValHash(type_check, kp, tl[i].get(), lv->Idx(i).get(), false); - if ( ! kp ) + if ( ! SingleValHash(*res, argv.AsListVal()->Idx(i).get(), tl[i].get(), type_check, false, + false) ) return nullptr; } - return std::make_unique((k == key), (void*)k, kp - k); + return res; } -std::unique_ptr CompositeHash::ComputeSingletonHash(const Val* v, bool type_check) const - { - if ( v->GetType()->Tag() == TYPE_LIST ) - { - auto lv = v->AsListVal(); - - if ( type_check && lv->Length() != 1 ) - return nullptr; - - v = lv->Idx(0).get(); - } - - if ( type_check && v->GetType()->InternalType() != singleton_tag ) - return nullptr; - - switch ( singleton_tag ) - { - case TYPE_INTERNAL_INT: - return std::make_unique(v->AsInt()); - - case TYPE_INTERNAL_UNSIGNED: - return std::make_unique(v->AsCount()); - - case TYPE_INTERNAL_ADDR: - return v->AsAddr().MakeHashKey(); - - case TYPE_INTERNAL_SUBNET: - return v->AsSubNet().MakeHashKey(); - - case TYPE_INTERNAL_DOUBLE: - return std::make_unique(v->InternalDouble()); - - case TYPE_INTERNAL_VOID: - case TYPE_INTERNAL_OTHER: - if ( v->GetType()->Tag() == TYPE_FUNC ) - return std::make_unique(v->AsFunc()->GetUniqueFuncID()); - - if ( v->GetType()->Tag() == TYPE_PATTERN ) - { - const char* texts[2] = {v->AsPattern()->PatternText(), - v->AsPattern()->AnywherePatternText()}; - int n = strlen(texts[0]) + strlen(texts[1]) + 2; // 2 for null - char* key = new char[n]; - std::memcpy(key, texts[0], strlen(texts[0]) + 1); - std::memcpy(key + strlen(texts[0]) + 1, texts[1], strlen(texts[1]) + 1); - return std::make_unique(false, key, n); - } - - reporter->InternalError("bad index type in CompositeHash::ComputeSingletonHash"); - 
return nullptr; - - case TYPE_INTERNAL_STRING: - return std::make_unique(v->AsString()); - - case TYPE_INTERNAL_ERROR: - return nullptr; - - default: - reporter->InternalError("bad internal type in CompositeHash::ComputeSingletonHash"); - return nullptr; - } - } - -int CompositeHash::SingleTypeKeySize(Type* bt, const Val* v, bool type_check, int sz, bool optional, - bool calc_static_size) const - { - InternalTypeTag t = bt->InternalType(); - - if ( optional ) - sz = SizeAlign(sz, sizeof(char)); - - if ( type_check && v ) - { - InternalTypeTag vt = v->GetType()->InternalType(); - if ( vt != t ) - return 0; - } - - switch ( t ) - { - case TYPE_INTERNAL_INT: - case TYPE_INTERNAL_UNSIGNED: - sz = SizeAlign(sz, sizeof(bro_int_t)); - break; - - case TYPE_INTERNAL_ADDR: - sz = SizeAlign(sz, sizeof(uint32_t)); - sz += sizeof(uint32_t) * 3; // to make a total of 4 words - break; - - case TYPE_INTERNAL_SUBNET: - sz = SizeAlign(sz, sizeof(uint32_t)); - sz += sizeof(uint32_t) * 4; // to make a total of 5 words - break; - - case TYPE_INTERNAL_DOUBLE: - sz = SizeAlign(sz, sizeof(double)); - break; - - case TYPE_INTERNAL_VOID: - case TYPE_INTERNAL_OTHER: - { - switch ( bt->Tag() ) - { - case TYPE_FUNC: - { - sz = SizeAlign(sz, sizeof(uint32_t)); - break; - } - - case TYPE_PATTERN: - { - if ( ! v ) - return (optional && ! calc_static_size) ? sz : 0; - - sz = SizeAlign(sz, 2 * sizeof(uint64_t)); - sz += strlen(v->AsPattern()->PatternText()) + - strlen(v->AsPattern()->AnywherePatternText()) + - 2; // 2 for null terminators - break; - } - - case TYPE_RECORD: - { - if ( ! v ) - return (optional && ! calc_static_size) ? sz : 0; - - const RecordVal* rv = v->AsRecordVal(); - RecordType* rt = bt->AsRecordType(); - int num_fields = rt->NumFields(); - - for ( int i = 0; i < num_fields; ++i ) - { - Attributes* a = rt->FieldDecl(i)->attrs.get(); - bool optional_attr = (a && a->Find(ATTR_OPTIONAL)); - - auto rv_v = rv ? 
rv->GetField(i) : nullptr; - sz = SingleTypeKeySize(rt->GetFieldType(i).get(), rv_v.get(), - type_check, sz, optional_attr, - calc_static_size); - if ( ! sz ) - return 0; - } - - break; - } - - case TYPE_TABLE: - { - if ( ! v ) - return (optional && ! calc_static_size) ? sz : 0; - - sz = SizeAlign(sz, sizeof(int)); - auto tv = v->AsTableVal(); - auto hashkeys = ordered_hashkeys(tv); - - for ( auto& kv : *hashkeys ) - { - auto key = kv.second; - sz = SingleTypeKeySize(key->GetType().get(), key.get(), type_check, - sz, false, calc_static_size); - if ( ! sz ) - return 0; - - if ( ! bt->IsSet() ) - { - auto val = const_cast(tv)->FindOrDefault(key); - sz = SingleTypeKeySize(val->GetType().get(), val.get(), - type_check, sz, false, calc_static_size); - if ( ! sz ) - return 0; - } - } - - break; - } - - case TYPE_VECTOR: - { - if ( ! v ) - return (optional && ! calc_static_size) ? sz : 0; - - sz = SizeAlign(sz, sizeof(unsigned int)); - VectorVal* vv = const_cast(v->AsVectorVal()); - for ( unsigned int i = 0; i < vv->Size(); ++i ) - { - auto val = vv->ValAt(i); - sz = SizeAlign(sz, sizeof(unsigned int)); - sz = SizeAlign(sz, sizeof(unsigned int)); - if ( val ) - sz = SingleTypeKeySize(bt->AsVectorType()->Yield().get(), - val.get(), type_check, sz, false, - calc_static_size); - if ( ! sz ) - return 0; - } - - break; - } - - case TYPE_LIST: - { - if ( ! v ) - return (optional && ! calc_static_size) ? sz : 0; - - sz = SizeAlign(sz, sizeof(int)); - ListVal* lv = const_cast(v->AsListVal()); - for ( int i = 0; i < lv->Length(); ++i ) - { - sz = - SingleTypeKeySize(lv->Idx(i)->GetType().get(), lv->Idx(i).get(), - type_check, sz, false, calc_static_size); - if ( ! sz ) - return 0; - } - - break; - } - - default: - { - reporter->InternalError( - "bad index type in CompositeHash::CompositeHash"); - return 0; - } - } - - break; // case TYPE_INTERNAL_VOID/OTHER - } - - case TYPE_INTERNAL_STRING: - if ( ! v ) - return (optional && ! calc_static_size) ? 
sz : 0; - - // Factor in length field. - sz = SizeAlign(sz, sizeof(int)); - sz += v->AsString()->Len(); - break; - - case TYPE_INTERNAL_ERROR: - return 0; - } - - return sz; - } - -int CompositeHash::ComputeKeySize(const Val* v, bool type_check, bool calc_static_size) const - { - const auto& tl = type->GetTypes(); - - if ( v ) - { - if ( type_check && v->GetType()->Tag() != TYPE_LIST ) - return 0; - - auto lv = v->AsListVal(); - - if ( type_check && lv->Length() != static_cast(tl.size()) ) - return 0; - } - - int sz = 0; - for ( auto i = 0u; i < tl.size(); ++i ) - { - sz = SingleTypeKeySize(tl[i].get(), v ? v->AsListVal()->Idx(i).get() : nullptr, type_check, - sz, false, calc_static_size); - if ( ! sz ) - return 0; - } - - return sz; - } - -namespace - { -inline bool is_power_of_2(bro_uint_t x) - { - return ((x - 1) & x) == 0; - } - } - -const void* CompositeHash::Align(const char* ptr, unsigned int size) const - { - if ( ! size ) - return ptr; - - ASSERT(is_power_of_2(size)); - - unsigned int mask = size - 1; // Assume size is a power of 2. - unsigned long l_ptr = reinterpret_cast(ptr); - unsigned long offset = l_ptr & mask; - - if ( offset > 0 ) - return reinterpret_cast(ptr - offset + size); - else - return reinterpret_cast(ptr); - } - -void* CompositeHash::AlignAndPad(char* ptr, unsigned int size) const - { - if ( ! size ) - return ptr; - - ASSERT(is_power_of_2(size)); - - unsigned int mask = size - 1; // Assume size is a power of 2. - while ( (reinterpret_cast(ptr) & mask) != 0 ) - // Not aligned - zero pad. - *ptr++ = '\0'; - - return reinterpret_cast(ptr); - } - -int CompositeHash::SizeAlign(int offset, unsigned int size) const - { - if ( ! size ) - return offset; - - ASSERT(is_power_of_2(size)); - - unsigned int mask = size - 1; // Assume size is a power of 2. - if ( offset & mask ) - { - offset &= ~mask; // Round down. - offset += size; // Round up. - } - - offset += size; // Add in size. 
- - return offset; - } - -ListValPtr CompositeHash::RecoverVals(const HashKey& k) const +ListValPtr CompositeHash::RecoverVals(const HashKey& hk) const { auto l = make_intrusive(TYPE_ANY); const auto& tl = type->GetTypes(); - const char* kp = (const char*)k.Key(); - const char* const k_end = kp + k.Size(); + + hk.ResetRead(); for ( const auto& type : tl ) { ValPtr v; - kp = RecoverOneVal(k, kp, k_end, type.get(), &v, false); + + if ( ! RecoverOneVal(hk, type.get(), &v, false, is_singleton) ) + reporter->InternalError("value recovery failure in CompositeHash::RecoverVals"); + ASSERT(v); l->Append(std::move(v)); } - if ( kp != k_end ) - reporter->InternalError("under-ran key in CompositeHash::DescribeKey %zd", k_end - kp); - return l; } -const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, const char* const k_end, - Type* t, ValPtr* pval, bool optional) const +bool CompositeHash::RecoverOneVal(const HashKey& hk, Type* t, ValPtr* pval, bool optional, + bool singleton) const { - // k->Size() == 0 for a single empty string. - if ( kp0 >= k_end && k.Size() > 0 ) - reporter->InternalError("over-ran key in CompositeHash::RecoverVals"); - TypeTag tag = t->Tag(); InternalTypeTag it = t->InternalType(); - const char* kp1 = nullptr; if ( optional ) { - const char* kp = AlignType(kp0); - kp0 = kp1 = reinterpret_cast(kp + 1); + bool opt; + hk.Read("optional", opt); - if ( ! *kp ) + if ( ! 
opt ) { *pval = nullptr; - return kp0; + return true; } } @@ -777,68 +158,69 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons { case TYPE_INTERNAL_INT: { - const bro_int_t* const kp = AlignType(kp0); - kp1 = reinterpret_cast(kp + 1); + bro_int_t i; + hk.Read("int", i); if ( tag == TYPE_ENUM ) - *pval = t->AsEnumType()->GetEnumVal(*kp); + *pval = t->AsEnumType()->GetEnumVal(i); else if ( tag == TYPE_BOOL ) - *pval = val_mgr->Bool(*kp); + *pval = val_mgr->Bool(i); else if ( tag == TYPE_INT ) - *pval = val_mgr->Int(*kp); + *pval = val_mgr->Int(i); else { reporter->InternalError( "bad internal unsigned int in CompositeHash::RecoverOneVal()"); *pval = nullptr; + return false; } } break; case TYPE_INTERNAL_UNSIGNED: { - const bro_uint_t* const kp = AlignType(kp0); - kp1 = reinterpret_cast(kp + 1); + bro_uint_t u; + hk.Read("unsigned", u); switch ( tag ) { case TYPE_COUNT: - *pval = val_mgr->Count(*kp); + *pval = val_mgr->Count(u); break; case TYPE_PORT: - *pval = val_mgr->Port(*kp); + *pval = val_mgr->Port(u); break; default: reporter->InternalError( "bad internal unsigned int in CompositeHash::RecoverOneVal()"); *pval = nullptr; - break; + return false; } } break; case TYPE_INTERNAL_DOUBLE: { - const double* const kp = AlignType(kp0); - kp1 = reinterpret_cast(kp + 1); + double d; + hk.Read("double", d); if ( tag == TYPE_INTERVAL ) - *pval = make_intrusive(*kp, 1.0); + *pval = make_intrusive(d, 1.0); else if ( tag == TYPE_TIME ) - *pval = make_intrusive(*kp); + *pval = make_intrusive(d); else - *pval = make_intrusive(*kp); + *pval = make_intrusive(d); } break; case TYPE_INTERNAL_ADDR: { - const uint32_t* const kp = AlignType(kp0); - kp1 = reinterpret_cast(kp + 4); - - IPAddr addr(IPv6, kp, IPAddr::Network); + hk.AlignRead(sizeof(uint32_t)); + hk.EnsureReadSpace(sizeof(uint32_t) * 4); + IPAddr addr(IPv6, static_cast(hk.KeyAtRead()), IPAddr::Network); + hk.SkipRead("addr", sizeof(uint32_t) * 4); switch ( tag ) { @@ -850,16 +232,21 @@ 
const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons reporter->InternalError( "bad internal address in CompositeHash::RecoverOneVal()"); *pval = nullptr; - break; + return false; } } break; case TYPE_INTERNAL_SUBNET: { - const uint32_t* const kp = AlignType(kp0); - kp1 = reinterpret_cast(kp + 5); - *pval = make_intrusive(kp, kp[4]); + hk.AlignRead(sizeof(uint32_t)); + hk.EnsureReadSpace(sizeof(uint32_t) * 4); + IPAddr addr(IPv6, static_cast(hk.KeyAtRead()), IPAddr::Network); + hk.SkipRead("subnet", sizeof(uint32_t) * 4); + + uint32_t width; + hk.Read("subnet-width", width); + *pval = make_intrusive(addr, width); } break; @@ -870,16 +257,15 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons { case TYPE_FUNC: { - const uint32_t* const kp = AlignType(kp0); - kp1 = reinterpret_cast(kp + 1); - - const auto& f = Func::GetFuncPtrByID(*kp); + uint32_t id; + hk.Read("func", id); + const auto& f = Func::GetFuncPtrByID(id); if ( ! f ) reporter->InternalError( "failed to look up unique function id %" PRIu32 " in CompositeHash::RecoverOneVal()", - *kp); + id); *pval = make_intrusive(f); const auto& pvt = (*pval)->GetType(); @@ -889,37 +275,43 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons "bad aggregate Val in CompositeHash::RecoverOneVal()"); else if ( t->Tag() != TYPE_FUNC && ! same_type(pvt, t) ) - // ### Maybe fix later, but may be fundamentally - // un-checkable --US + // ### Maybe fix later, but may be fundamentally un-checkable --US + { reporter->InternalError( "inconsistent aggregate Val in CompositeHash::RecoverOneVal()"); + *pval = nullptr; + return false; + } // ### A crude approximation for now. 
else if ( t->Tag() == TYPE_FUNC && pvt->Tag() != TYPE_FUNC ) + { reporter->InternalError( "inconsistent aggregate Val in CompositeHash::RecoverOneVal()"); + *pval = nullptr; + return false; + } } break; case TYPE_PATTERN: { - RE_Matcher* re = nullptr; - if ( is_singleton ) - { - kp1 = kp0; - int divider = strlen(kp0) + 1; - re = new RE_Matcher(kp1, kp1 + divider); - kp1 += k.Size(); - } - else - { - const uint64_t* const len = AlignType(kp0); + const char* texts[2] = {nullptr, nullptr}; + uint64_t lens[2] = {0, 0}; - kp1 = reinterpret_cast(len + 2); - re = new RE_Matcher(kp1, kp1 + len[0]); - kp1 += len[0] + len[1]; + if ( ! singleton ) + { + hk.Read("pattern-len1", lens[0]); + hk.Read("pattern-len2", lens[1]); } + texts[0] = static_cast(hk.KeyAtRead()); + hk.SkipRead("pattern-string1", strlen(texts[0]) + 1); + texts[1] = static_cast(hk.KeyAtRead()); + hk.SkipRead("pattern-string2", strlen(texts[1]) + 1); + + RE_Matcher* re = new RE_Matcher(texts[0], texts[1]); + if ( ! re->Compile() ) reporter->InternalError( "failed compiling table/set key pattern: %s", @@ -931,8 +323,7 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons case TYPE_RECORD: { - const char* kp = kp0; - RecordType* rt = t->AsRecordType(); + auto rt = t->AsRecordType(); int num_fields = rt->NumFields(); std::vector values; @@ -940,12 +331,15 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons for ( i = 0; i < num_fields; ++i ) { ValPtr v; - Attributes* a = rt->FieldDecl(i)->attrs.get(); bool optional = (a && a->Find(ATTR_OPTIONAL)); - kp = RecoverOneVal(k, kp, k_end, rt->GetFieldType(i).get(), &v, - optional); + if ( ! 
RecoverOneVal(hk, rt->GetFieldType(i).get(), &v, optional, + false) ) + { + *pval = nullptr; + return false; + } // An earlier call to reporter->InternalError would have called // abort() and broken the call tree that clang-tidy is relying on to @@ -955,8 +349,8 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons { reporter->InternalError( "didn't recover expected number of fields from HashKey"); - pval = nullptr; - break; + *pval = nullptr; + return false; } values.emplace_back(std::move(v)); @@ -970,32 +364,37 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons rv->Assign(i, std::move(values[i])); *pval = std::move(rv); - kp1 = kp; } break; case TYPE_TABLE: { int n; - const int* const kp = AlignType(kp0); - n = *kp; - kp1 = reinterpret_cast(kp + 1); - TableType* tt = t->AsTableType(); + hk.Read("table-size", n); + auto tt = t->AsTableType(); auto tv = make_intrusive(IntrusivePtr{NewRef{}, tt}); for ( int i = 0; i < n; ++i ) { ValPtr key; - kp1 = RecoverOneVal(k, kp1, k_end, tt->GetIndices().get(), &key, - false); + if ( ! RecoverOneVal(hk, tt->GetIndices().get(), &key, false, + false) ) + { + *pval = nullptr; + return false; + } if ( t->IsSet() ) tv->Assign(std::move(key), nullptr); else { ValPtr value; - kp1 = RecoverOneVal(k, kp1, k_end, tt->Yield().get(), &value, - false); + if ( ! 
RecoverOneVal(hk, tt->Yield().get(), &value, false, + false) ) + { + *pval = nullptr; + return false; + } tv->Assign(std::move(key), std::move(value)); } } @@ -1007,25 +406,24 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons case TYPE_VECTOR: { unsigned int n; - const unsigned int* kp = AlignType(kp0); - n = *kp; - kp1 = reinterpret_cast(kp + 1); - VectorType* vt = t->AsVectorType(); + hk.Read("vector-size", n); + auto vt = t->AsVectorType(); auto vv = make_intrusive(IntrusivePtr{NewRef{}, vt}); for ( unsigned int i = 0; i < n; ++i ) { - kp = AlignType(kp1); - unsigned int index = *kp; - kp1 = reinterpret_cast(kp + 1); - kp = AlignType(kp1); - unsigned int have_val = *kp; - kp1 = reinterpret_cast(kp + 1); + unsigned int index; + hk.Read("vector-idx", index); + bool have_val; + hk.Read("vector-idx-present", have_val); ValPtr value; - if ( have_val ) - kp1 = RecoverOneVal(k, kp1, k_end, vt->Yield().get(), &value, - false); + if ( have_val && + ! RecoverOneVal(hk, vt->Yield().get(), &value, false, false) ) + { + *pval = nullptr; + return false; + } vv->Assign(index, std::move(value)); } @@ -1037,17 +435,16 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons case TYPE_LIST: { int n; - const int* const kp = AlignType(kp0); - n = *kp; - kp1 = reinterpret_cast(kp + 1); - TypeList* tl = t->AsTypeList(); + hk.Read("list-size", n); + auto tl = t->AsTypeList(); auto lv = make_intrusive(TYPE_ANY); for ( int i = 0; i < n; ++i ) { ValPtr v; Type* it = tl->GetTypes()[i].get(); - kp1 = RecoverOneVal(k, kp1, k_end, it, &v, false); + if ( ! 
RecoverOneVal(hk, it, &v, false, false) ) + return false; lv->Append(std::move(v)); } @@ -1057,7 +454,10 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons default: { - reporter->InternalError("bad index type in CompositeHash::DescribeKey"); + reporter->InternalError( + "bad index type in CompositeHash::RecoverOneVal"); + *pval = nullptr; + return false; } } } @@ -1065,24 +465,17 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons case TYPE_INTERNAL_STRING: { - // There is a minor issue here -- the pointer does not have to - // be aligned by int in the singleton case. + int n = hk.Size(); - int n; - if ( is_singleton ) + if ( ! singleton ) { - kp1 = kp0; - n = k.Size(); - } - else - { - const int* const kp = AlignType(kp0); - n = *kp; - kp1 = reinterpret_cast(kp + 1); + hk.Read("string-len", n); + hk.EnsureReadSpace(n); } - *pval = make_intrusive(new String((const byte_vec)kp1, n, true)); - kp1 += n; + *pval = + make_intrusive(new String((const byte_vec)hk.KeyAtRead(), n, true)); + hk.SkipRead("string", n); } break; @@ -1090,7 +483,450 @@ const char* CompositeHash::RecoverOneVal(const HashKey& k, const char* kp0, cons break; } - return kp1; + return true; + } + +bool CompositeHash::SingleValHash(HashKey& hk, const Val* v, Type* bt, bool type_check, + bool optional, bool singleton) const + { + InternalTypeTag t = bt->InternalType(); + + if ( type_check && v ) + { + InternalTypeTag vt = v->GetType()->InternalType(); + if ( vt != t ) + return false; + } + + if ( optional ) + { + // Add a marker saying whether the optional field is set. + hk.Write("optional", v != nullptr); + + if ( ! v ) + return true; + } + + switch ( t ) + { + case TYPE_INTERNAL_INT: + hk.Write("int", v->AsInt()); + break; + + case TYPE_INTERNAL_UNSIGNED: + hk.Write("unsigned", v->AsCount()); + break; + + case TYPE_INTERNAL_ADDR: + if ( ! 
EnsureTypeReserve(hk, v, bt, type_check) ) + return false; + + hk.AlignWrite(sizeof(uint32_t)); + hk.EnsureWriteSpace(sizeof(uint32_t) * 4); + v->AsAddr().CopyIPv6(static_cast(hk.KeyAtWrite())); + hk.SkipWrite("addr", sizeof(uint32_t) * 4); + break; + + case TYPE_INTERNAL_SUBNET: + if ( ! EnsureTypeReserve(hk, v, bt, type_check) ) + return false; + + hk.AlignWrite(sizeof(uint32_t)); + hk.EnsureWriteSpace(sizeof(uint32_t) * 5); + v->AsSubNet().Prefix().CopyIPv6(static_cast(hk.KeyAtWrite())); + hk.SkipWrite("subnet", sizeof(uint32_t) * 4); + hk.Write("subnet-width", v->AsSubNet().Length()); + break; + + case TYPE_INTERNAL_DOUBLE: + hk.Write("double", v->InternalDouble()); + break; + + case TYPE_INTERNAL_VOID: + case TYPE_INTERNAL_OTHER: + { + switch ( v->GetType()->Tag() ) + { + case TYPE_FUNC: + hk.Write("func", v->AsFunc()->GetUniqueFuncID()); + break; + + case TYPE_PATTERN: + { + const char* texts[2] = {v->AsPattern()->PatternText(), + v->AsPattern()->AnywherePatternText()}; + uint64_t lens[2] = {strlen(texts[0]) + 1, strlen(texts[1]) + 1}; + + if ( ! singleton ) + { + hk.Write("pattern-len1", lens[0]); + hk.Write("pattern-len2", lens[1]); + } + else + { + hk.Reserve("pattern", lens[0] + lens[1]); + hk.Allocate(); + } + + hk.Write("pattern-string1", static_cast(texts[0]), + lens[0]); + hk.Write("pattern-string2", static_cast(texts[1]), + lens[1]); + break; + } + + case TYPE_RECORD: + { + auto rv = v->AsRecordVal(); + auto rt = bt->AsRecordType(); + int num_fields = rt->NumFields(); + + if ( ! EnsureTypeReserve(hk, v, bt, type_check) ) + return false; + + for ( int i = 0; i < num_fields; ++i ) + { + auto rv_i = rv->GetField(i); + + Attributes* a = rt->FieldDecl(i)->attrs.get(); + bool optional_attr = (a && a->Find(ATTR_OPTIONAL)); + + if ( ! (rv_i || optional_attr) ) + return false; + + if ( ! SingleValHash(hk, rv_i.get(), rt->GetFieldType(i).get(), + type_check, optional_attr, false) ) + return false; + } + break; + } + + case TYPE_TABLE: + { + if ( ! 
EnsureTypeReserve(hk, v, bt, type_check) ) + return false; + + auto tv = v->AsTableVal(); + auto hashkeys = ordered_hashkeys(tv); + + hk.Write("table-size", tv->Size()); + + for ( auto& kv : *hashkeys ) + { + auto key = kv.second; + + if ( ! SingleValHash(hk, key.get(), key->GetType().get(), + type_check, false, false) ) + return false; + + if ( ! v->GetType()->IsSet() ) + { + auto val = const_cast(tv)->FindOrDefault(key); + + if ( ! SingleValHash(hk, val.get(), val->GetType().get(), + type_check, false, false) ) + return false; + } + } + } + break; + + case TYPE_VECTOR: + { + if ( ! EnsureTypeReserve(hk, v, bt, type_check) ) + return false; + + auto vv = v->AsVectorVal(); + auto vt = v->GetType()->AsVectorType(); + + hk.Write("vector-size", vv->Size()); + + for ( unsigned int i = 0; i < vv->Size(); ++i ) + { + auto val = vv->ValAt(i); + hk.Write("vector-idx", i); + hk.Write("vector-idx-present", val != nullptr); + + if ( val && ! SingleValHash(hk, val.get(), vt->Yield().get(), + type_check, false, false) ) + return false; + } + } + break; + + case TYPE_LIST: + { + if ( ! hk.IsAllocated() ) + { + if ( ! ReserveSingleTypeKeySize(hk, bt, v, type_check, false, false, + false) ) + return false; + + hk.Allocate(); + } + + auto lv = v->AsListVal(); + + hk.Write("list-size", lv->Length()); + + for ( int i = 0; i < lv->Length(); ++i ) + { + Val* entry_val = lv->Idx(i).get(); + if ( ! SingleValHash(hk, entry_val, entry_val->GetType().get(), + type_check, false, false) ) + return false; + } + } + break; + + default: + { + reporter->InternalError( + "bad index type in CompositeHash::SingleValHash"); + return false; + } + } + + break; // case TYPE_INTERNAL_VOID/OTHER + } + + case TYPE_INTERNAL_STRING: + { + if ( ! EnsureTypeReserve(hk, v, bt, type_check) ) + return false; + + const auto sval = v->AsString(); + + if ( ! 
singleton ) + hk.Write("string-len", sval->Len()); + + hk.Write("string", sval->Bytes(), sval->Len()); + } + break; + + case TYPE_INTERNAL_ERROR: + return false; + } + + return true; + } + +bool CompositeHash::EnsureTypeReserve(HashKey& hk, const Val* v, Type* bt, bool type_check) const + { + if ( hk.IsAllocated() ) + return true; + + if ( ! ReserveSingleTypeKeySize(hk, bt, v, type_check, false, false, true) ) + return false; + + hk.Allocate(); + return true; + } + +bool CompositeHash::ReserveKeySize(HashKey& hk, const Val* v, bool type_check, + bool calc_static_size) const + { + const auto& tl = type->GetTypes(); + + for ( auto i = 0u; i < tl.size(); ++i ) + { + if ( ! ReserveSingleTypeKeySize(hk, tl[i].get(), v ? v->AsListVal()->Idx(i).get() : nullptr, + type_check, false, calc_static_size, is_singleton) ) + return false; + } + + return true; + } + +bool CompositeHash::ReserveSingleTypeKeySize(HashKey& hk, Type* bt, const Val* v, bool type_check, + bool optional, bool calc_static_size, + bool singleton) const + { + InternalTypeTag t = bt->InternalType(); + + if ( optional ) + { + hk.ReserveType("optional"); + if ( ! v ) + return true; + } + + if ( type_check && v ) + { + InternalTypeTag vt = v->GetType()->InternalType(); + if ( vt != t ) + return false; + } + + switch ( t ) + { + case TYPE_INTERNAL_INT: + hk.ReserveType("int"); + break; + + case TYPE_INTERNAL_UNSIGNED: + hk.ReserveType("unsigned"); + break; + + case TYPE_INTERNAL_ADDR: + hk.Reserve("addr", sizeof(uint32_t) * 4, sizeof(uint32_t)); + break; + + case TYPE_INTERNAL_SUBNET: + hk.Reserve("subnet", sizeof(uint32_t) * 5, sizeof(uint32_t)); + break; + + case TYPE_INTERNAL_DOUBLE: + hk.ReserveType("double"); + break; + + case TYPE_INTERNAL_VOID: + case TYPE_INTERNAL_OTHER: + { + switch ( bt->Tag() ) + { + case TYPE_FUNC: + { + hk.ReserveType("func"); + break; + } + + case TYPE_PATTERN: + { + if ( ! v ) + return (optional && ! calc_static_size); + + if ( ! 
singleton ) + { + hk.ReserveType("pattern-len1"); + hk.ReserveType("pattern-len2"); + } + + // +1 in the following to include null terminators + hk.Reserve("pattern-string1", strlen(v->AsPattern()->PatternText()) + 1, + 0); + hk.Reserve("pattern-string2", + strlen(v->AsPattern()->AnywherePatternText()) + 1, 0); + break; + } + + case TYPE_RECORD: + { + if ( ! v ) + return (optional && ! calc_static_size); + + const RecordVal* rv = v->AsRecordVal(); + RecordType* rt = bt->AsRecordType(); + int num_fields = rt->NumFields(); + + for ( int i = 0; i < num_fields; ++i ) + { + Attributes* a = rt->FieldDecl(i)->attrs.get(); + bool optional_attr = (a && a->Find(ATTR_OPTIONAL)); + + auto rv_v = rv ? rv->GetField(i) : nullptr; + if ( ! ReserveSingleTypeKeySize( + hk, rt->GetFieldType(i).get(), rv_v.get(), type_check, + optional_attr, calc_static_size, false) ) + return false; + } + break; + } + + case TYPE_TABLE: + { + if ( ! v ) + return (optional && ! calc_static_size); + + auto tv = v->AsTableVal(); + auto hashkeys = ordered_hashkeys(tv); + + hk.ReserveType("table-size"); + + for ( auto& kv : *hashkeys ) + { + auto key = kv.second; + + if ( ! ReserveSingleTypeKeySize(hk, key->GetType().get(), key.get(), + type_check, false, calc_static_size, + false) ) + return false; + + if ( ! bt->IsSet() ) + { + auto val = const_cast<TableVal*>(tv)->FindOrDefault(key); + if ( ! ReserveSingleTypeKeySize(hk, val->GetType().get(), + val.get(), type_check, false, + calc_static_size, false) ) + return false; + } + } + + break; + } + + case TYPE_VECTOR: + { + if ( ! v ) + return (optional && ! calc_static_size); + + hk.ReserveType("vector-size"); + VectorVal* vv = const_cast<VectorVal*>(v->AsVectorVal()); + for ( unsigned int i = 0; i < vv->Size(); ++i ) + { + auto val = vv->ValAt(i); + hk.ReserveType("vector-idx"); + hk.ReserveType("vector-idx-present"); + if ( val && ! 
ReserveSingleTypeKeySize( + hk, bt->AsVectorType()->Yield().get(), val.get(), + type_check, false, calc_static_size, false) ) + return false; + } + break; + } + + case TYPE_LIST: + { + if ( ! v ) + return (optional && ! calc_static_size); + + hk.ReserveType("list-size"); + ListVal* lv = const_cast<ListVal*>(v->AsListVal()); + for ( int i = 0; i < lv->Length(); ++i ) + { + if ( ! ReserveSingleTypeKeySize(hk, lv->Idx(i)->GetType().get(), + lv->Idx(i).get(), type_check, false, + calc_static_size, false) ) + return false; + } + + break; + } + + default: + { + reporter->InternalError( + "bad index type in CompositeHash::ReserveSingleTypeKeySize"); + return false; + } + } + + break; // case TYPE_INTERNAL_VOID/OTHER + } + + case TYPE_INTERNAL_STRING: + if ( ! v ) + return (optional && ! calc_static_size); + if ( ! singleton ) + hk.ReserveType("string-len"); + hk.Reserve("string", v->AsString()->Len()); + break; + + case TYPE_INTERNAL_ERROR: + return false; + } + + return true; } } // namespace zeek::detail diff --git a/src/CompHash.h b/src/CompHash.h index 5a9ebd65d1..cbbefe5a02 100644 --- a/src/CompHash.h +++ b/src/CompHash.h @@ -24,7 +24,6 @@ class CompositeHash { public: explicit CompositeHash(TypeListPtr composite_type); - ~CompositeHash(); // Compute the hash corresponding to the given index val, // or nullptr if it fails to typecheck. @@ -37,65 +36,33 @@ public: "GHI-572.")]] unsigned int MemoryAllocation() const { - return padded_sizeof(*this) + util::pad_size(size); + return padded_sizeof(*this); } protected: - std::unique_ptr<HashKey> ComputeSingletonHash(const Val* v, bool type_check) const; - - // Computes the piece of the hash for Val*, returning the new kp. - // Used as a helper for ComputeHash in the non-singleton case. 
- char* SingleValHash(bool type_check, char* kp, Type* bt, Val* v, bool optional) const; + bool SingleValHash(HashKey& hk, const Val* v, Type* bt, bool type_check, bool optional, + bool singleton) const; // Recovers just one Val of possibly many; called from RecoverVals. // Upon return, pval will point to the recovered Val of type t. // Returns and updated kp for the next Val. Calls reporter->InternalError() // upon errors, so there is no return value for invalid input. - const char* RecoverOneVal(const HashKey& k, const char* kp, const char* const k_end, Type* t, - ValPtr* pval, bool optional) const; - - // Rounds the given pointer up to the nearest multiple of the - // given size, if not already a multiple. - const void* Align(const char* ptr, unsigned int size) const; - - // Rounds the given pointer up to the nearest multiple of the - // given size, padding the skipped region with 0 bytes. - void* AlignAndPad(char* ptr, unsigned int size) const; - - // Returns offset+size rounded up so it can correctly align data - // of the given size. - int SizeAlign(int offset, unsigned int size) const; - - template <class T> T* AlignAndPadType(char* ptr) const - { - return reinterpret_cast<T*>(AlignAndPad(ptr, sizeof(T))); - } - - template <class T> const T* AlignType(const char* ptr) const - { - return reinterpret_cast<const T*>(Align(ptr, sizeof(T))); - } - - template <class T> int SizeAlignType(int offset) const { return SizeAlign(offset, sizeof(T)); } + bool RecoverOneVal(const HashKey& k, Type* t, ValPtr* pval, bool optional, + bool singleton) const; // Compute the size of the composite key. If v is non-nil then // the value is computed for the particular list of values. // Returns 0 if the key has an indeterminant size (if v not given), // or if v doesn't match the index type (if given). 
- int ComputeKeySize(const Val* v, bool type_check, bool calc_static_size) const; + bool ReserveKeySize(HashKey& hk, const Val* v, bool type_check, bool calc_static_size) const; - int SingleTypeKeySize(Type*, const Val*, bool type_check, int sz, bool optional, - bool calc_static_size) const; + bool ReserveSingleTypeKeySize(HashKey& hk, Type*, const Val* v, bool type_check, bool optional, + bool calc_static_size, bool singleton) const; + + bool EnsureTypeReserve(HashKey& hk, const Val* v, Type* bt, bool type_check) const; TypeListPtr type; - char* key; // space for composite key - int size; - bool is_singleton; // if just one type in index - - // If one type, but not normal "singleton", e.g. record. - bool is_complex_type; - - InternalTypeTag singleton_tag; + bool is_singleton = false; // if just one type in index }; } // namespace zeek::detail