From 5e9dad04dbd5ce075f5ff160b25d7d8579ef9a72 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Sat, 27 Sep 2025 17:38:51 +0200 Subject: [PATCH 01/11] RecordVal: Use const pointer in parse_time_records It's just an index. --- src/Val.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Val.h b/src/Val.h index 1ee4cd510b..17cc735d0e 100644 --- a/src/Val.h +++ b/src/Val.h @@ -1484,7 +1484,7 @@ protected: Obj* origin = nullptr; - using RecordTypeValMap = std::unordered_map>; + using RecordTypeValMap = std::unordered_map>; static RecordTypeValMap parse_time_records; private: From 00ad3e31c69b06e5bd68a5de74184ab33193edcc Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Sat, 26 Jul 2025 12:24:57 +0200 Subject: [PATCH 02/11] Modifiable: No virtual destructor There's only three classes that inherit from Modifiable today. The virtual destructor adds an extra 8 byte vtable to every instance of table, record or vector values. Calling Unregister() explicitly from the destructors explicitly saves 8 bytes of memory for each instance. --- src/Notifier.cc | 5 ----- src/Notifier.h | 13 +++++++++++-- src/Val.cc | 7 +++++++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/Notifier.cc b/src/Notifier.cc index 0554284314..16296f4f59 100644 --- a/src/Notifier.cc +++ b/src/Notifier.cc @@ -62,9 +62,4 @@ void Registry::Terminate() { } } -Modifiable::~Modifiable() { - if ( num_receivers ) - registry.Unregister(this); -} - } // namespace zeek::notifier::detail diff --git a/src/Notifier.h b/src/Notifier.h index 6da7d7c59f..f4c0402fac 100644 --- a/src/Notifier.h +++ b/src/Notifier.h @@ -99,8 +99,6 @@ extern Registry registry; */ class Modifiable { public: - virtual ~Modifiable(); - /** * Calling this method signals to all registered receivers that the * object has been modified. @@ -110,6 +108,17 @@ public: registry.Modified(this); } + /** + * Unregister this instance from the registry. + * + * Classes inheriting from Modifiable are required to call + * this from the destructor explicitly. + */ + void Unregister() { + if ( num_receivers ) + registry.Unregister(this); + } + protected: friend class Registry; diff --git a/src/Val.cc b/src/Val.cc index d34426a0e7..9c5da7e962 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -31,6 +31,7 @@ #include "zeek/IPAddr.h" #include "zeek/IntrusivePtr.h" #include "zeek/NetVar.h" +#include "zeek/Notifier.h" #include "zeek/Overflow.h" #include "zeek/PrefixTable.h" #include "zeek/RE.h" @@ -1712,6 +1713,8 @@ void TableVal::Init(TableTypePtr t, bool ordered) { } TableVal::~TableVal() { + notifier::detail::Modifiable::Unregister(); + if ( timer ) detail::timer_mgr->Cancel(timer); @@ -2952,6 +2955,8 @@ RecordVal::RecordVal(RecordTypePtr t, std::vector> init_vals } RecordVal::~RecordVal() { + notifier::detail::Modifiable::Unregister(); + auto n = record_val.size(); for ( unsigned int i = 0; i < n; ++i ) { @@ -3234,6 +3239,8 @@ VectorVal::VectorVal(VectorTypePtr t, std::vector>* vals) : } VectorVal::~VectorVal() { + notifier::detail::Modifiable::Unregister(); + if ( yield_types ) { int n = yield_types->size(); for ( auto i = 0; i < n; ++i ) { From 27777ac214ef0f610d88015e36c9b700cf2f3cc6 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Sat, 27 Sep 2025 15:28:32 +0200 Subject: [PATCH 03/11] Val: Introduce ZValSlot This introduces a new class for replacing the std::optional in RecordVal's record_val vector. It may also be useful within VectorVal. --- src/Val.cc | 107 ++++++++++++++++++++++++++++++++++++++++++++ src/Val.h | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 236 insertions(+) diff --git a/src/Val.cc b/src/Val.cc index 9c5da7e962..d07b8a3288 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -44,6 +44,8 @@ #include "zeek/broker/Store.h" #include "zeek/threading/formatters/detail/json.h" +#include "zeek/3rdparty/doctest.h" + using namespace std; namespace zeek { @@ -4120,3 +4122,108 @@ const PortValPtr& ValManager::Port(uint32_t port_num) { } } // namespace zeek + +TEST_SUITE_BEGIN("ZValSlot"); + +TEST_CASE("default constructor") { + // default constructor doesn't do anything. + zeek::ZValSlot slot; +} + +TEST_CASE("slot hold CountVal") { + auto t = zeek::base_type(zeek::TYPE_COUNT); + auto v = zeek::make_intrusive(104242); + zeek::ZValSlot slot = zeek::ZValSlot(v, t); + + CHECK(slot.IsSet()); + CHECK(slot.Tag() == zeek::TYPE_COUNT); + CHECK(! slot.IsManaged()); + CHECK(v->RefCnt() == 1); // Not managed, so the slot does not hold a ref to the original value. + CHECK(slot->AsCount() == 104242); + + auto nv = slot->ToVal(t); + CHECK(nv->RefCnt() == 1); // Not managed, so the slot does not hold a ref to the original value. +} + +TEST_CASE("slot hold RecordVal") { + auto t = zeek::id::find_type("conn_id_ctx"); + auto v = zeek::make_intrusive(t); + CHECK(v->RefCnt() == 1); + zeek::ZValSlot slot = zeek::ZValSlot(v, t); + + CHECK(slot.IsSet()); + CHECK(slot.Tag() == zeek::TYPE_RECORD); + CHECK(slot.IsManaged()); + CHECK(v->RefCnt() == 2); // Managed, slot takes a ref. + + slot.Reset(); + + CHECK(v->RefCnt() == 1); + v = nullptr; +} + +TEST_CASE("assign count ZVal to slot") { + auto t = zeek::base_type(zeek::TYPE_COUNT); + auto v1 = zeek::make_intrusive(42); + auto v2 = zeek::make_intrusive(100000); + + zeek::ZValSlot slot = zeek::ZValSlot(v1, t); + CHECK(v1->RefCnt() == 1); + + slot = zeek::ZVal(v2, t); + + // Unmanaged + CHECK(v1->RefCnt() == 1); + CHECK(v2->RefCnt() == 1); + + auto v3 = slot->ToVal(t); + + // New CountVal for 100000 + CHECK(v3 != v2); + CHECK(v3->RefCnt() == 1); +} + +TEST_CASE("assign record ZVal to slot") { + auto t = zeek::id::find_type("conn_id_ctx"); + auto v1 = zeek::make_intrusive(t); + auto v2 = zeek::make_intrusive(t); + + zeek::ZValSlot slot = zeek::ZValSlot(v1, t); + CHECK(v1->RefCnt() == 2); // v1 and slot + + slot = zeek::ZVal(v2, t); + CHECK(v1->RefCnt() == 1); // slot released + CHECK(v2->RefCnt() == 2); // v2 and slot + + auto v3 = slot->ToVal(t); + CHECK(v3 == v2); + CHECK(v2->RefCnt() == 3); // v2, slot and v3 +} + +TEST_CASE("assign slot assignment") { + auto t = zeek::id::find_type("conn_id_ctx"); + auto v1 = zeek::make_intrusive(t); + auto v2 = zeek::make_intrusive(t); + + zeek::ZValSlot slot1 = zeek::ZValSlot(v1, t); + zeek::ZValSlot slot2 = zeek::ZValSlot(v2, t); + + CHECK(v1->RefCnt() == 2); // v1 and slot1 + CHECK(v2->RefCnt() == 2); // v2 and slot2 + + slot1 = slot2; + CHECK(v1->RefCnt() == 1); // slot1 released + CHECK(v2->RefCnt() == 3); // v2, slot1 and slot2 +} + +TEST_CASE("copy slot") { + auto t = zeek::id::find_type("conn_id_ctx"); + auto v = zeek::make_intrusive(t); + + zeek::ZValSlot slot1 = zeek::ZValSlot(v, t); + zeek::ZValSlot slot2 = slot1; + + CHECK(v->RefCnt() == 3); // v1, slot1 and slot2 +} + +TEST_SUITE_END(); diff --git a/src/Val.h b/src/Val.h index 17cc735d0e..517ad51a3b 100644 --- a/src/Val.h +++ b/src/Val.h @@ -1114,6 +1114,135 @@ struct is_zeek_val { template inline constexpr bool is_zeek_val_v = is_zeek_val::value; +/** + * A ZValSlot holds a ZVal instance and some auxiliary information that allows automatic + * memory management as well as acting as an optional unset field. + * + * This class originated from the observation that a std::optional + * as previously used in VectorVal and RecordVal objects already takes up + * 16 bytes on 64 bit architectures with GCC. The ZValSlot class essentially + * uses the left-over 7 bytes from the std::optional to allow easier memory + * management without needing to keep external auxilarly information around. + * + * The is_managed flag and type_tag flags are meant to be immutable except + * when a ZValSlot is reassigned. + * + * A ZValSlot instance holds a reference to a managed value. Such ZValSlot + * instances can be copied or assigned and the reference count of the ZVal + * will be updated accordingly. + */ +class ZValSlot { +public: + /** + * Totally uninitialized, watch out! + */ + ZValSlot() {} + + /** + * Initialize a set ZValSlot given a ValPtr and corresponding TypePtr. + * + * This has the same ref counting semantics as the corresponding ZVal + * constructor, increasing the ref count of any managed value. + */ + ZValSlot(ValPtr v, const TypePtr& t) + : is_set(true), is_managed(ZVal::IsManagedType(t)), type_tag(t->Tag()), zval(v, t) {} + + /** + * Initialize a ZValSlot with just the TypePtr. + * + * This is useful for optional fields in a record value where + * the type is known at construction time. + */ + ZValSlot(const TypePtr& t) : is_set(false), is_managed(ZVal::IsManagedType(t)), type_tag(t->Tag()) {} + + /** + * Copy constructor. + */ + ZValSlot(const ZValSlot& s) : is_set(s.is_set), is_managed(s.is_managed), type_tag(s.type_tag), zval(s.zval) { + if ( is_set && is_managed ) + Ref(zval.ManagedVal()); + } + + /** + * Destructor. + */ + ~ZValSlot() { Reset(); } + + ZValSlot& operator=(const ZValSlot& s) { + if ( is_set && is_managed ) + Unref(zval.ManagedVal()); + + is_set = s.is_set; + is_managed = s.is_managed; + type_tag = s.type_tag; + zval = s.zval; + + if ( is_set && is_managed ) + Ref(zval.ManagedVal()); + + return *this; + } + + /** + * Assign a ZVal to a slot. + * + * This uses the is_managed member to determine if the + * given ZVal should be treated as managed and whether. + * + * Assigning a ZVal to a managed slot adopts a reference! + * This is a bit quirky, but it's what the plain ZVal + * constructors also do. + */ + ZValSlot& operator=(const ZVal& zv) { + if ( is_set && is_managed ) + Unref(zval.ManagedVal()); + + is_set = true; + zval = zv; + + return *this; + } + + operator bool() const noexcept { return is_set; } + const ZVal* operator->() const noexcept { return &zval; } + ZVal& operator*() noexcept { return zval; } + const ZVal& operator*() const noexcept { return zval; } + + bool IsSet() const noexcept { return is_set; } + bool IsManaged() const noexcept { return is_managed; } + TypeTag Tag() const noexcept { return type_tag; } + + void Reset() { + if ( is_set && is_managed ) + Unref(zval.ManagedVal()); + + is_set = false; + } + + /** + * Convert a slot's ZVal to a ValPtr given a TypePtr. + * + * @param t Type to use for conversion to Val. Needs to agree with the type that was used to initialize the + * slot. + * + * @return A ValPtr instance for the slot. + */ + ValPtr ToVal(const TypePtr& t) { + assert(IsSet()); + // assert(Tag() == TYPE_ANY || Tag() == t->Tag()); + + return zval.ToVal(t); + } + +private: + bool is_set; + bool is_managed; + TypeTag type_tag; + ZVal zval; +}; + +static_assert(sizeof(ZValSlot) <= 16); + class RecordVal final : public Val, public notifier::detail::Modifiable { public: explicit RecordVal(RecordTypePtr t, bool init_fields = true); From f30e0166a71afcfa65ac87be960714e644c5be6a Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Sat, 27 Sep 2025 16:57:14 +0200 Subject: [PATCH 04/11] RecordVal: Use ZValSlot for fields --- src/Val.cc | 62 +++++++++++++++++++++++++++++++++++------------------- src/Val.h | 33 +++++++++++------------------ 2 files changed, 52 insertions(+), 43 deletions(-) diff --git a/src/Val.cc b/src/Val.cc index d07b8a3288..29f4c191d8 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -2924,7 +2924,7 @@ TableVal::TableRecordDependencies TableVal::parse_time_table_record_dependencies RecordVal::RecordTypeValMap RecordVal::parse_time_records; -RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(t), is_managed(t->ManagedFields()) { +RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(t) { rt = std::move(t); int n = rt->NumFields(); @@ -2935,6 +2935,10 @@ RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(t), is_managed(t-> if ( init_fields ) { record_val.resize(n); + // Properly initialize all slots. + for ( size_t i = 0; i < record_val.size(); i++ ) + record_val[i] = ZValSlot(rt->GetFieldType(i)); + for ( auto& e : rt->CreationInits() ) { try { record_val[e.first] = e.second->Generate(); @@ -2947,35 +2951,34 @@ RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(t), is_managed(t-> } else + // This needs to go through AppendField() which will do the right thing + // for the individual slots. record_val.reserve(n); } -RecordVal::RecordVal(RecordTypePtr t, std::vector> init_vals) - : Val(t), is_managed(t->ManagedFields()) { +RecordVal::RecordVal(RecordTypePtr t, std::vector> init_vals) : Val(t) { rt = std::move(t); - record_val = std::move(init_vals); -} -RecordVal::~RecordVal() { - notifier::detail::Modifiable::Unregister(); + // TODO: Change so that callers pass init_vals as ZValSlot instead? + record_val.reserve(rt->NumFields()); + size_t n = rt->NumFields(); - auto n = record_val.size(); + for ( size_t i = 0; i < n; i++ ) { + record_val.emplace_back(ZValSlot(rt->GetFieldType(i))); - for ( unsigned int i = 0; i < n; ++i ) { - auto f_i = record_val[i]; - if ( f_i && IsManaged(i) ) - ZVal::DeleteManagedType(*f_i); + if ( init_vals[i].has_value() ) + record_val[i] = init_vals[i].value(); } } +RecordVal::~RecordVal() { notifier::detail::Modifiable::Unregister(); } + ValPtr RecordVal::SizeVal() const { return val_mgr->Count(GetType()->AsRecordType()->NumFields()); } void RecordVal::Assign(int field, ValPtr new_val) { if ( new_val ) { - DeleteFieldIfManaged(field); - auto t = rt->GetFieldType(field); - record_val[field] = ZVal(new_val, t); + record_val[field] = ZValSlot(new_val, t); Modified(); } else @@ -2983,15 +2986,11 @@ void RecordVal::Assign(int field, ValPtr new_val) { } void RecordVal::Remove(int field) { - auto& f_i = record_val[field]; - if ( f_i ) { - if ( IsManaged(field) ) - ZVal::DeleteManagedType(*f_i); - - f_i = std::nullopt; + bool was_set = record_val[field].IsSet(); + record_val[field].Reset(); + if ( was_set ) Modified(); - } } ValPtr RecordVal::GetFieldOrDefault(int field) const { @@ -4227,3 +4226,22 @@ TEST_CASE("copy slot") { } TEST_SUITE_END(); + +TEST_SUITE_BEGIN("RecordVal"); + +TEST_CASE("assign string") { + auto t = zeek::id::find_type("PacketSource"); + auto v = zeek::make_intrusive(t); + + std::string path = "test-path"; + + v->Assign(1, path); + + auto sv = v->GetField(1); + CHECK(sv->RefCnt() == 2); // sv and v + + v.reset(); + CHECK(sv->RefCnt() == 1); // record was destroyed +} + +TEST_SUITE_END(); diff --git a/src/Val.h b/src/Val.h index 517ad51a3b..25cdaff0ca 100644 --- a/src/Val.h +++ b/src/Val.h @@ -1315,10 +1315,7 @@ public: void AssignInterval(int field, double new_val) { Assign(field, new_val); } void Assign(int field, StringVal* new_val) { - auto& fv = record_val[field]; - if ( fv ) - ZVal::DeleteManagedType(*fv); - fv = ZVal(new_val); + record_val[field] = ZVal(new_val); AddedField(field); } void Assign(int field, const char* new_val) { Assign(field, new StringVal(new_val)); } @@ -1383,7 +1380,7 @@ public: fv = fi->Generate(); } - return fv->ToVal(rt->GetFieldType(field)); + return fv.ToVal(rt->GetFieldType(field)); } /** @@ -1450,7 +1447,7 @@ public: // Returns true if the slot for the given field is initialized. // This helper can be used to guard GetFieldAs() accesses. - bool HasRawField(int field) const { return record_val[field].has_value(); } + bool HasRawField(int field) const { return record_val[field].IsSet(); } // The following return the given field converted to a particular // underlying value. We provide these to enable efficient @@ -1580,16 +1577,18 @@ protected: */ void AppendField(ValPtr v, const TypePtr& t) { if ( v ) - record_val.emplace_back(ZVal(v, t)); + record_val.emplace_back(ZValSlot(v, t)); else - record_val.emplace_back(std::nullopt); + record_val.emplace_back(ZValSlot(t)); } // For internal use by low-level ZAM instructions and event tracing. // Caller assumes responsibility for memory management. The first // version allows manipulation of whether the field is present at all. // The second version ensures that the optional value is present. - std::optional& RawOptField(int field) { + // + // TODO: Fix name! + ZValSlot& RawOptField(int field) { auto& f = record_val[field]; if ( ! f ) { const auto& fi = rt->DeferredInits()[field]; @@ -1600,10 +1599,13 @@ protected: return f; } + // TODO: Fix name! ZVal& RawField(int field) { auto& f = RawOptField(field); if ( ! f ) f = ZVal(); + + assert(f.IsSet()); return *f; } @@ -1617,14 +1619,6 @@ protected: static RecordTypeValMap parse_time_records; private: - void DeleteFieldIfManaged(unsigned int field) { - auto& f = record_val[field]; - if ( f && IsManaged(field) ) - ZVal::DeleteManagedType(*f); - } - - bool IsManaged(unsigned int offset) const { return is_managed[offset]; } - // Just for template inferencing. RecordVal* Get() { return this; } @@ -1636,10 +1630,7 @@ private: // Low-level values of each of the fields. // // Lazily modified during GetField(), so mutable. - mutable std::vector> record_val; - - // Whether a given field requires explicit memory management. - const std::vector& is_managed; + mutable std::vector record_val; }; class EnumVal final : public detail::IntValImplementation { From f17ca010bc38bd646e179fec7e6f355dba83f9f4 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Sat, 27 Sep 2025 17:41:26 +0200 Subject: [PATCH 05/11] RecordVal: Remove rt member This doesn't seem to be really needed and costs 8 bytes per record value --- src/Val.cc | 29 +++++++++++++---------------- src/Val.h | 21 +++++++++++---------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/src/Val.cc b/src/Val.cc index 29f4c191d8..dc8ab0c420 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -2924,13 +2924,12 @@ TableVal::TableRecordDependencies TableVal::parse_time_table_record_dependencies RecordVal::RecordTypeValMap RecordVal::parse_time_records; -RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(t) { - rt = std::move(t); - +RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(std::move(t)) { + auto* rt = GetRecordType(); int n = rt->NumFields(); if ( run_state::is_parsing ) - parse_time_records[rt.get()].emplace_back(NewRef{}, this); + parse_time_records[rt].emplace_back(NewRef{}, this); if ( init_fields ) { record_val.resize(n); @@ -2944,7 +2943,7 @@ RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(t) { record_val[e.first] = e.second->Generate(); } catch ( InterpreterException& e ) { if ( run_state::is_parsing ) - parse_time_records[rt.get()].pop_back(); + parse_time_records[rt].pop_back(); throw; } } @@ -2956,15 +2955,13 @@ RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(t) { record_val.reserve(n); } -RecordVal::RecordVal(RecordTypePtr t, std::vector> init_vals) : Val(t) { - rt = std::move(t); - +RecordVal::RecordVal(RecordTypePtr t, std::vector> init_vals) : Val(std::move(t)) { // TODO: Change so that callers pass init_vals as ZValSlot instead? - record_val.reserve(rt->NumFields()); - size_t n = rt->NumFields(); + size_t n = GetRecordType()->NumFields(); + record_val.reserve(n); for ( size_t i = 0; i < n; i++ ) { - record_val.emplace_back(ZValSlot(rt->GetFieldType(i))); + record_val.emplace_back(ZValSlot(GetRecordType()->GetFieldType(i))); if ( init_vals[i].has_value() ) record_val[i] = init_vals[i].value(); @@ -2977,7 +2974,7 @@ ValPtr RecordVal::SizeVal() const { return val_mgr->Count(GetType()->AsRecordTyp void RecordVal::Assign(int field, ValPtr new_val) { if ( new_val ) { - auto t = rt->GetFieldType(field); + const auto& t = GetRecordType()->GetFieldType(field); record_val[field] = ZValSlot(new_val, t); Modified(); } @@ -3105,7 +3102,7 @@ void RecordVal::Describe(ODesc* d) const { auto n = record_val.size(); if ( d->IsBinary() ) { - rt->Describe(d); + GetRecordType()->Describe(d); d->SP(); d->Add(static_cast(n)); d->SP(); @@ -3117,7 +3114,7 @@ void RecordVal::Describe(ODesc* d) const { if ( ! d->IsBinary() && i > 0 ) d->Add(", "); - d->Add(rt->FieldName(i)); + d->Add(GetRecordType()->FieldName(i)); if ( ! d->IsBinary() ) d->Add("="); @@ -3166,14 +3163,14 @@ ValPtr RecordVal::DoClone(CloneState* state) { // record. As we cannot guarantee that it will be zeroed out at the // appropriate time (as it seems to be guaranteed for the original record) // we don't touch it. - auto rv = make_intrusive(rt, false); + auto rv = make_intrusive(GetType(), false); state->NewClone(this, rv); int n = NumFields(); for ( auto i = 0; i < n; ++i ) { auto f_i = GetField(i); auto v = f_i ? f_i->Clone(state) : nullptr; - rv->AppendField(std::move(v), rt->GetFieldType(i)); + rv->AppendField(std::move(v), GetRecordType()->GetFieldType(i)); } return rv; diff --git a/src/Val.h b/src/Val.h index 25cdaff0ca..74b244cdf4 100644 --- a/src/Val.h +++ b/src/Val.h @@ -1329,7 +1329,7 @@ public: */ template void AssignField(const char* field_name, T&& val) { - int idx = rt->FieldOffset(field_name); + int idx = GetRecordType()->FieldOffset(field_name); if ( idx < 0 ) reporter->InternalError("missing record field: %s", field_name); Assign(idx, std::forward(val)); @@ -1351,7 +1351,7 @@ public: if ( record_val[field] ) return true; - return rt->DeferredInits()[field] != nullptr; + return GetRecordType()->DeferredInits()[field] != nullptr; } /** @@ -1361,7 +1361,7 @@ public: * @return Whether there's a value for the given field name. */ bool HasField(const char* field) const { - int idx = rt->FieldOffset(field); + int idx = GetRecordType()->FieldOffset(field); return (idx != -1) && HasField(idx); } @@ -1373,14 +1373,14 @@ public: ValPtr GetField(int field) const { auto& fv = record_val[field]; if ( ! fv ) { - const auto& fi = rt->DeferredInits()[field]; + const auto& fi = GetRecordType()->DeferredInits()[field]; if ( ! fi ) return nullptr; fv = fi->Generate(); } - return fv.ToVal(rt->GetFieldType(field)); + return fv.ToVal(GetRecordType()->GetFieldType(field)); } /** @@ -1510,7 +1510,7 @@ public: template auto GetFieldAs(const char* field) const { - int idx = rt->FieldOffset(field); + int idx = GetRecordType()->FieldOffset(field); if ( idx < 0 ) reporter->InternalError("missing record field: %s", field); @@ -1591,7 +1591,7 @@ protected: ZValSlot& RawOptField(int field) { auto& f = record_val[field]; if ( ! f ) { - const auto& fi = rt->DeferredInits()[field]; + const auto& fi = GetRecordType()->DeferredInits()[field]; if ( fi ) f = fi->Generate(); } @@ -1621,12 +1621,13 @@ protected: private: // Just for template inferencing. RecordVal* Get() { return this; } + const RecordType* GetRecordType() const noexcept { + assert(GetType()->Tag() == TYPE_RECORD); + return static_cast(GetType().get()); + } unsigned int ComputeFootprint(std::unordered_set* analyzed_vals) const override; - // Keep this handy for quick access during low-level operations. - RecordTypePtr rt; - // Low-level values of each of the fields. // // Lazily modified during GetField(), so mutable. From 7ae53aea049e5768f090a88b9257d116126c5d87 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Sun, 28 Sep 2025 19:09:48 +0200 Subject: [PATCH 06/11] logging/Manager: Fix slot logic --- src/logging/Manager.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/logging/Manager.cc b/src/logging/Manager.cc index 98e29bbaa7..ab55adcb64 100644 --- a/src/logging/Manager.cc +++ b/src/logging/Manager.cc @@ -1654,14 +1654,16 @@ detail::LogRecord Manager::RecordToLogRecord(WriterInfo* info, Filter* filter, c for ( int index : indices ) { auto vr = val->AsRecord(); - val = vr->RawOptField(index); + const auto& slot = vr->RawOptField(index); - if ( ! val ) { + if ( ! slot.IsSet() ) { // Value, or any of its parents, is not set. vals.emplace_back(filter->fields[i]->type, false); + val = std::nullopt; break; } + val = *slot; vt = cast_intrusive(vr->GetType())->GetFieldType(index).get(); } From ae86da62f3f4fcf05ac2568811dae853297191cb Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Sun, 28 Sep 2025 19:39:26 +0200 Subject: [PATCH 07/11] ZAM: Drop some unneeded DeleteManagedTyped() calls The delete happens by assigning to a slot now. --- src/script_opt/ZAM/OPs/constructors.op | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/script_opt/ZAM/OPs/constructors.op b/src/script_opt/ZAM/OPs/constructors.op index 23e7c2faaa..02fc4ac58a 100644 --- a/src/script_opt/ZAM/OPs/constructors.op +++ b/src/script_opt/ZAM/OPs/constructors.op @@ -81,8 +81,6 @@ macro AssignFromRec(rhs) if ( is_managed[i] ) { zeek::Ref(rhs_i.ManagedVal()); - if ( init_i ) - ZVal::DeleteManagedType(*init_i); } init_i = rhs_i; } @@ -166,8 +164,6 @@ macro DoManagedRecAssign(lhs, rhs) auto& lhs_i = lhs->RawOptField(lhs_map[i]); auto rhs_i = FieldValWithCheck(rhs, rhs_map[i]); zeek::Ref(rhs_i.ManagedVal()); - if ( lhs_i ) - ZVal::DeleteManagedType(*lhs_i); lhs_i = rhs_i; } else @@ -190,8 +186,6 @@ eval SetUpRecFieldOps(map) auto& lhs_i = $1->RawOptField(lhs_map[i]); auto rhs_i = FieldValWithCheck($2, rhs_map[i]); zeek::Ref(rhs_i.ManagedVal()); - if ( lhs_i ) - ZVal::DeleteManagedType(*lhs_i); lhs_i = rhs_i; } From 108e9fca4c73d27d429c132546e29bd5c8c9eb6f Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Mon, 29 Sep 2025 11:21:28 +0200 Subject: [PATCH 08/11] RecordType: Carry field_properties This should allow IsManagedType() calls within the RecordVal constructor and instead initialize slots based on the properties only. Could maybe also place into TypeDecl directly. --- src/Type.cc | 11 +++++++++++ src/Type.h | 18 +++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/Type.cc b/src/Type.cc index 397a27ebe8..82185faa77 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -18,6 +18,7 @@ #include "zeek/Traverse.h" #include "zeek/Val.h" #include "zeek/Var.h" +#include "zeek/ZVal.h" #include "zeek/module_util.h" #include "zeek/zeekygen/IdentifierInfo.h" #include "zeek/zeekygen/Manager.h" @@ -1105,6 +1106,7 @@ void RecordType::AddField(unsigned int field, const TypeDecl* td) { ASSERT(field == managed_fields.size()); managed_fields.push_back(ZVal::IsManagedType(td->type)); + field_properties.push_back({.is_managed = ZVal::IsManagedType(td->type), .type_tag = td->type->Tag()}); // We defer error-checking until here so that we can keep deferred_inits // and managed_fields correctly tracking the associated fields. @@ -1539,6 +1541,15 @@ bool RecordType::IsDeferrable() const { return std::ranges::all_of(creation_inits, is_deferrable); } +void RecordType::InitSlots(std::span slots) const { + int n = NumFields(); + if ( slots.size() != field_properties.size() ) + zeek::reporter->InternalError("wrong number of slots and slot properties"); + + for ( int i = 0; i < n; i++ ) + slots[i] = field_properties[i]; +} + FileType::FileType(TypePtr yield_type) : Type(TYPE_FILE), yield(std::move(yield_type)) {} FileType::~FileType() = default; diff --git a/src/Type.h b/src/Type.h index cc30238708..4a9ec8c101 100644 --- a/src/Type.h +++ b/src/Type.h @@ -21,6 +21,7 @@ namespace zeek { class Val; union ZVal; +class ZValSlot; class EnumVal; class RecordVal; class TableVal; @@ -635,6 +636,11 @@ private: using type_decl_list = PList; +struct RecordFieldProperties { + bool is_managed = false; + TypeTag type_tag = TYPE_VOID; +}; + class RecordType final : public Type { public: explicit RecordType(type_decl_list* types); @@ -692,7 +698,11 @@ public: // Returns flags corresponding to which fields in the record // have types requiring memory management (reference counting). - const std::vector& ManagedFields() const { return managed_fields; } + [[deprecated("Remove in v9.1: Unused and optimization related internal. Use FieldProperties() instead.")]] + const std::vector& ManagedFields() const { + return managed_fields; + } + const std::vector& FieldProperties() const { return field_properties; } int NumFields() const { return num_fields; } int NumOrigFields() const { return num_orig_fields; } @@ -771,10 +781,16 @@ private: const auto& DeferredInits() const { return deferred_inits; } const auto& CreationInits() const { return creation_inits; } + // Initialize the slots using slot_properties. + void InitSlots(std::span slots) const; + // If we were willing to bound the size of records, then we could // use std::bitset here instead. std::vector managed_fields; + // Field properties for ZvalSlot initialization + std::vector field_properties; + // Number of fields in the type. int num_fields = 0; From e595dbc9bffb71025c9bb044ff9b314a837b4273 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Mon, 29 Sep 2025 11:34:09 +0200 Subject: [PATCH 09/11] RecordVal: Avoid IsManagedType() calls during construction --- src/Val.cc | 9 +++------ src/Val.h | 5 +++++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Val.cc b/src/Val.cc index dc8ab0c420..9eb0ef1c2a 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -2926,17 +2926,14 @@ RecordVal::RecordTypeValMap RecordVal::parse_time_records; RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(std::move(t)) { auto* rt = GetRecordType(); - int n = rt->NumFields(); if ( run_state::is_parsing ) parse_time_records[rt].emplace_back(NewRef{}, this); if ( init_fields ) { - record_val.resize(n); + record_val.resize(rt->NumFields()); - // Properly initialize all slots. - for ( size_t i = 0; i < record_val.size(); i++ ) - record_val[i] = ZValSlot(rt->GetFieldType(i)); + rt->InitSlots(record_val); for ( auto& e : rt->CreationInits() ) { try { @@ -2952,7 +2949,7 @@ RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(std::move(t)) { else // This needs to go through AppendField() which will do the right thing // for the individual slots. - record_val.reserve(n); + record_val.reserve(rt->NumFields()); } RecordVal::RecordVal(RecordTypePtr t, std::vector> init_vals) : Val(std::move(t)) { diff --git a/src/Val.h b/src/Val.h index 74b244cdf4..3dd1293c06 100644 --- a/src/Val.h +++ b/src/Val.h @@ -1147,6 +1147,11 @@ public: ZValSlot(ValPtr v, const TypePtr& t) : is_set(true), is_managed(ZVal::IsManagedType(t)), type_tag(t->Tag()), zval(v, t) {} + /** + * Initialize a ZValSlot using its properties. + */ + ZValSlot(const RecordFieldProperties p) : is_set(false), is_managed(p.is_managed), type_tag(p.type_tag) {} + /** * Initialize a ZValSlot with just the TypePtr. * From df02b989acf7673645a186973c55bccf03b4adff Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Mon, 29 Sep 2025 11:52:04 +0200 Subject: [PATCH 10/11] Val: Introduce vector32 A std::vector replacement for RecordVal that takes up 16 bytes instead of 24 bytes. --- src/Val.h | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/src/Val.h b/src/Val.h index 3dd1293c06..28d5ea87cb 100644 --- a/src/Val.h +++ b/src/Val.h @@ -4,6 +4,7 @@ #include // for u_char #include +#include #include #include #include @@ -1248,6 +1249,79 @@ private: static_assert(sizeof(ZValSlot) <= 16); +namespace detail { +/** + * A std::vector replacement for record slots with 32bit size and capacity so we + * end up with a 16 byte vector instead of 24 bytes. Anyone trying to put more than + * 500mio fields into a record likely has other problems to solve, first :-) + * + * Going to 16 bit doesn't save much as the whole class will still take 16 bytes + * with padding on a 64bit system. + */ +template +class vector32 { +public: + vector32() {} + vector32(size_t size) : d(std::make_unique(size)), sz(size), cap(size) {} + + const T& operator[](size_t i) const noexcept { return d[i]; } + T& operator[](size_t i) noexcept { return d[i]; } + const T* data() const noexcept { return d.get(); } + T* data() noexcept { return d.get(); } + + void push_back(T slot) { + // No automatic resizing + if ( cap <= sz ) + throw std::logic_error("capacity exceeded"); + + d[sz++] = slot; + } + + void resize(size_t new_size) { + if ( new_size < sz || new_size < cap ) + throw std::length_error("cannot truncate"); + + if ( new_size > std::numeric_limits::max() ) + throw std::length_error("new_size too large"); + + if ( new_size > cap ) { + std::unique_ptr new_data = std::make_unique(new_size); + for ( size_t i = 0; i < sz; i++ ) + new_data[i] = std::move(d[i]); + d = std::move(new_data); + sz = new_size; + cap = new_size; + } + + sz = new_size; + } + + void reserve(size_t new_capacity) { + if ( cap > new_capacity ) + throw std::length_error("cannot truncate"); + + if ( new_capacity > std::numeric_limits::max() ) + throw std::length_error("new_capacity too large"); + + if ( new_capacity > cap ) { + std::unique_ptr new_data = std::make_unique(new_capacity); + for ( size_t i = 0; i < sz; i++ ) + new_data[i] = std::move(d[i]); + d = std::move(new_data); + cap = new_capacity; + } + } + + size_t size() const noexcept { return sz; } + size_t capacity() const noexcept { return cap; } + +private: + std::unique_ptr d = nullptr; + uint32_t sz = 0; + uint32_t cap = 0; +}; +} // namespace detail + class RecordVal final : public Val, public notifier::detail::Modifiable { public: explicit RecordVal(RecordTypePtr t, bool init_fields = true); From 81f4726fc3b2b64015481e8a3d4a14345fb9bed2 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Mon, 29 Sep 2025 12:19:37 +0200 Subject: [PATCH 11/11] RecordVal: Use vector32 This reduces the RecordVal size to 64 bytes on 64 bit systems. --- src/Val.cc | 8 ++++---- src/Val.h | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Val.cc b/src/Val.cc index 9eb0ef1c2a..a33f022a82 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -2933,7 +2933,7 @@ RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(std::move(t)) { if ( init_fields ) { record_val.resize(rt->NumFields()); - rt->InitSlots(record_val); + rt->InitSlots(std::span{record_val.data(), record_val.size()}); for ( auto& e : rt->CreationInits() ) { try { @@ -2955,11 +2955,10 @@ RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(std::move(t)) { RecordVal::RecordVal(RecordTypePtr t, std::vector> init_vals) : Val(std::move(t)) { // TODO: Change so that callers pass init_vals as ZValSlot instead? size_t n = GetRecordType()->NumFields(); - record_val.reserve(n); + record_val.resize(n); + GetRecordType()->InitSlots(std::span{record_val.data(), record_val.size()}); for ( size_t i = 0; i < n; i++ ) { - record_val.emplace_back(ZValSlot(GetRecordType()->GetFieldType(i))); - if ( init_vals[i].has_value() ) record_val[i] = init_vals[i].value(); } @@ -3009,6 +3008,7 @@ void RecordVal::ResizeParseTimeRecords(RecordType* revised_rt) { auto required_length = revised_rt->NumFields(); if ( required_length > current_length ) { + rv->record_val.reserve(required_length); for ( auto i = current_length; i < required_length; ++i ) rv->AppendField(revised_rt->FieldDefault(i), revised_rt->GetFieldType(i)); } diff --git a/src/Val.h b/src/Val.h index 28d5ea87cb..91b248e086 100644 --- a/src/Val.h +++ b/src/Val.h @@ -1656,9 +1656,9 @@ protected: */ void AppendField(ValPtr v, const TypePtr& t) { if ( v ) - record_val.emplace_back(ZValSlot(v, t)); + record_val.push_back(ZValSlot(v, t)); else - record_val.emplace_back(ZValSlot(t)); + record_val.push_back(ZValSlot(t)); } // For internal use by low-level ZAM instructions and event tracing. @@ -1710,7 +1710,7 @@ private: // Low-level values of each of the fields. // // Lazily modified during GetField(), so mutable. - mutable std::vector record_val; + mutable detail::vector32 record_val; }; class EnumVal final : public detail::IntValImplementation {