diff --git a/CHANGES b/CHANGES index 8fff6e451f..199e43e2d2 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,23 @@ +8.0.0-dev.748 | 2025-07-25 10:58:16 +0200 + + * RecordType: Allow field init deferral of deferrable record constructors (Arne Welzel, Corelight) + + The ctx: conn_id_ctx &default=conn_id_ctx() field was not optimized + as deferrable even though by default it's an empty record and so deferring + initialization seems safe. Open-code the special record constructor + expression case in ExprFieldInit so that the ctx field is not default + initialized at record construction anymore. I am wondering a bit if the + same applies to &default=set() and &default=table(). + + * Conn: Use conn_id_ctx singleton (Arne Welzel, Corelight) + + * Conn: Add InitPostScript() and conn_id_ctx singleton (Arne Welzel, Corelight) + + * ID: Add conn_id_ctx (Arne Welzel, Corelight) + + Seemed a bit unfortunate to use id::connection and id::conn_id, but + then do something different for conn_id_ctx. + 8.0.0-dev.742 | 2025-07-24 12:46:16 -0700 * Update broker submodule to pull in clang 20 fix [nomail] (Tim Wojtulewicz, Corelight) diff --git a/VERSION b/VERSION index 8de72f7945..789d6192f5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -8.0.0-dev.742 +8.0.0-dev.748 diff --git a/src/Conn.cc b/src/Conn.cc index 757f7bbede..d1336803a7 100644 --- a/src/Conn.cc +++ b/src/Conn.cc @@ -6,6 +6,7 @@ #include #include "zeek/Desc.h" +#include "zeek/ID.h" #include "zeek/NetVar.h" #include "zeek/Reporter.h" #include "zeek/RunState.h" @@ -22,6 +23,12 @@ namespace zeek { uint64_t Connection::total_connections = 0; uint64_t Connection::current_connections = 0; +zeek::RecordValPtr Connection::conn_id_ctx_singleton; + +void Connection::InitPostScript() { + if ( id::conn_id_ctx->NumFields() == 0 ) + conn_id_ctx_singleton = zeek::make_intrusive(id::conn_id_ctx); +} Connection::Connection(zeek::IPBasedConnKeyPtr k, double t, uint32_t flow, const Packet* pkt) : Session(t, connection_timeout, 
connection_status_update, detail::connection_status_update_interval), @@ -188,7 +195,17 @@ const RecordValPtr& Connection::GetVal() { conn_val = make_intrusive(id::connection); auto id_val = make_intrusive(id::conn_id); - auto* ctx = id_val->GetFieldAs(5); + + constexpr int ctx_offset = 5; + + // If the conn_id_ctx type has no fields at all, set it to the singleton instance, + // otherwise the instance is initialized on first access through GetField() below. + if ( conn_id_ctx_singleton ) { + assert(id::conn_id_ctx->NumFields() == 0); + id_val->Assign(ctx_offset, conn_id_ctx_singleton); + } + + auto ctx = id_val->GetField(ctx_offset); // Allow customized ConnKeys to augment conn_id and ctx. key->PopulateConnIdVal(*id_val, *ctx); diff --git a/src/Conn.h b/src/Conn.h index a35c419b91..4afb044e6b 100644 --- a/src/Conn.h +++ b/src/Conn.h @@ -209,6 +209,9 @@ public: // Returns true once Done() is called. bool IsFinished() { return finished; } + // Runs after all scripts have been parsed. + static void InitPostScript(); + private: // Common initialization for the constructors. This can move back into the // (sole) constructor when we remove the deprecated one in 8.1. @@ -244,6 +247,11 @@ private: // Count number of connections. static uint64_t total_connections; static uint64_t current_connections; + + // When the conn_id_ctx record type has no fields, + // this holds a singleton record value for it that + // is shared among all conn_id record values. + static RecordValPtr conn_id_ctx_singleton; }; // The following is used by script optimization. 
diff --git a/src/ID.cc b/src/ID.cc index bd49d135bf..f666c86cf0 100644 --- a/src/ID.cc +++ b/src/ID.cc @@ -24,6 +24,7 @@ namespace zeek { RecordTypePtr id::conn_id; +RecordTypePtr id::conn_id_ctx; RecordTypePtr id::endpoint; RecordTypePtr id::connection; RecordTypePtr id::fa_file; @@ -81,6 +82,7 @@ FuncPtr id::find_func(std::string_view name) { void id::detail::init_types() { conn_id = id::find_type("conn_id"); + conn_id_ctx = id::find_type("conn_id_ctx"); endpoint = id::find_type("endpoint"); connection = id::find_type("connection"); fa_file = id::find_type("fa_file"); diff --git a/src/ID.h b/src/ID.h index b1cc83a383..41498df005 100644 --- a/src/ID.h +++ b/src/ID.h @@ -259,6 +259,7 @@ IntrusivePtr find_const(std::string_view name) { FuncPtr find_func(std::string_view name); extern RecordTypePtr conn_id; +extern RecordTypePtr conn_id_ctx; extern RecordTypePtr endpoint; extern RecordTypePtr connection; extern RecordTypePtr fa_file; diff --git a/src/Type.cc b/src/Type.cc index 5628470e74..10104dbc74 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -915,7 +915,22 @@ public: return {v, init_type}; } - bool IsDeferrable() const override { return false; } + bool IsDeferrable() const override { + if ( init_expr->Tag() == EXPR_RECORD_CONSTRUCTOR ) { + // Special-case deferrable record construction. + auto rce = zeek::cast_intrusive(init_expr); + auto rt = rce->GetType(); + + // The empty constructor_list check here is a short-cut: If the + // constructor expression contained only further const expressions + // or only further deferrable record constructors, this could be + // more aggressively deferring initializations. + auto constructor_list = rce->Op(); + return rt->IsDeferrable() && constructor_list->Exprs().empty(); + } + + return false; + } ExprPtr InitExpr() const override { return init_expr; } @@ -995,6 +1010,8 @@ private: if ( ! 
ci.second->IsDeferrable() ) rt->creation_inits[i++] = std::move(ci); else { + // std::fprintf(stderr, "deferred %s$%s: %s\n", obj_desc_short(rt).c_str(), rt->FieldName(ci.first), + // ci.second->InitExpr() ? obj_desc_short(ci.second->InitExpr()).c_str() : ""); assert(! rt->deferred_inits[ci.first]); rt->deferred_inits[ci.first].swap(ci.second); } diff --git a/src/packet_analysis/protocol/ip/conn_key/vlan_fivetuple/Factory.cc b/src/packet_analysis/protocol/ip/conn_key/vlan_fivetuple/Factory.cc index 7e243422a7..cf365e2eb3 100644 --- a/src/packet_analysis/protocol/ip/conn_key/vlan_fivetuple/Factory.cc +++ b/src/packet_analysis/protocol/ip/conn_key/vlan_fivetuple/Factory.cc @@ -54,18 +54,16 @@ protected: }; std::pair GetConnCtxFieldOffsets() { - static const auto& conn_id_ctx = zeek::id::find_type("conn_id_ctx"); - static int vlan_offset = -2; static int inner_vlan_offset = -2; if ( vlan_offset == -2 && inner_vlan_offset == -2 ) { - vlan_offset = conn_id_ctx->FieldOffset("vlan"); - if ( vlan_offset < 0 || conn_id_ctx->GetFieldType(vlan_offset)->Tag() != TYPE_INT ) + vlan_offset = id::conn_id_ctx->FieldOffset("vlan"); + if ( vlan_offset < 0 || id::conn_id_ctx->GetFieldType(vlan_offset)->Tag() != TYPE_INT ) vlan_offset = -1; - inner_vlan_offset = conn_id_ctx->FieldOffset("inner_vlan"); - if ( inner_vlan_offset < 0 || conn_id_ctx->GetFieldType(inner_vlan_offset)->Tag() != TYPE_INT ) + inner_vlan_offset = id::conn_id_ctx->FieldOffset("inner_vlan"); + if ( inner_vlan_offset < 0 || id::conn_id_ctx->GetFieldType(inner_vlan_offset)->Tag() != TYPE_INT ) inner_vlan_offset = -1; } diff --git a/src/zeek-setup.cc b/src/zeek-setup.cc index 985bd753b0..9b1253bf88 100644 --- a/src/zeek-setup.cc +++ b/src/zeek-setup.cc @@ -25,6 +25,7 @@ #include +#include "zeek/Conn.h" #include "zeek/DNS_Mgr.h" #include "zeek/Debug.h" #include "zeek/Desc.h" @@ -842,6 +843,7 @@ SetupResult setup(int argc, char** argv, Options* zopts) { exit(1); RecordType::InitPostScript(); + 
Connection::InitPostScript(); conn_key_mgr->InitPostScript(); telemetry_mgr->InitPostScript();