optimize record construction by deferring initializations of aggregates

This commit is contained in:
Vern Paxson 2023-04-10 11:44:11 -07:00
parent 2e2afa5e11
commit 0787c130d0
5 changed files with 190 additions and 158 deletions

View file

@ -992,39 +992,102 @@ void TypeDecl::DescribeReST(ODesc* d, bool roles_only) const
}
}
// The following tracks how to initialize a given field, for fast execution
// of Create().
class FieldInit
// Field initializer that hands back a ZVal captured at construction time.
// Generate() returns the stored value as-is; nothing else is done to it here.
class DirectFieldInit final : public FieldInit
{
public:
// The type of initialization for the field.
enum
// Stores the value that every later Generate() call will return.
DirectFieldInit(ZVal _init_val) : init_val(_init_val) { }
// Returns a copy of the stored ZVal.
ZVal Generate() const override { return init_val; }
private:
// The value returned by Generate().
ZVal init_val;
};
// Field initializer that hands back a stored ZVal of a managed type.
// Each Generate() call first calls zeek::Ref() on the stored value's
// ManagedVal(), then returns the stored ZVal; the destructor passes the
// stored value to ZVal::DeleteManagedType().
// NOTE(review): the class defines a destructor but does not disable copying;
// a copied instance would presumably release the same value twice - confirm
// instances are never copied.
class DirectManagedFieldInit final : public FieldInit
{
public:
// Stores the value that later Generate() calls will return.
DirectManagedFieldInit(ZVal _init_val) : init_val(_init_val) { }
// Releases the stored value via ZVal::DeleteManagedType().
~DirectManagedFieldInit() { ZVal::DeleteManagedType(init_val); }
// Returns the stored ZVal after calling zeek::Ref() on its managed value.
ZVal Generate() const override
{
R_INIT_NONE, // skip this entry
zeek::Ref(init_val.ManagedVal());
return init_val;
}
R_INIT_DIRECT, // look in direct_init for raw value
R_INIT_DIRECT_MANAGED, // same, but managed type
private:
// The value returned by Generate().
ZVal init_val;
};
R_INIT_DEF, // look in def_expr for expression
// Field initializer that computes the value by evaluating an expression
// each time Generate() is called.
class ExprFieldInit final : public FieldInit
{
public:
// Initialization requires evaluating the given expression,
// yielding a value of the given type (which might require
// coercion for some records).
ExprFieldInit(detail::ExprPtr _init_expr, TypePtr _init_type)
: init_expr(std::move(_init_expr)), init_type(std::move(_init_type))
{
// Coercion is set up when the target type is a record and differs from
// the expression's type.
// NOTE(review): only the target type's tag is tested here, not the
// expression's - confirm v->AsRecordVal() in Generate() is always valid
// when coerce_type is set.
if ( init_type->Tag() == TYPE_RECORD && ! same_type(init_expr->GetType(), init_type) )
coerce_type = cast_intrusive<RecordType>(init_type);
}
R_INIT_RECORD, // field requires a new record
R_INIT_TABLE, // field requires a new table/set
R_INIT_VECTOR, // field requires a new vector
} init_type = R_INIT_NONE;
// Evaluates the expression (passing nullptr to Eval()) and wraps the
// result in a ZVal of the target type. If evaluation yields nil, reports
// an error and returns a default-constructed ZVal instead.
ZVal Generate() const override
{
auto v = init_expr->Eval(nullptr);
if ( ! v )
{
reporter->Error("failed &default in record creation");
return ZVal();
}
bool def_coerce = false; // whether coercion's required
// Convert the evaluated record to the target record type if needed.
if ( coerce_type )
v = v->AsRecordVal()->CoerceTo(coerce_type);
// For R_INIT_DIRECT/R_INIT_DIRECT_MANAGED:
ZVal direct_init;
return ZVal(v, init_type);
}
detail::ExprPtr def_expr;
TypePtr def_type;
private:
// Expression evaluated by Generate().
detail::ExprPtr init_expr;
// Type used when wrapping the evaluated value in a ZVal.
TypePtr init_type;
RecordTypePtr coerce_type; // non-nil iff coercion is required
};
RecordTypePtr r_type; // for R_INIT_RECORD
TableTypePtr t_type; // for R_INIT_TABLE
detail::AttributesPtr attrs; // attributes for R_INIT_TABLE
VectorTypePtr v_type; // for R_INIT_VECTOR
// Field initializer that produces a newly allocated RecordVal of a fixed
// record type on every Generate() call.
class RecordFieldInit final : public FieldInit
	{
public:
	// Marked explicit so that a RecordTypePtr can never silently convert
	// into an initializer object.
	explicit RecordFieldInit(RecordTypePtr _init_type) : init_type(std::move(_init_type)) { }

	// Returns a ZVal wrapping a new RecordVal constructed from the stored type.
	ZVal Generate() const override { return ZVal(new RecordVal(init_type)); }

private:
	// Record type handed to each new RecordVal.
	RecordTypePtr init_type;
	};
class TableFieldInit final : public FieldInit
{
public:
TableFieldInit(TableTypePtr _init_type, detail::AttributesPtr _attrs)
: init_type(std::move(_init_type)), attrs(std::move(_attrs))
{
}
ZVal Generate() const override { return ZVal(new TableVal(init_type, attrs)); }
private:
TableTypePtr init_type;
detail::AttributesPtr attrs;
};
// Field initializer that produces a newly allocated VectorVal of a fixed
// vector type on every Generate() call.
class VectorFieldInit final : public FieldInit
	{
public:
	// Marked explicit so that a VectorTypePtr can never silently convert
	// into an initializer object.
	explicit VectorFieldInit(VectorTypePtr _init_type) : init_type(std::move(_init_type)) { }

	// Returns a ZVal wrapping a new VectorVal constructed from the stored type.
	ZVal Generate() const override { return ZVal(new VectorVal(init_type)); }

private:
	// Vector type handed to each new VectorVal.
	VectorTypePtr init_type;
	};
RecordType::RecordType(type_decl_list* arg_types) : Type(TYPE_RECORD)
@ -1064,7 +1127,8 @@ RecordType::~RecordType()
}
for ( auto fi : field_inits )
delete fi;
if ( fi )
delete *fi;
}
void RecordType::AddField(unsigned int field, const TypeDecl* td)
@ -1074,53 +1138,43 @@ void RecordType::AddField(unsigned int field, const TypeDecl* td)
managed_fields.push_back(ZVal::IsManagedType(td->type));
auto init = new FieldInit();
init->init_type = FieldInit::R_INIT_NONE;
init->attrs = td->attrs;
// We defer error-checking until here so that we can keep field_inits
// and managed_fields correctly tracking the associated fields.
if ( field_ids.count(td->id) != 0 )
{
reporter->Error("duplicate field '%s' found in record definition", td->id);
field_inits.push_back(init);
field_inits.push_back(std::nullopt);
return;
}
field_ids.insert(std::string(td->id));
auto a = init->attrs;
auto a = td->attrs;
auto type = td->type;
auto def_attr = a ? a->Find(detail::ATTR_DEFAULT) : nullptr;
auto def_expr = def_attr ? def_attr->GetExpr() : nullptr;
std::optional<FieldInit*> init;
if ( def_expr && ! IsErrorType(type->Tag()) )
{
if ( type->Tag() == TYPE_RECORD && def_expr->GetType()->Tag() == TYPE_RECORD &&
! same_type(def_expr->GetType(), type) )
init->def_coerce = true;
if ( def_expr->Tag() == detail::EXPR_CONST )
{
auto v = def_expr->Eval(nullptr);
auto zv = ZVal(v, type);
if ( ZVal::IsManagedType(type) )
init->init_type = FieldInit::R_INIT_DIRECT_MANAGED;
init = new DirectManagedFieldInit(zv);
else
init->init_type = FieldInit::R_INIT_DIRECT;
init->direct_init = ZVal(v, type);
init = new DirectFieldInit(zv);
}
else
{
init->init_type = FieldInit::R_INIT_DEF;
init->def_expr = def_expr;
init->def_type = def_expr->GetType();
auto efi = new ExprFieldInit(def_expr, type);
field_expr_inits.emplace_back(std::make_pair(field, efi));
}
}
@ -1129,22 +1183,13 @@ void RecordType::AddField(unsigned int field, const TypeDecl* td)
TypeTag tag = type->Tag();
if ( tag == TYPE_RECORD )
{
init->init_type = FieldInit::R_INIT_RECORD;
init->r_type = cast_intrusive<RecordType>(type);
}
init = new RecordFieldInit(cast_intrusive<RecordType>(type));
else if ( tag == TYPE_TABLE )
{
init->init_type = FieldInit::R_INIT_TABLE;
init->t_type = cast_intrusive<TableType>(type);
}
init = new TableFieldInit(cast_intrusive<TableType>(type), a);
else if ( tag == TYPE_VECTOR )
{
init->init_type = FieldInit::R_INIT_VECTOR;
init->v_type = cast_intrusive<VectorType>(type);
}
init = new VectorFieldInit(cast_intrusive<VectorType>(type));
}
field_inits.push_back(init);
@ -1342,68 +1387,6 @@ void RecordType::AddFieldsDirectly(const type_decl_list& others, bool add_log_at
num_fields = types->length();
}
void RecordType::Create(std::vector<std::optional<ZVal>>& r) const
{
int n = NumFields();
for ( int i = 0; i < n; ++i )
{
auto* init = field_inits[i];
ZVal r_i;
switch ( init->init_type )
{
case FieldInit::R_INIT_NONE:
r.push_back(std::nullopt);
continue;
case FieldInit::R_INIT_DIRECT:
r_i = init->direct_init;
break;
case FieldInit::R_INIT_DIRECT_MANAGED:
r_i = init->direct_init;
zeek::Ref(r_i.ManagedVal());
break;
case FieldInit::R_INIT_DEF:
{
auto v = init->def_expr->Eval(nullptr);
if ( v )
{
const auto& t = init->def_type;
if ( init->def_coerce )
{
auto rt = cast_intrusive<RecordType>(t);
v = v->AsRecordVal()->CoerceTo(rt);
}
r_i = ZVal(v, t);
}
else
reporter->Error("failed &default in record creation");
}
break;
case FieldInit::R_INIT_RECORD:
r_i = ZVal(new RecordVal(init->r_type));
break;
case FieldInit::R_INIT_TABLE:
r_i = ZVal(new TableVal(init->t_type, init->attrs));
break;
case FieldInit::R_INIT_VECTOR:
r_i = ZVal(new VectorVal(init->v_type));
break;
}
r.push_back(r_i);
}
}
void RecordType::DescribeFields(ODesc* d) const
{
if ( d->IsReadable() )

View file

@ -15,14 +15,15 @@
#include "zeek/IntrusivePtr.h"
#include "zeek/Obj.h"
#include "zeek/Traverse.h"
#include "zeek/ZVal.h"
#include "zeek/ZeekList.h"
namespace zeek
{
class Val;
union ZVal;
class EnumVal;
class RecordVal;
class TableVal;
using ValPtr = IntrusivePtr<Val>;
using EnumValPtr = IntrusivePtr<EnumVal>;
@ -600,10 +601,16 @@ public:
using type_decl_list = PList<TypeDecl>;
// The following tracks how to initialize a given field. We don't define
// it here because it requires pulling in a bunch of low-level headers that
// would be nice to avoid.
class FieldInit;
// Abstract interface describing how to produce the initial value of a
// record field. Concrete initializers implement Generate().
class FieldInit
	{
public:
	virtual ~FieldInit() = default;

	// Produces the value the field should be initialized with.
	virtual ZVal Generate() const = 0;
	};
class RecordType final : public Type
{
@ -683,12 +690,8 @@ public:
void AddFieldsDirectly(const type_decl_list& types, bool add_log_attr = false);
/**
*
* Populates a new instance of the record with its initial values.
* @param r The record's underlying value vector.
*/
void Create(std::vector<std::optional<ZVal>>& r) const;
const auto& FieldInits() const { return field_inits; }
const auto& FieldExprInits() const { return field_expr_inits; }
void DescribeReST(ODesc* d, bool roles_only = false) const override;
void DescribeFields(ODesc* d) const;
@ -719,7 +722,15 @@ protected:
// Maps each field to how to initialize it. Holds pointers because
// FieldInit is an abstract base class with several concrete initializers.
std::vector<FieldInit*> field_inits;
std::vector<std::optional<FieldInit*>> field_inits;
// Holds initializations defined in terms of evaluating expressions,
// in <fieldoffset, init> pairs (we use pairs instead of a vector
// with per-field expressions because such expressions are not often
// used). These need to be evaluated at record construction time,
// rather than deferring until first use, because the value of the
// expression can change between the two.
std::vector<std::pair<int, const FieldInit*>> field_expr_inits;
// If we were willing to bound the size of records, then we could
// use std::bitset here instead.

View file

@ -2756,25 +2756,32 @@ RecordVal::RecordVal(RecordTypePtr t, bool init_fields) : Val(t), is_managed(t->
int n = rt->NumFields();
record_val = new std::vector<std::optional<ZVal>>;
record_val->reserve(n);
if ( run_state::is_parsing )
parse_time_records[rt.get()].emplace_back(NewRef{}, this);
record_val = new std::vector<std::optional<ZVal>>;
if ( init_fields )
{
try
record_val->resize(n);
for ( auto& e : rt->FieldExprInits() )
{
rt->Create(*record_val);
}
catch ( InterpreterException& e )
{
if ( run_state::is_parsing )
parse_time_records[rt.get()].pop_back();
throw;
try
{
(*record_val)[e.first] = e.second->Generate();
}
catch ( InterpreterException& e )
{
if ( run_state::is_parsing )
parse_time_records[rt.get()].pop_back();
throw;
}
}
}
else
record_val->reserve(n);
}
RecordVal::~RecordVal()
@ -2782,8 +2789,11 @@ RecordVal::~RecordVal()
auto n = record_val->size();
for ( unsigned int i = 0; i < n; ++i )
if ( HasField(i) && IsManaged(i) )
ZVal::DeleteManagedType(*(*record_val)[i]);
{
auto f_i = (*record_val)[i];
if ( f_i && IsManaged(i) )
ZVal::DeleteManagedType(*f_i);
}
delete record_val;
}
@ -2809,12 +2819,13 @@ void RecordVal::Assign(int field, ValPtr new_val)
void RecordVal::Remove(int field)
{
if ( HasField(field) )
auto& f_i = (*record_val)[field];
if ( f_i )
{
if ( IsManaged(field) )
ZVal::DeleteManagedType(*(*record_val)[field]);
ZVal::DeleteManagedType(*f_i);
(*record_val)[field] = std::nullopt;
f_i = std::nullopt;
Modified();
}

View file

@ -1179,9 +1179,10 @@ public:
// Stores new_val in the given field. Any value the slot currently holds is
// first released via ZVal::DeleteManagedType(); afterwards AddedField() is
// called for the field.
// NOTE(review): presumably the record takes over the caller's reference to
// new_val - confirm against ZVal(StringVal*).
void Assign(int field, StringVal* new_val)
{
if ( HasField(field) )
ZVal::DeleteManagedType(*(*record_val)[field]);
(*record_val)[field] = ZVal(new_val);
// Work on the slot directly so the check only looks at what is stored.
auto& fv = (*record_val)[field];
if ( fv )
ZVal::DeleteManagedType(*fv);
fv = ZVal(new_val);
AddedField(field);
}
// Convenience overload: wraps the C string in a newly allocated StringVal
// and forwards to the StringVal* overload.
void Assign(int field, const char* new_val)
	{
	Assign(field, new StringVal(new_val));
	}
@ -1194,7 +1195,7 @@ public:
*/
template <class T> void AssignField(const char* field_name, T&& val)
{
int idx = GetType()->AsRecordType()->FieldOffset(field_name);
int idx = rt->FieldOffset(field_name);
if ( idx < 0 )
reporter->InternalError("missing record field: %s", field_name);
Assign(idx, std::forward<T>(val));
@ -1212,7 +1213,13 @@ public:
* @param field The field index to retrieve.
* @return Whether there's a value for the given field index.
*/
bool HasField(int field) const { return (*record_val)[field] ? true : false; }
bool HasField(int field) const
	{
	// A field counts as present if its slot already holds a value, or if
	// the record type has an initializer from which one can be generated.
	// NOTE(review): a slot reset to std::nullopt therefore still reports
	// as present when the type has an initializer - confirm intended.
	return (*record_val)[field].has_value() || rt->FieldInits()[field].has_value();
	}
/**
* Returns true if the given field is in the record, false if
@ -1222,7 +1229,7 @@ public:
*/
bool HasField(const char* field) const
{
// Translate the name into a field offset; -1 is treated as "no such field".
int idx = GetType()->AsRecordType()->FieldOffset(field);
int idx = rt->FieldOffset(field);
// Only consult the index-based overload when the name resolved.
return (idx != -1) && HasField(idx);
}
@ -1233,10 +1240,17 @@ public:
*/
// Returns the field's value converted via ToVal() using the field's type,
// or nullptr if the slot is empty and the record type has no initializer
// for it. An empty slot with an initializer is filled in on this call.
ValPtr GetField(int field) const
{
if ( ! HasField(field) )
return nullptr;
auto& fv = (*record_val)[field];
if ( ! fv )
{
// Slot is empty: see whether the type can generate a value for it.
const auto& fi = rt->FieldInits()[field];
if ( ! fi )
return nullptr;
return (*record_val)[field]->ToVal(rt->GetFieldType(field));
// Store the generated value in the slot. The write goes through the
// record_val pointer, which is why it is possible in a const method.
fv = (*fi)->Generate();
}
return fv->ToVal(rt->GetFieldType(field));
}
/**
@ -1364,7 +1378,7 @@ public:
template <typename T> auto GetFieldAs(const char* field) const
{
int idx = GetType()->AsRecordType()->FieldOffset(field);
int idx = rt->FieldOffset(field);
if ( idx < 0 )
reporter->InternalError("missing record field: %s", field);
@ -1437,7 +1451,18 @@ protected:
// Caller assumes responsibility for memory management. The first
// version allows manipulation of whether the field is present at all.
// The second version ensures that the optional value is present.
std::optional<ZVal>& RawOptField(int field) { return (*record_val)[field]; }
// Returns a reference to the field's slot. If the slot is empty and the
// record type has an initializer for the field, the slot is first filled
// with the generated value; otherwise it is returned as-is (possibly empty).
std::optional<ZVal>& RawOptField(int field)
	{
	auto& slot = (*record_val)[field];

	if ( slot )
		return slot;

	const auto& deferred = rt->FieldInits()[field];

	if ( deferred.has_value() )
		slot = (*deferred)->Generate();

	return slot;
	}
ZVal& RawField(int field)
{
@ -1459,8 +1484,9 @@ protected:
private:
// Releases the value stored in the given field via
// ZVal::DeleteManagedType(), but only if the slot holds a value and the
// field is flagged as managed. The slot itself is left untouched.
void DeleteFieldIfManaged(unsigned int field)
{
if ( HasField(field) && IsManaged(field) )
ZVal::DeleteManagedType(*(*record_val)[field]);
// Check the slot directly rather than going through HasField().
auto& f = (*record_val)[field];
if ( f && IsManaged(field) )
ZVal::DeleteManagedType(*f);
}
// Returns the is_managed entry for the field at the given offset.
bool IsManaged(unsigned int offset) const
	{
	return is_managed[offset];
	}

View file

@ -10,6 +10,7 @@ namespace zeek
{
class AddrVal;
class EnumVal;
class File;
class Func;
class ListVal;