extensive rewrite of generation & execution of run-time initialization

This commit is contained in:
Vern Paxson 2021-11-07 17:00:19 -08:00
parent bc3bf4ea6c
commit e1a760e674
26 changed files with 3459 additions and 1580 deletions

View file

@ -14,12 +14,30 @@ namespace zeek::detail
using namespace std;
void CPPCompile::GenInitExpr(const ExprPtr& e)
// Returns the initialization-info object for the expression 'ep'.
//
// Lookups are keyed on the name produced by InitExprName(). If an entry
// already exists in init_infos it is returned as-is; otherwise a new
// CallExprInitInfo is created (with a wrapper class named
// "wrapper_<name>_cl"), added to call_exprs_info, recorded in init_infos,
// and returned.
std::shared_ptr<CPP_InitInfo> CPPCompile::RegisterInitExpr(const ExprPtr& ep)
{
	auto expr_name = InitExprName(ep);

	// Re-use any previously registered info for this expression.
	auto known = init_infos.find(expr_name);
	if ( known != init_infos.end() )
		return known->second;

	auto wrapper_class = string("wrapper_") + expr_name + "_cl";
	auto info = make_shared<CallExprInitInfo>(this, ep, expr_name, wrapper_class);

	call_exprs_info->AddInstance(info);
	init_infos[expr_name] = info;

	return info;
}
void CPPCompile::GenInitExpr(std::shared_ptr<CallExprInitInfo> ce_init)
{
NL();
const auto& e = ce_init->GetExpr();
const auto& t = e->GetType();
auto ename = InitExprName(e);
const auto& ename = ce_init->Name();
const auto& wc = ce_init->WrapperClass();
// First, create a CPPFunc that we can compile to compute 'e'.
auto name = string("wrapper_") + ename;
@ -29,18 +47,17 @@ void CPPCompile::GenInitExpr(const ExprPtr& e)
// Create the Func subclass that can be used in a CallExpr to
// evaluate 'e'.
Emit("class %s_cl : public CPPFunc", name);
Emit("class %s : public CPPFunc", wc);
StartBlock();
Emit("public:");
Emit("%s_cl() : CPPFunc(\"%s\", %s)", name, name, e->IsPure() ? "true" : "false");
Emit("%s() : CPPFunc(\"%s\", %s)", wc, name, e->IsPure() ? "true" : "false");
StartBlock();
Emit("type = make_intrusive<FuncType>(make_intrusive<RecordType>(new type_decl_list()), %s, "
"FUNC_FLAVOR_FUNCTION);",
GenTypeName(t));
NoteInitDependency(e, TypeRep(t));
EndBlock();
Emit("ValPtr Invoke(zeek::Args* args, Frame* parent) const override final");
@ -62,15 +79,9 @@ void CPPCompile::GenInitExpr(const ExprPtr& e)
EndBlock();
Emit("CallExprPtr %s;", ename);
NoteInitDependency(e, TypeRep(t));
AddInit(e, ename,
string("make_intrusive<CallExpr>(make_intrusive<ConstExpr>(make_intrusive<FuncVal>("
"make_intrusive<") +
name + "_cl>())), make_intrusive<ListExpr>(), false)");
}
bool CPPCompile::IsSimpleInitExpr(const ExprPtr& e) const
bool CPPCompile::IsSimpleInitExpr(const ExprPtr& e)
{
switch ( e->Tag() )
{
@ -101,360 +112,83 @@ string CPPCompile::InitExprName(const ExprPtr& e)
return init_exprs.KeyName(e);
}
void CPPCompile::GenGlobalInit(const ID* g, string& gl, const ValPtr& v)
{
const auto& t = v->GetType();
auto tag = t->Tag();
if ( tag == TYPE_FUNC )
// This should get initialized by recognizing hash of
// the function's body.
return;
string init_val;
if ( tag == TYPE_OPAQUE )
{
// We can only generate these by reproducing the expression
// (presumably a function call) used to create the value.
// That isn't fully sound, since if the global's value
// was redef'd in terms of its original value (e.g.,
// "redef x = f(x)"), then we'll wind up with a broken
// expression. It's difficult to detect that in full
// generality, so um Don't Do That. (Note that this
// only affects execution of standalone compiled code,
// where the original scripts are replaced by load-stubs.
// If the scripts are available, then the HasVal() test
// we generate will mean we don't wind up using this
// expression anyway.)
// Use the final initialization expression.
auto& init_exprs = g->GetOptInfo()->GetInitExprs();
init_val = GenExpr(init_exprs.back(), GEN_VAL_PTR, false);
}
else
init_val = BuildConstant(g, v);
auto& attrs = g->GetAttrs();
AddInit(g, string("if ( ! ") + gl + "->HasVal() )");
if ( attrs )
{
RegisterAttributes(attrs);
AddInit(g, "\t{");
AddInit(g, "\t" + gl + "->SetVal(" + init_val + ");");
AddInit(g, "\t" + gl + "->SetAttrs(" + AttrsName(attrs) + ");");
AddInit(g, "\t}");
}
else
AddInit(g, "\t" + gl + "->SetVal(" + init_val + ");");
}
void CPPCompile::GenFuncVarInits()
{
for ( const auto& fv_init : func_vars )
{
auto& fv = fv_init.first;
auto& const_name = fv_init.second;
auto f = fv->AsFunc();
const auto& fn = f->Name();
const auto& ft = f->GetType();
NoteInitDependency(fv, TypeRep(ft));
const auto& bodies = f->GetBodies();
string hashes = "{";
for ( const auto& b : bodies )
{
auto body = b.stmts.get();
ASSERT(body_names.count(body) > 0);
auto& body_name = body_names[body];
ASSERT(body_hashes.count(body_name) > 0);
NoteInitDependency(fv, body);
if ( hashes.size() > 1 )
hashes += ", ";
hashes += Fmt(body_hashes[body_name]);
}
hashes += "}";
auto init = string("lookup_func__CPP(\"") + fn + "\", " + hashes + ", " + GenTypeName(ft) +
")";
AddInit(fv, const_name, init);
}
}
void CPPCompile::GenPreInit(const Type* t)
{
string pre_init;
switch ( t->Tag() )
{
case TYPE_ADDR:
case TYPE_ANY:
case TYPE_BOOL:
case TYPE_COUNT:
case TYPE_DOUBLE:
case TYPE_ERROR:
case TYPE_INT:
case TYPE_INTERVAL:
case TYPE_PATTERN:
case TYPE_PORT:
case TYPE_STRING:
case TYPE_TIME:
case TYPE_TIMER:
case TYPE_VOID:
pre_init = string("base_type(") + TypeTagName(t->Tag()) + ")";
break;
case TYPE_ENUM:
pre_init = string("get_enum_type__CPP(\"") + t->GetName() + "\")";
break;
case TYPE_SUBNET:
pre_init = string("make_intrusive<SubNetType>()");
break;
case TYPE_FILE:
pre_init = string("make_intrusive<FileType>(") + GenTypeName(t->AsFileType()->Yield()) +
")";
break;
case TYPE_OPAQUE:
pre_init = string("make_intrusive<OpaqueType>(\"") + t->AsOpaqueType()->Name() + "\")";
break;
case TYPE_RECORD:
{
string name;
if ( t->GetName() != "" )
name = string("\"") + t->GetName() + string("\"");
else
name = "nullptr";
pre_init = string("get_record_type__CPP(") + name + ")";
}
break;
case TYPE_LIST:
pre_init = string("make_intrusive<TypeList>()");
break;
case TYPE_TYPE:
case TYPE_VECTOR:
case TYPE_TABLE:
case TYPE_FUNC:
// Nothing to do for these, pre-initialization-wise.
return;
default:
reporter->InternalError("bad type in CPPCompile::GenType");
}
pre_inits.emplace_back(GenTypeName(t) + " = " + pre_init + ";");
}
// Emits the generated function pre_init__CPP(), whose body consists of
// every statement accumulated in pre_inits, in order.
void CPPCompile::GenPreInits()
{
	NL();

	Emit("void pre_init__CPP()");
	StartBlock();

	for ( const auto& stmt : pre_inits )
		Emit(stmt);

	EndBlock();
}
// Appends 'init' to the list of initialization statements recorded for
// object 'o' (creating the list if 'o' has none yet).
void CPPCompile::AddInit(const Obj* o, const string& init)
{
	auto& inits_for_obj = obj_inits[o];
	inits_for_obj.push_back(init);
}
// Ensures that object 'o' has an entry in obj_inits. If it has none, an
// empty one is created; an existing entry is left untouched.
void CPPCompile::AddInit(const Obj* o)
{
	if ( obj_inits.find(o) != obj_inits.end() )
		return;

	obj_inits[o] = {};
}
// Records that the initialization of 'o1' depends on 'o2' by adding 'o2'
// to the dependency collection kept for 'o1' in obj_deps.
void CPPCompile::NoteInitDependency(const Obj* o1, const Obj* o2)
{
	auto& deps_of_o1 = obj_deps[o1];
	deps_of_o1.insert(o2);
}
// Sanity-checks the recorded initialization dependencies against 'to_do':
// every object with an entry in obj_deps, and every object it depends on,
// must be a member of 'to_do'. On the first violation a diagnostic is
// written to stderr and the process exits with status 1.
void CPPCompile::CheckInitConsistency(unordered_set<const Obj*>& to_do)
{
	for ( const auto& od : obj_deps )
	{
		const auto& obj = od.first;

		if ( to_do.find(obj) == to_do.end() )
		{
			fprintf(stderr, "object not in to_do: %s\n", obj_desc(obj).c_str());
			exit(1);
		}

		for ( const auto& dep : od.second )
		{
			if ( to_do.find(dep) != to_do.end() )
				continue;

			fprintf(stderr, "dep object for %s not in to_do: %s\n", obj_desc(obj).c_str(),
			        obj_desc(dep).c_str());
			exit(1);
		}
	}
}
// Generates initialization code for all of the objects in 'to_do',
// grouped into numbered "cohorts" such that an object is only generated
// once everything it depends on has been generated in an earlier cohort.
// 'to_do' is emptied in the process. Returns the number of cohorts.
//
// The algorithm is deliberately simple: repeatedly collect the objects
// that have no dependency still in 'to_do', generate them as the next
// cohort, and remove them from 'to_do' (which may unblock others), until
// nothing is left.
int CPPCompile::GenDependentInits(unordered_set<const Obj*>& to_do)
{
	int n = 0;

	// True if 'o' depends on an object that is still awaiting generation.
	auto has_pending_dep = [&](const Obj* o) -> bool
	{
		const auto od = obj_deps.find(o);
		if ( od == obj_deps.end() )
			return false;

		for ( const auto& d : od->second )
			if ( to_do.count(d) > 0 )
				return true;

		return false;
	};

	while ( ! to_do.empty() )
	{
		unordered_set<const Obj*> cohort;

		for ( const auto& o : to_do )
			if ( ! has_pending_dep(o) )
				cohort.insert(o);

		// An empty cohort would mean no progress can be made.
		ASSERT(cohort.size() > 0);

		GenInitCohort(++n, cohort);

		for ( const auto& o : cohort )
		{
			ASSERT(to_do.count(o) > 0);
			to_do.erase(o);
		}
	}

	return n;
}
// Emits the generated function init_<nc>__CPP(), whose body holds the
// recorded initialization statements of every object in 'cohort'.
void CPPCompile::GenInitCohort(int nc, unordered_set<const Obj*>& cohort)
{
	NL();

	Emit("void init_%s__CPP()", Fmt(nc));
	StartBlock();

	// Code generated for globals that are initialized using script/BiF
	// functions expects a frame pointer to be present, even a nil one.
	Emit("Frame* f__CPP = nullptr;");

	// Purely cosmetic: put a blank line between the initializations of
	// distinct objects, skipping it after objects that emitted nothing.
	bool need_separator = false;

	for ( auto o : cohort )
	{
		if ( need_separator )
			NL();

		const auto& inits = obj_inits.find(o)->second;

		for ( const auto& i : inits )
			Emit("%s", i);

		need_separator = ! inits.empty();
	}

	EndBlock();
}
// Emits generated C++ text describing record field mappings, iterating
// over the entries of field_decls (each pairing a record type with a
// field declaration).
//
// NOTE(review): this span appears to interleave two versions of the
// function body (the surrounding text looks like a rendered diff whose
// +/- markers were lost). The statements that emit "fm_offset" lookups and
// "field_mapping.push_back(...)" and the statements that emit the
// "CPP__field_mappings__" vector with "CPP_FieldMapping(...)" entries look
// like alternative implementations rather than one coherent body --
// confirm against the real file before relying on this text.
void CPPCompile::InitializeFieldMappings()
{
Emit("int fm_offset;");
Emit("std::vector<CPP_FieldMapping> CPP__field_mappings__ = ");
StartBlock();
for ( const auto& mapping : field_decls )
{
auto rt = mapping.first;
auto rt_arg = Fmt(mapping.first);
auto td = mapping.second;
auto fn = td->id;
auto rt_name = GenTypeName(rt) + "->AsRecordType()";
auto type_arg = Fmt(TypeOffset(td->type));
auto attrs_arg = Fmt(AttributesOffset(td->attrs));
// Generated code: look the field up by name in the record type ...
Emit("fm_offset = %s->FieldOffset(\"%s\");", rt_name, fn);
Emit("if ( fm_offset < 0 )");
StartBlock();
// ... and, if it is missing, append it to the record type.
Emit("// field does not exist, create it");
Emit("fm_offset = %s->NumFields();", rt_name);
Emit("type_decl_list tl;");
Emit(GenTypeDecl(td));
Emit("%s->AddFieldsDirectly(tl);", rt_name);
EndBlock();
Emit("field_mapping.push_back(fm_offset);");
// Table-entry form: one CPP_FieldMapping per field declaration.
Emit("CPP_FieldMapping(%s, \"%s\", %s, %s),", rt_arg, td->id, type_arg, attrs_arg);
}
EndBlock(true);
}
// Emits the generated definition of the CPP__enum_mappings__ vector,
// driven by the entries of enum_names.
//
// NOTE(review): the two statements following the 'for' look like the old
// and new loop bodies of a diff placed side by side (a per-entry call to
// the three-argument InitializeEnumMappings() versus emitting a
// "CPP_EnumMapping(...)" table entry). As written, only the first of them
// is syntactically inside the loop, and the second refers to 'mapping'
// outside of it -- confirm against the real file.
void CPPCompile::InitializeEnumMappings()
{
int n = 0;
Emit("std::vector<CPP_EnumMapping> CPP__enum_mappings__ = ");
StartBlock();
for ( const auto& mapping : enum_names )
InitializeEnumMappings(mapping.first, mapping.second, n++);
Emit("CPP_EnumMapping(%s, \"%s\"),", Fmt(mapping.first), mapping.second);
EndBlock(true);
}
// NOTE(review): this span contains two function headers back to back and
// what look like two interleaved bodies (the surrounding text appears to
// be a rendered diff without +/- markers):
//
//   - InitializeEnumMappings(et, e_name, index): the AddInit(et, ...)
//     statements, which record generated code that looks up 'e_name' in
//     the enum type, adds the name if the lookup yields a negative offset
//     (reporting an internal error if that offset is already in use), and
//     stores the offset in enum_mapping[index].
//
//   - InitializeBiFs(): the Emit/StartBlock/EndBlock statements, which
//     emit the CPP__BiF_lookups__ vector with one "CPP_LookupBiF(...)"
//     entry per element of BiFs.
//
// Confirm the split against the real file before relying on this text.
void CPPCompile::InitializeEnumMappings(const EnumType* et, const string& e_name, int index)
void CPPCompile::InitializeBiFs()
{
AddInit(et, "{");
Emit("std::vector<CPP_LookupBiF> CPP__BiF_lookups__ = ");
auto et_name = GenTypeName(et) + "->AsEnumType()";
AddInit(et, "int em_offset = " + et_name + "->Lookup(\"" + e_name + "\");");
AddInit(et, "if ( em_offset < 0 )");
StartBlock();
AddInit(et, "\t{");
AddInit(et, "\tem_offset = " + et_name + "->Names().size();");
// The following is to catch the case where the offset is already
// in use due to it being specified explicitly for an existing enum.
AddInit(et, "\tif ( " + et_name + "->Lookup(em_offset) )");
AddInit(
et,
"\t\treporter->InternalError(\"enum inconsistency while initializing compiled scripts\");");
AddInit(et, "\t" + et_name + "->AddNameInternal(\"" + e_name + "\", em_offset);");
AddInit(et, "\t}");
for ( const auto& b : BiFs )
Emit("CPP_LookupBiF(%s, \"%s\"),", b.first, b.second);
AddInit(et, "enum_mapping[" + Fmt(index) + "] = em_offset;");
EndBlock(true);
}
AddInit(et, "}");
// Emits the generated definition of the CPP__Strings vector, with one
// double-quoted entry per element of ordered_tracked_strings, in order.
void CPPCompile::InitializeStrings()
{
	Emit("std::vector<const char*> CPP__Strings =");

	StartBlock();

	for ( const auto& str : ordered_tracked_strings )
		Emit("\"%s\",", str);

	EndBlock(true);
}
// Emits the generated definition of the CPP__Hashes vector, with one
// entry per element of ordered_tracked_hashes, in order.
void CPPCompile::InitializeHashes()
{
	Emit("std::vector<p_hash_type> CPP__Hashes =");

	StartBlock();

	for ( const auto& hash : ordered_tracked_hashes )
	{
		// Each entry is the formatted hash followed by a comma.
		Emit(Fmt(hash) + ",");
	}

	EndBlock(true);
}
// Emits the generated definition of the CPP__ConstVals vector, with one
// "CPP_ValElem(<type tag name>, <value>)" entry per element of consts.
void CPPCompile::InitializeConsts()
{
	Emit("std::vector<CPP_ValElem> CPP__ConstVals =");

	StartBlock();

	for ( const auto& const_info : consts )
	{
		const auto& tag = const_info.first;
		const auto& val = const_info.second;

		Emit("CPP_ValElem(%s, %s),", TypeTagName(tag), Fmt(val));
	}

	EndBlock(true);
}
void CPPCompile::GenInitHook()
@ -482,11 +216,13 @@ void CPPCompile::GenStandaloneActivation()
{
NL();
#if 0
Emit("void standalone_activation__CPP()");
StartBlock();
for ( auto& a : activations )
Emit(a);
EndBlock();
#endif
NL();
Emit("void standalone_init__CPP()");