extensive rewrite of generation & execution of run-time initialization

This commit is contained in:
Vern Paxson 2021-11-07 17:00:19 -08:00
parent bc3bf4ea6c
commit e1a760e674
26 changed files with 3459 additions and 1580 deletions

View file

@ -12,14 +12,13 @@ namespace zeek::detail
using namespace std;
CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, const string& gen_name,
const string& _addl_name, CPPHashManager& _hm, bool _update,
bool _standalone, bool report_uncompilable)
: funcs(_funcs), pfs(_pfs), hm(_hm), update(_update), standalone(_standalone)
const string& _addl_name, CPPHashManager& _hm, bool _standalone,
bool report_uncompilable)
: funcs(_funcs), pfs(_pfs), hm(_hm), standalone(_standalone)
{
addl_name = _addl_name;
bool is_addl = hm.IsAppend();
auto target_name = is_addl ? addl_name.c_str() : gen_name.c_str();
auto mode = is_addl ? "a" : "w";
auto target_name = gen_name.c_str();
auto mode = "w";
write_file = fopen(target_name, mode);
if ( ! write_file )
@ -27,30 +26,6 @@ CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, const strin
reporter->Error("can't open C++ target file %s", target_name);
exit(1);
}
if ( is_addl )
{
// We need a unique number to associate with the name
// space for the code we're adding. A convenient way to
// generate this safely is to use the present size of the
// file we're appending to. That guarantees that every
// incremental compilation will wind up with a different
// number.
struct stat st;
if ( fstat(fileno(write_file), &st) != 0 )
{
char buf[256];
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("fstat failed on %s: %s", target_name, buf);
exit(1);
}
// We use a value of "0" to mean "we're not appending,
// we're generating from scratch", so make sure we're
// distinct from that.
addl_tag = st.st_size + 1;
}
else
{
// Create an empty "additional" file.
@ -83,10 +58,6 @@ void CPPCompile::Compile(bool report_uncompilable)
working_dir = buf;
if ( update && addl_tag > 0 && CheckForCollisions() )
// Inconsistent compilation environment.
exit(1);
GenProlog();
// Determine which functions we can call directly, and reuse
@ -100,9 +71,13 @@ void CPPCompile::Compile(bool report_uncompilable)
const char* reason;
if ( IsCompilable(func, &reason) )
compilable_funcs.insert(BodyName(func));
else if ( reason && report_uncompilable )
fprintf(stderr, "%s cannot be compiled to C++ due to %s\n", func.Func()->Name(),
reason);
else
{
if ( reason && report_uncompilable )
fprintf(stderr, "%s cannot be compiled to C++ due to %s\n", func.Func()->Name(),
reason);
not_fully_compilable.insert(func.Func()->Name());
}
auto h = func.Profile()->HashVal();
if ( hm.HasHash(h) )
@ -119,39 +94,24 @@ void CPPCompile::Compile(bool report_uncompilable)
{
TypePtr tp{NewRef{}, (Type*)(t)};
types.AddKey(tp, pfs.HashType(t));
(void)RegisterType(tp);
}
for ( const auto& t : types.DistinctKeys() )
if ( ! types.IsInherited(t) )
// Type is new to this compilation, so we'll
// be generating it.
Emit("TypePtr %s;", types.KeyName(t));
// ### This doesn't work for -O add-C++
Emit("TypePtr types__CPP[%s];", Fmt(static_cast<int>(types.DistinctKeys().size())));
NL();
for ( const auto& c : pfs.Constants() )
AddConstant(c);
#if 0
for ( auto gi : all_global_info )
Emit(gi->Declare());
NL();
#endif
for ( auto& g : pfs.AllGlobals() )
CreateGlobal(g);
// Now that the globals are created, register their attributes,
// if any, and generate their initialization for use in standalone
// scripts. We can't do these in CreateGlobal() because at that
// point it's possible that some of the globals refer to other
// globals not-yet-created.
for ( auto& g : pfs.AllGlobals() )
{
RegisterAttributes(g->GetAttrs());
if ( g->HasVal() )
{
auto gn = string(g->Name());
GenGlobalInit(g, globals[gn], g->GetVal());
}
}
for ( const auto& e : pfs.Events() )
if ( AddGlobal(e, "gl", false) )
Emit("EventHandlerPtr %s_ev;", globals[string(e)]);
@ -201,10 +161,13 @@ void CPPCompile::Compile(bool report_uncompilable)
lambda_names.insert(n);
}
NL();
Emit("std::vector<CPP_RegisterBody> CPP__bodies_to_register = {");
for ( const auto& f : compiled_funcs )
RegisterCompiledBody(f);
GenFuncVarInits();
Emit("};");
GenEpilog();
}
@ -217,12 +180,75 @@ void CPPCompile::GenProlog()
Emit("namespace zeek::detail { //\n");
}
Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir.c_str());
Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir);
// The following might-or-might-not wind up being populated/used.
Emit("std::vector<int> field_mapping;");
Emit("std::vector<int> enum_mapping;");
NL();
const_info[TYPE_BOOL] = CreateConstInitInfo("Bool", "ValPtr", "bool");
const_info[TYPE_INT] = CreateConstInitInfo("Int", "ValPtr", "bro_int_t");
const_info[TYPE_COUNT] = CreateConstInitInfo("Count", "ValPtr", "bro_uint_t");
const_info[TYPE_DOUBLE] = CreateConstInitInfo("Double", "ValPtr", "double");
const_info[TYPE_TIME] = CreateConstInitInfo("Time", "ValPtr", "double");
const_info[TYPE_INTERVAL] = CreateConstInitInfo("Interval", "ValPtr", "double");
const_info[TYPE_ADDR] = CreateConstInitInfo("Addr", "ValPtr", "");
const_info[TYPE_SUBNET] = CreateConstInitInfo("SubNet", "ValPtr", "");
const_info[TYPE_PORT] = CreateConstInitInfo("Port", "ValPtr", "uint32_t");
const_info[TYPE_ENUM] = CreateCompoundInitInfo("Enum", "ValPtr");
const_info[TYPE_STRING] = CreateCompoundInitInfo("String", "ValPtr");
const_info[TYPE_LIST] = CreateCompoundInitInfo("List", "ValPtr");
const_info[TYPE_PATTERN] = CreateCompoundInitInfo("Pattern", "ValPtr");
const_info[TYPE_VECTOR] = CreateCompoundInitInfo("Vector", "ValPtr");
const_info[TYPE_RECORD] = CreateCompoundInitInfo("Record", "ValPtr");
const_info[TYPE_TABLE] = CreateCompoundInitInfo("Table", "ValPtr");
const_info[TYPE_FUNC] = CreateCompoundInitInfo("Func", "ValPtr");
const_info[TYPE_FILE] = CreateCompoundInitInfo("File", "ValPtr");
type_info = CreateCompoundInitInfo("Type", "Ptr");
attr_info = CreateCompoundInitInfo("Attr", "Ptr");
attrs_info = CreateCompoundInitInfo("Attributes", "Ptr");
call_exprs_info = CreateCustomInitInfo("CallExpr", "Ptr");
lambda_reg_info = CreateCustomInitInfo("LambdaRegistration", "");
global_id_info = CreateCustomInitInfo("GlobalID", "");
NL();
DeclareDynCPPStmt();
NL();
}
// Builds a CPP_BasicConstInitsInfo from "tag", "type" and "c_type" and
// passes it through RegisterInitInfo(), returning whatever that returns.
shared_ptr<CPP_InitsInfo> CPPCompile::CreateConstInitInfo(const char* tag, const char* type,
                                                          const char* c_type)
	{
	return RegisterInitInfo(tag, type, make_shared<CPP_BasicConstInitsInfo>(tag, type, c_type));
	}
// Builds a CPP_CompoundInitsInfo from "tag" and "type" and passes it
// through RegisterInitInfo(), returning whatever that returns.
shared_ptr<CPP_InitsInfo> CPPCompile::CreateCompoundInitInfo(const char* tag, const char* type)
	{
	return RegisterInitInfo(tag, type, make_shared<CPP_CompoundInitsInfo>(tag, type));
	}
// Builds a CPP_CustomInitsInfo from "tag" and "type" and passes it through
// RegisterInitInfo(), returning whatever that returns.  When "type" is the
// empty string, the info object's C++ type is explicitly set to "void*"
// before registration.
shared_ptr<CPP_InitsInfo> CPPCompile::CreateCustomInitInfo(const char* tag, const char* type)
	{
	auto custom_info = make_shared<CPP_CustomInitsInfo>(tag, type);

	const bool type_is_empty = (*type == '\0');
	if ( type_is_empty )
		custom_info->SetCPPType("void*");

	return RegisterInitInfo(tag, type, custom_info);
	}
// Emits the declaration of the generated vector "CPP__<tag>__" and records
// "gi" in all_global_info.  The vector's element type is "<tag><type>"
// when "type" is non-empty, and "void*" otherwise.  Returns "gi".
shared_ptr<CPP_InitsInfo> CPPCompile::RegisterInitInfo(const char* tag, const char* type,
                                                       shared_ptr<CPP_InitsInfo> gi)
	{
	const string tag_name(tag);

	string elem_type;
	if ( *type != '\0' )
		elem_type = tag_name + type;
	else
		elem_type = "void*";

	Emit("std::vector<%s> CPP__%s__;", elem_type, tag_name);

	all_global_info.insert(gi);
	return gi;
	}
void CPPCompile::RegisterCompiledBody(const string& f)
@ -252,74 +278,135 @@ void CPPCompile::RegisterCompiledBody(const string& f)
// same binary).
h = merge_p_hashes(h, p_hash(cf_locs[f]));
auto init = string("register_body__CPP(make_intrusive<") + f + "_cl>(\"" + f + "\"), " +
Fmt(p) + ", " + Fmt(h) + ", " + events + ");";
AddInit(names_to_bodies[f], init);
if ( update )
{
fprintf(hm.HashFile(), "func\n%s%s\n", scope_prefix(addl_tag).c_str(), f.c_str());
fprintf(hm.HashFile(), "%llu\n", h);
}
ASSERT(func_index.count(f) > 0);
auto type_signature = casting_index[func_index[f]];
Emit("\tCPP_RegisterBody(\"%s\", (void*) %s, %s, %s, %s, std::vector<std::string>(%s)),", f, f,
Fmt(type_signature), Fmt(p), Fmt(h), events);
}
void CPPCompile::GenEpilog()
{
NL();
for ( const auto& ii : init_infos )
GenInitExpr(ii.second);
for ( const auto& e : init_exprs.DistinctKeys() )
NL();
Emit("ValPtr CPPDynStmt::Exec(Frame* f, StmtFlowType& flow)");
StartBlock();
Emit("flow = FLOW_RETURN;");
Emit("switch ( type_signature )");
StartBlock();
for ( auto i = 0U; i < func_casting_glue.size(); ++i )
{
GenInitExpr(e);
if ( update )
init_exprs.LogIfNew(e, addl_tag, hm.HashFile());
Emit("case %s:", to_string(i));
StartBlock();
auto& glue = func_casting_glue[i];
auto invoke = string("(*(") + glue.cast + ")(func))(" + glue.args + ")";
if ( glue.is_hook )
{
Emit("if ( ! %s )", invoke);
StartBlock();
Emit("flow = FLOW_BREAK;");
EndBlock();
Emit("return nullptr;");
}
else if ( IsNativeType(glue.yield) )
GenInvokeBody(invoke, glue.yield);
else
Emit("return %s;", invoke);
EndBlock();
}
for ( const auto& a : attributes.DistinctKeys() )
{
GenAttrs(a);
if ( update )
attributes.LogIfNew(a, addl_tag, hm.HashFile());
}
Emit("default:");
Emit("\treporter->InternalError(\"invalid type in CPPDynStmt::Exec\");");
Emit("\treturn nullptr;");
// Generate the guts of compound types, and preserve type names
// if present.
for ( const auto& t : types.DistinctKeys() )
{
ExpandTypeVar(t);
if ( update )
types.LogIfNew(t, addl_tag, hm.HashFile());
}
EndBlock();
EndBlock();
InitializeEnumMappings();
NL();
GenPreInits();
unordered_set<const Obj*> to_do;
for ( const auto& oi : obj_inits )
to_do.insert(oi.first);
CheckInitConsistency(to_do);
auto nc = GenDependentInits(to_do);
for ( auto gi : all_global_info )
gi->GenerateInitializers(this);
if ( standalone )
GenStandaloneActivation();
NL();
InitializeEnumMappings();
NL();
InitializeFieldMappings();
NL();
InitializeBiFs();
NL();
indices_mgr.Generate(this);
NL();
InitializeStrings();
NL();
InitializeHashes();
NL();
InitializeConsts();
NL();
Emit("void init__CPP()");
StartBlock();
Emit("enum_mapping.resize(%s);\n", Fmt(int(enum_names.size())));
Emit("pre_init__CPP();");
Emit("std::vector<std::vector<int>> InitIndices;");
Emit("generate_indices_set(CPP__Indices__init, InitIndices);");
Emit("std::map<TypeTag, std::shared_ptr<CPP_AbstractInitAccessor>> InitConsts;");
NL();
for ( auto i = 1; i <= nc; ++i )
Emit("init_%s__CPP();", Fmt(i));
for ( const auto& ci : const_info )
{
auto& gi = ci.second;
Emit("InitConsts.emplace(%s, std::make_shared<CPP_InitAccessor<%s>>(%s));",
TypeTagName(ci.first), gi->CPPType(), gi->InitsName());
}
Emit("InitsManager im(CPP__ConstVals, InitConsts, InitIndices, CPP__Strings, CPP__Hashes, "
"CPP__Type__, CPP__Attributes__, CPP__Attr__, CPP__CallExpr__);");
NL();
Emit("for ( auto& b : CPP__bodies_to_register )");
StartBlock();
Emit("auto f = make_intrusive<CPPDynStmt>(b.func_name.c_str(), b.func, b.type_signature);");
Emit("register_body__CPP(f, b.priority, b.h, b.events);");
EndBlock();
NL();
int max_cohort = 0;
for ( auto gi : all_global_info )
max_cohort = std::max(max_cohort, gi->MaxCohort());
for ( auto c = 0; c <= max_cohort; ++c )
for ( auto gi : all_global_info )
if ( gi->CohortSize(c) > 0 )
Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c));
NL();
Emit("for ( auto& b : CPP__BiF_lookups__ )");
Emit("\tb.ResolveBiF();");
// Populate mappings for dynamic offsets.
NL();
InitializeFieldMappings();
Emit("for ( auto& em : CPP__enum_mappings__ )");
Emit("\tenum_mapping.push_back(em.ComputeOffset(&im));");
NL();
Emit("for ( auto& fm : CPP__field_mappings__ )");
Emit("\tfield_mapping.push_back(fm.ComputeOffset(&im));");
if ( standalone )
Emit("standalone_init__CPP();");
@ -328,10 +415,7 @@ void CPPCompile::GenEpilog()
GenInitHook();
Emit("} // %s\n\n", scope_prefix(addl_tag).c_str());
if ( update )
UpdateGlobalHashes();
Emit("} // %s\n\n", scope_prefix(addl_tag));
if ( addl_tag > 0 )
return;