From bbae2cf36c7ef0f43cd885da225e4fec68c7f5fd Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 12 Dec 2021 12:31:28 -0800 Subject: [PATCH 01/11] minor efficiency tweak for ZAM record construction --- src/script_opt/ZAM/Ops.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/script_opt/ZAM/Ops.in b/src/script_opt/ZAM/Ops.in index f57ce19947..71f1760694 100644 --- a/src/script_opt/ZAM/Ops.in +++ b/src/script_opt/ZAM/Ops.in @@ -1064,7 +1064,7 @@ macro EvalConstructRecord(map_init, map_accessor) const auto& t_ind = rt->GetFieldType(ind); v_i->AsVectorVal()->Concretize(t_ind->Yield()); } - new_r->Assign(ind, aux->ToVal(frame, i)); + new_r->Assign(ind, v_i); } auto& r = frame[z.v1].record_val; Unref(r); From ce7f886077c2aa0d3ad8b0aca3c95f0213c96479 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 12 Dec 2021 12:32:27 -0800 Subject: [PATCH 02/11] removing vestigial methods --- src/script_opt/CPP/Compile.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index 87ebe786a8..6e3ccf71e8 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -943,10 +943,6 @@ private: // the set of attributes. void BuildAttrs(const AttributesPtr& attrs, std::string& attr_tags, std::string& attr_vals); - // Generates code to create the given attributes at run-time. - void GenAttrs(const AttributesPtr& attrs); - std::string GenAttrExpr(const ExprPtr& e); - // Returns a string representation of the name associated with // different attribute tags (e.g., "ATTR_DEFAULT"). static const char* AttrName(AttrTag t); From 4ea5785908b86539fe9962e995db084f7ee047b8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 12 Dec 2021 12:33:06 -0800 Subject: [PATCH 03/11] fixed for profiling missing some profile elements --- src/script_opt/ProfileFunc.cc | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index 0716a885e3..c2bf948304 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -470,16 +470,22 @@ ProfileFuncs::ProfileFuncs(std::vector& funcs, is_compilable_pred pred // functions. Recursively compute their hashes. ComputeTypeHashes(main_types); - // Computing the hashes can have marked expressions (seen in - // record attributes) for further analysis. Likewise, when - // doing the profile merges above we may have noted lambda - // expressions. Analyze these, and iteratively any further - // expressions that that analysis uncovers. - DrainPendingExprs(); + do + { + // Computing the hashes can have marked expressions (seen in + // record attributes) for further analysis. Likewise, when + // doing the profile merges above we may have noted lambda + // expressions. Analyze these, and iteratively any further + // expressions that that analysis uncovers. + DrainPendingExprs(); - // We now have all the information we need to form definitive, - // deterministic hashes. - ComputeBodyHashes(funcs); + // We now have all the information we need to form definitive, + // deterministic hashes. + ComputeBodyHashes(funcs); + + // Computing those hashes could have led to traversals that + // create more pending expressions to analyze. + } while ( ! pending_exprs.empty() ); } void ProfileFuncs::MergeInProfile(ProfileFunc* pf) From 52ed9351a9c44353f62d4d14f0a4af68f2d468bc Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 12 Dec 2021 12:33:38 -0800 Subject: [PATCH 04/11] fixes for compiling vector operations to C++ --- src/script_opt/CPP/Exprs.cc | 20 ++++----- src/script_opt/CPP/RuntimeVec.cc | 77 +++++++++++++------------------- src/script_opt/CPP/RuntimeVec.h | 11 +++-- 3 files changed, 46 insertions(+), 62 deletions(-) diff --git a/src/script_opt/CPP/Exprs.cc b/src/script_opt/CPP/Exprs.cc index ad8f49e513..f65be5eb84 100644 --- a/src/script_opt/CPP/Exprs.cc +++ b/src/script_opt/CPP/Exprs.cc @@ -580,13 +580,13 @@ string CPPCompile::GenArithCoerceExpr(const Expr* e, GenType gt) if ( same_type(t, op->GetType()) ) return GenExpr(op, gt); - bool is_vec = t->Tag() == TYPE_VECTOR; - - auto coerce_t = is_vec ? t->Yield() : t; + if ( t->Tag() == TYPE_VECTOR ) + return string("vector_coerce_to__CPP(") + GenExpr(op, GEN_NATIVE) + ", " + GenTypeName(t) + + ")"; string cast_name; - switch ( coerce_t->InternalType() ) + switch ( t->InternalType() ) { case TYPE_INTERNAL_INT: cast_name = "bro_int_t"; @@ -602,10 +602,6 @@ string CPPCompile::GenArithCoerceExpr(const Expr* e, GenType gt) reporter->InternalError("bad type in arithmetic coercion"); } - if ( is_vec ) - return string("vec_coerce_to_") + cast_name + "__CPP(" + GenExpr(op, GEN_NATIVE) + ", " + - GenTypeName(t) + ")"; - return NativeToGT(cast_name + "(" + GenExpr(op, GEN_NATIVE) + ")", t, gt); } @@ -1115,10 +1111,12 @@ string CPPCompile::GenListAssign(const ExprPtr& lhs, const ExprPtr& rhs) string CPPCompile::GenVectorOp(const Expr* e, string op, const char* vec_op) { - auto gen = string("vec_op_") + vec_op + "__CPP(" + op + ")"; + auto t = e->GetType(); + auto gen_t = GenTypeName(t); + auto gen = string("vec_op_") + vec_op + "__CPP(" + op + ", " + gen_t + ")"; - if ( ! IsArithmetic(e->GetType()->Yield()->Tag()) ) - gen = string("vector_coerce_to__CPP(") + gen + ", " + GenTypeName(e->GetType()) + ")"; + if ( ! IsArithmetic(t->Yield()->Tag()) ) + gen = string("vector_coerce_to__CPP(") + gen + ", " + gen_t + ")"; return gen; } diff --git a/src/script_opt/CPP/RuntimeVec.cc b/src/script_opt/CPP/RuntimeVec.cc index c8279e7988..2607a2fb87 100644 --- a/src/script_opt/CPP/RuntimeVec.cc +++ b/src/script_opt/CPP/RuntimeVec.cc @@ -47,8 +47,9 @@ static VectorTypePtr base_vector_type__CPP(const VectorTypePtr& vt) #define VEC_OP1_KERNEL(accessor, type, op) \ for ( unsigned int i = 0; i < v->Size(); ++i ) \ { \ - auto v_i = v->ValAt(i)->accessor(); \ - v_result->Assign(i, make_intrusive(op v_i)); \ + auto v_i = v->ValAt(i); \ + if ( v_i ) \ + v_result->Assign(i, make_intrusive(op v_i->accessor())); \ } // A macro (since it's beyond my templating skillz to deal with the @@ -58,9 +59,9 @@ static VectorTypePtr base_vector_type__CPP(const VectorTypePtr& vt) // is "double". It needs to be optional because C++ will (rightfully) // complain about applying certain C++ unary operations to doubles. #define VEC_OP1(name, op, double_kernel) \ - VectorValPtr vec_op_##name##__CPP(const VectorValPtr& v) \ + VectorValPtr vec_op_##name##__CPP(const VectorValPtr& v, const TypePtr& t) \ { \ - auto vt = base_vector_type__CPP(v->GetType()); \ + auto vt = base_vector_type__CPP(cast_intrusive(t)); \ auto v_result = make_intrusive(vt); \ \ switch ( vt->Yield()->InternalType() ) \ @@ -105,9 +106,10 @@ VEC_OP1(comp, ~, ) #define VEC_OP2_KERNEL(accessor, type, op) \ for ( unsigned int i = 0; i < v1->Size(); ++i ) \ { \ - auto v1_i = v1->ValAt(i)->accessor(); \ - auto v2_i = v2->ValAt(i)->accessor(); \ - v_result->Assign(i, make_intrusive(v1_i op v2_i)); \ + auto v1_i = v1->ValAt(i); \ + auto v2_i = v2->ValAt(i); \ + if ( v1_i && v2_i ) \ + v_result->Assign(i, make_intrusive(v1_i->accessor() op v2_i->accessor())); \ } // Analogous to VEC_OP1, instantiates a function for a given binary operation, @@ -387,27 +389,42 @@ VectorValPtr vector_coerce_to__CPP(const VectorValPtr& v, const TypePtr& targ) for ( unsigned int i = 0; i < n; ++i ) { ValPtr v_i = v->ValAt(i); + if ( ! v_i ) + continue; + ValPtr r_i; switch ( ytag ) { case TYPE_BOOL: - r_i = val_mgr->Bool(v_i->AsBool()); + r_i = val_mgr->Bool(v_i->CoerceToInt() != 0); + break; + + case TYPE_INT: + r_i = val_mgr->Int(v_i->CoerceToInt()); + break; + + case TYPE_COUNT: + r_i = val_mgr->Count(v_i->CoerceToUnsigned()); break; case TYPE_ENUM: - r_i = yt->AsEnumType()->GetEnumVal(v_i->AsInt()); + r_i = yt->AsEnumType()->GetEnumVal(v_i->CoerceToInt()); break; case TYPE_PORT: - r_i = make_intrusive(v_i->AsCount()); + r_i = make_intrusive(v_i->CoerceToUnsigned()); + break; + + case TYPE_DOUBLE: + r_i = make_intrusive(v_i->CoerceToDouble()); break; case TYPE_INTERVAL: - r_i = make_intrusive(v_i->AsDouble()); + r_i = make_intrusive(v_i->CoerceToDouble()); break; case TYPE_TIME: - r_i = make_intrusive(v_i->AsDouble()); + r_i = make_intrusive(v_i->CoerceToDouble()); break; default: @@ -420,40 +437,10 @@ VectorValPtr vector_coerce_to__CPP(const VectorValPtr& v, const TypePtr& targ) return v_result; } -VectorValPtr vec_coerce_to_bro_int_t__CPP(const VectorValPtr& v, TypePtr targ) +VectorValPtr vec_scalar_mixed_with_vector() { - auto res_t = cast_intrusive(targ); - auto v_result = make_intrusive(move(res_t)); - auto n = v->Size(); - - for ( unsigned int i = 0; i < n; ++i ) - v_result->Assign(i, val_mgr->Int(v->IntAt(i))); - - return v_result; - } - -VectorValPtr vec_coerce_to_bro_uint_t__CPP(const VectorValPtr& v, TypePtr targ) - { - auto res_t = cast_intrusive(targ); - auto v_result = make_intrusive(move(res_t)); - auto n = v->Size(); - - for ( unsigned int i = 0; i < n; ++i ) - v_result->Assign(i, val_mgr->Count(v->CountAt(i))); - - return v_result; - } - -VectorValPtr vec_coerce_to_double__CPP(const VectorValPtr& v, TypePtr targ) - { - auto res_t = cast_intrusive(targ); - auto v_result = make_intrusive(move(res_t)); - auto n = v->Size(); - - for ( unsigned int i = 0; i < n; ++i ) - v_result->Assign(i, make_intrusive(v->DoubleAt(i))); - - return v_result; + reporter->CPPRuntimeError("vector-mixed-with-scalar operations not supported"); + return nullptr; } } // namespace zeek::detail diff --git a/src/script_opt/CPP/RuntimeVec.h b/src/script_opt/CPP/RuntimeVec.h index 96cc1cfc67..2dfe1374c3 100644 --- a/src/script_opt/CPP/RuntimeVec.h +++ b/src/script_opt/CPP/RuntimeVec.h @@ -22,10 +22,10 @@ inline ValPtr vector_append__CPP(VectorValPtr v1, ValPtr v2) } // Unary vector operations. -extern VectorValPtr vec_op_pos__CPP(const VectorValPtr& v); -extern VectorValPtr vec_op_neg__CPP(const VectorValPtr& v); -extern VectorValPtr vec_op_not__CPP(const VectorValPtr& v); -extern VectorValPtr vec_op_comp__CPP(const VectorValPtr& v); +extern VectorValPtr vec_op_pos__CPP(const VectorValPtr& v, const TypePtr& t); +extern VectorValPtr vec_op_neg__CPP(const VectorValPtr& v, const TypePtr& t); +extern VectorValPtr vec_op_not__CPP(const VectorValPtr& v, const TypePtr& t); +extern VectorValPtr vec_op_comp__CPP(const VectorValPtr& v, const TypePtr& t); // Binary vector operations. extern VectorValPtr vec_op_add__CPP(const VectorValPtr& v1, const VectorValPtr& v2); @@ -81,8 +81,7 @@ extern VectorValPtr vec_coerce_to_bro_uint_t__CPP(const VectorValPtr& v, TypePtr extern VectorValPtr vec_coerce_to_double__CPP(const VectorValPtr& v, TypePtr targ); // A dummy function used during code generation for unsupported operations -// that mix vector and scalar arguments. We don't define it in RuntimeVec.cc -// so that it'll generate a linking error. +// that mix vector and scalar arguments. extern VectorValPtr vec_scalar_mixed_with_vector(); } // namespace zeek::detail From 96ed94457111004b435c8b98ed586b16468d8542 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 12 Dec 2021 12:34:23 -0800 Subject: [PATCH 05/11] fix for compiling record constructors to C++ --- src/script_opt/CPP/RuntimeOps.cc | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/script_opt/CPP/RuntimeOps.cc b/src/script_opt/CPP/RuntimeOps.cc index 2aa55f3f1c..13cb3f5092 100644 --- a/src/script_opt/CPP/RuntimeOps.cc +++ b/src/script_opt/CPP/RuntimeOps.cc @@ -215,22 +215,43 @@ TableValPtr table_constructor__CPP(vector indices, vector vals, RecordValPtr record_constructor__CPP(vector vals, RecordTypePtr t) { - auto rv = make_intrusive(move(t)); + auto rv = make_intrusive(t); auto n = vals.size(); for ( auto i = 0u; i < n; ++i ) - rv->Assign(i, vals[i]); + { + auto& v_i = vals[i]; + + if ( v_i && v_i->GetType()->Tag() == TYPE_VECTOR && v_i->AsVectorVal()->Size() == 0 ) + { + const auto& t_ind = t->GetFieldType(i); + v_i->AsVectorVal()->Concretize(t_ind->Yield()); + } + + rv->Assign(i, v_i); + } return rv; } RecordValPtr record_constructor_map__CPP(vector vals, vector map, RecordTypePtr t) { - auto rv = make_intrusive(move(t)); + auto rv = make_intrusive(t); auto n = vals.size(); for ( auto i = 0u; i < n; ++i ) - rv->Assign(map[i], vals[i]); + { + auto& v_i = vals[i]; + auto ind = map[i]; + + if ( v_i && v_i->GetType()->Tag() == TYPE_VECTOR && v_i->AsVectorVal()->Size() == 0 ) + { + const auto& t_ind = t->GetFieldType(ind); + v_i->AsVectorVal()->Concretize(t_ind->Yield()); + } + + rv->Assign(ind, v_i); + } return rv; } From d5e7db1070dc01a287123ab214068609fa3f395f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 12 Dec 2021 12:34:50 -0800 Subject: [PATCH 06/11] new ZEEK_FILE_ONLY and ZEEK_FUNC_ONLY environment variables for debugging script optimization - replaces ZEEK_ONLY --- src/script_opt/ScriptOpt.cc | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index cf688f32d4..d35413d281 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -99,16 +99,16 @@ bool should_analyze(const ScriptFuncPtr& f, const StmtPtr& body) if ( ofiles.empty() && ofuncs.empty() ) return true; - auto fn = f->Name(); + auto fun = f->Name(); for ( auto& o : ofuncs ) - if ( std::regex_match(fn, o) ) + if ( std::regex_match(fun, o) ) return true; - fn = body->GetLocationInfo()->filename; + auto fin = util::detail::normalize_path(body->GetLocationInfo()->filename); for ( auto& o : ofiles ) - if ( std::regex_match(fn, o) ) + if ( std::regex_match(fin, o) ) return true; return false; @@ -296,7 +296,14 @@ static void init_options() if ( analysis_options.only_funcs.empty() ) { - auto zo = getenv("ZEEK_ONLY"); + auto zo = getenv("ZEEK_FUNC_ONLY"); + if ( zo ) + add_func_analysis_pattern(analysis_options, zo); + } + + if ( analysis_options.only_files.empty() ) + { + auto zo = getenv("ZEEK_FILE_ONLY"); if ( zo ) add_file_analysis_pattern(analysis_options, zo); } From 3b3cea799be3374f8be43739de68dba93c45a547 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 12 Dec 2021 12:36:08 -0800 Subject: [PATCH 07/11] fixes for -O gen-standalone-C++ --- src/script_opt/CPP/CPP-load.bif | 3 --- src/script_opt/CPP/Inits.cc | 23 ++++++----------------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/src/script_opt/CPP/CPP-load.bif b/src/script_opt/CPP/CPP-load.bif index 4e6105b0bb..87528ce658 100644 --- a/src/script_opt/CPP/CPP-load.bif +++ b/src/script_opt/CPP/CPP-load.bif @@ -39,8 +39,5 @@ function load_CPP%(h: count%): bool // compiled scripts. detail::standalone_activations.push_back(cb->second); - // Proceed with activation. - (*detail::CPP_init_hook)(); - return zeek::val_mgr->True(); %} diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index f1d7a52338..376fc93ea3 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -213,17 +213,11 @@ void CPPCompile::GenStandaloneActivation() { NL(); -#if 0 Emit("void standalone_activation__CPP()"); StartBlock(); - for ( auto& a : activations ) - Emit(a); - EndBlock(); -#endif + Emit("finish_init__CPP();"); NL(); - Emit("void standalone_init__CPP()"); - StartBlock(); // For events and hooks, we need to add each compiled body *unless* // it's already there (which could be the case if the standalone @@ -283,10 +277,14 @@ void CPPCompile::GenStandaloneActivation() GenTypeName(ft), hashes); } + EndBlock(); + NL(); + Emit("void standalone_init__CPP()"); + StartBlock(); + Emit("init__CPP();"); Emit("CPP_activation_funcs.push_back(standalone_activation__CPP);"); Emit("CPP_activation_hook = activate__CPPs;"); - EndBlock(); } @@ -300,15 +298,6 @@ void CPPCompile::GenLoad() Emit("register_scripts__CPP(%s, standalone_init__CPP);", Fmt(total_hash)); - // Spit out the placeholder script, and any associated module - // definitions. - for ( const auto& m : module_names ) - if ( m != "GLOBAL" ) - printf("module %s;\n", m.c_str()); - - if ( module_names.size() > 0 ) - printf("module GLOBAL;\n\n"); - printf("global init_CPP_%llu = load_CPP(%llu);\n", total_hash, total_hash); } From 2f7137999fe7262fa1f8489db29b2172abe6824c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 12 Dec 2021 12:36:45 -0800 Subject: [PATCH 08/11] restored support for incremental compilation of scripts to C++ --- src/script_opt/CPP/Compile.h | 4 ---- src/script_opt/CPP/DeclFunc.cc | 3 --- src/script_opt/CPP/Driver.cc | 13 +++---------- src/script_opt/CPP/Func.h | 1 + src/script_opt/CPP/InitsInfo.cc | 4 ++-- src/script_opt/CPP/RuntimeInitSupport.cc | 12 +++++++----- src/script_opt/CPP/RuntimeInitSupport.h | 5 +++-- src/script_opt/CPP/RuntimeInits.cc | 3 ++- src/script_opt/ProfileFunc.cc | 16 ++++++++++++++-- src/script_opt/ProfileFunc.h | 11 ----------- src/script_opt/ScriptOpt.cc | 21 +++++---------------- 11 files changed, 37 insertions(+), 56 deletions(-) diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index 6e3ccf71e8..3b91c60659 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -585,10 +585,6 @@ private: // function name, and maps to the C++ name. std::unordered_map compiled_simple_funcs; - // Maps those to their associated files - used to make add-C++ body - // hashes distinct. - std::unordered_map cf_locs; - // Maps function bodies to the names we use for them. std::unordered_map body_names; diff --git a/src/script_opt/CPP/DeclFunc.cc b/src/script_opt/CPP/DeclFunc.cc index cdc95b5f2f..cd884bf62c 100644 --- a/src/script_opt/CPP/DeclFunc.cc +++ b/src/script_opt/CPP/DeclFunc.cc @@ -99,9 +99,6 @@ void CPPCompile::CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, co // for lambdas that don't take any arguments, but that // seems potentially more confusing than beneficial. compiled_funcs.emplace(fname); - - auto loc_f = script_specific_filename(body); - cf_locs[fname] = loc_f; } auto h = pf->HashVal(); diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index fc3490c5c2..bfc4ef6f8e 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -197,10 +197,10 @@ void CPPCompile::Compile(bool report_uncompilable) void CPPCompile::GenProlog() { - if ( addl_tag == 0 ) - { + if ( addl_tag <= 1 ) + // This is either a compilation via gen-C++, or + // one using add-C++ and an empty CPP-gen.cc file. Emit("#include \"zeek/script_opt/CPP/Runtime.h\"\n"); - } Emit("namespace zeek::detail { //\n"); Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir); @@ -292,13 +292,6 @@ void CPPCompile::RegisterCompiledBody(const string& f) events = string("{") + events + "}"; - if ( addl_tag > 0 ) - // Hash in the location associated with this compilation - // pass, to get a final hash that avoids conflicts with - // identical-but-in-a-different-context function bodies - // when compiling potentially conflicting additional code. - h = merge_p_hashes(h, p_hash(cf_locs[f])); - auto fi = func_index.find(f); ASSERT(fi != func_index.end()); auto type_signature = casting_index[fi->second]; diff --git a/src/script_opt/CPP/Func.h b/src/script_opt/CPP/Func.h index 0de2edfbc6..28b348d62b 100644 --- a/src/script_opt/CPP/Func.h +++ b/src/script_opt/CPP/Func.h @@ -105,6 +105,7 @@ struct CompiledScript CPPStmtPtr body; int priority; std::vector events; + void (*finish_init_func)(); }; // Maps hashes to compiled information. diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index 2f2ab1ed8c..3f3b4f2593 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -239,14 +239,14 @@ void FuncConstInfo::InitializerVals(std::vector& ivs) const { auto f = fv->AsFunc(); const auto& fn = f->Name(); + const auto& bodies = f->GetBodies(); ivs.emplace_back(Fmt(type)); ivs.emplace_back(Fmt(c->TrackString(fn))); + ivs.emplace_back(to_string(bodies.size())); if ( ! c->NotFullyCompilable(fn) ) { - const auto& bodies = f->GetBodies(); - for ( const auto& b : bodies ) { auto h = c->BodyHash(b.stmts.get()); diff --git a/src/script_opt/CPP/RuntimeInitSupport.cc b/src/script_opt/CPP/RuntimeInitSupport.cc index 6a657d0713..eea927d192 100644 --- a/src/script_opt/CPP/RuntimeInitSupport.cc +++ b/src/script_opt/CPP/RuntimeInitSupport.cc @@ -62,9 +62,10 @@ void register_type__CPP(TypePtr t, const string& name) id->MakeType(); } -void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, vector events) +void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, vector events, + void (*finish_init)()) { - compiled_scripts[hash] = {move(body), priority, move(events)}; + compiled_scripts[hash] = {move(body), priority, move(events), finish_init}; } void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash, const char* name, TypePtr t, @@ -89,7 +90,7 @@ void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash, const char* name, T if ( ! has_captures ) // Note, no support for lambdas that themselves refer // to events. - register_body__CPP(body, 0, hash, {}); + register_body__CPP(body, 0, hash, {}, nullptr); } void register_scripts__CPP(p_hash_type h, void (*callback)()) @@ -190,11 +191,12 @@ Func* lookup_bif__CPP(const char* bif) return b ? b->GetVal()->AsFunc() : nullptr; } -FuncValPtr lookup_func__CPP(string name, vector hashes, const TypePtr& t) +FuncValPtr lookup_func__CPP(string name, int num_bodies, vector hashes, + const TypePtr& t) { auto ft = cast_intrusive(t); - if ( hashes.empty() ) + if ( static_cast(hashes.size()) < num_bodies ) { // This happens for functions that have at least one // uncompilable body. diff --git a/src/script_opt/CPP/RuntimeInitSupport.h b/src/script_opt/CPP/RuntimeInitSupport.h index 284290c95b..6b5ea46014 100644 --- a/src/script_opt/CPP/RuntimeInitSupport.h +++ b/src/script_opt/CPP/RuntimeInitSupport.h @@ -37,7 +37,7 @@ extern void register_type__CPP(TypePtr t, const std::string& name); // relevant for the function body, which should be registered if the // function body is going to be used. extern void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, - std::vector events); + std::vector events, void (*finish_init)()); // Registers a lambda body as associated with the given hash. Includes // the name of the lambda (so it can be made available as a quasi-global @@ -67,7 +67,8 @@ extern Func* lookup_bif__CPP(const char* bif); // returns an associated FuncVal. It's a fatal error for the hash // not to exist, because this function should only be called by compiled // code that has ensured its existence. -extern FuncValPtr lookup_func__CPP(std::string name, std::vector h, const TypePtr& t); +extern FuncValPtr lookup_func__CPP(std::string name, int num_bodies, std::vector h, + const TypePtr& t); // Returns the record corresponding to the given name, as long as the // name is indeed a record type. Otherwise (or if the name is nil) diff --git a/src/script_opt/CPP/RuntimeInits.cc b/src/script_opt/CPP/RuntimeInits.cc index 4f52171ffe..5286540997 100644 --- a/src/script_opt/CPP/RuntimeInits.cc +++ b/src/script_opt/CPP/RuntimeInits.cc @@ -147,13 +147,14 @@ void CPP_IndexedInits::Generate(InitsManager* im, std::vector& iv auto iv_end = init_vals.end(); auto t = *(iv_it++); auto fn = im->Strings(*(iv_it++)); + auto num_bodies = *(iv_it++); std::vector hashes; while ( iv_it != iv_end ) hashes.push_back(im->Hashes(*(iv_it++))); - ivec[offset] = lookup_func__CPP(fn, hashes, im->Types(t)); + ivec[offset] = lookup_func__CPP(fn, num_bodies, hashes, im->Types(t)); } template diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index c2bf948304..b354930516 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -23,7 +23,13 @@ p_hash_type p_hash(const Obj* o) return p_hash(d.Description()); } -std::string script_specific_filename(const StmtPtr& body) +// Returns a filename associated with the given function body. Used to +// provide distinctness to identical function bodies seen in separate, +// potentially conflicting incremental compilations. An example of this +// is a function named foo() that calls bar(), for which in two different +// compilation contexts bar() has differing semantics, even though foo()'s +// (shallow) semantics are the same. +static std::string script_specific_filename(const Stmt* body) { // The specific filename is taken from the location filename, making // it absolute if necessary. @@ -52,12 +58,15 @@ std::string script_specific_filename(const StmtPtr& body) return bl_f; } -p_hash_type script_specific_hash(const StmtPtr& body, p_hash_type generic_hash) +// Returns a incremental-compilation-specific hash for the given function +// body, given its non-specific hash is "generic_hash". +static p_hash_type script_specific_hash(const Stmt* body, p_hash_type generic_hash) { auto bl_f = script_specific_filename(body); return merge_p_hashes(generic_hash, p_hash(bl_f)); } + ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body, bool _abs_rec_fields) { profiled_func = func; @@ -698,6 +707,9 @@ void ProfileFuncs::ComputeProfileHash(std::shared_ptr pf) for ( auto i : pf->AdditionalHashes() ) h = merge_p_hashes(h, i); + if ( ! pf->Stmts().empty() ) + h = script_specific_hash(pf->Stmts()[0], h); + pf->SetHashVal(h); } diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index 2ccb47f6d6..36de4ee5ca 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -74,17 +74,6 @@ inline p_hash_type merge_p_hashes(p_hash_type h1, p_hash_type h2) return h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2)); } -// Returns a filename associated with the given function body. Used to -// provide distinctness to identical function bodies seen in separate, -// potentially conflicting incremental compilations. This is only germane -// for allowing incremental compilation of subsets of the test suite, so -// if we decide to forgo that capability, we can remove this. -extern std::string script_specific_filename(const StmtPtr& body); - -// Returns a incremental-compilation-specific hash for the given function -// body, given it's non-specific hash is "generic_hash". -extern p_hash_type script_specific_hash(const StmtPtr& body, p_hash_type generic_hash); - // Class for profiling the components of a single function (or expression). class ProfileFunc : public TraversalCallback { diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index d35413d281..05ef655016 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -350,17 +350,8 @@ static void report_CPP() auto name = f.Func()->Name(); auto hash = f.Profile()->HashVal(); bool have = compiled_scripts.count(hash) > 0; - auto specific = ""; - if ( ! have ) - { - hash = script_specific_hash(f.Body(), hash); - have = compiled_scripts.count(hash) > 0; - if ( have ) - specific = " - specific"; - } - - printf("script function %s (hash %llu%s): %s\n", name, hash, specific, have ? "yes" : "no"); + printf("script function %s (hash %llu): %s\n", name, hash, have ? "yes" : "no"); if ( have ) already_reported.insert(hash); @@ -387,12 +378,6 @@ static void use_CPP() auto hash = f.Profile()->HashVal(); auto s = compiled_scripts.find(hash); - if ( s == compiled_scripts.end() ) - { // Look for script-specific body. - hash = script_specific_hash(f.Body(), hash); - s = compiled_scripts.find(hash); - } - if ( s != compiled_scripts.end() ) { auto b = s->second.body; @@ -418,6 +403,10 @@ static void use_CPP() auto h = event_registry->Register(e); h->SetUsed(); } + + auto finish = s->second.finish_init_func; + if ( finish ) + (*finish)(); } } From 86288426fb89493ed7f3860f1ba2edfdbfc3f442 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 12 Dec 2021 12:41:16 -0800 Subject: [PATCH 09/11] factoring of generating C++ initializations, no semantic changes --- src/script_opt/CPP/Compile.h | 14 ++++ src/script_opt/CPP/Driver.cc | 138 +++++++++++++++++++++++------------ 2 files changed, 104 insertions(+), 48 deletions(-) diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index 3b91c60659..11d8808a75 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -301,6 +301,20 @@ private: // in support of run-time initialization of various dynamic values. void GenEpilog(); + // Generate the main method of the CPPDynStmt class, doing dynamic + // dispatch for function invocation. + void GenCPPDynStmt(); + + // Generate a function to load BiFs. + void GenLoadBiFs(); + + // Generate the main initialization function, which finalizes + // the run-time environment. + void GenFinishInit(); + + // Generate the function that registers compiled script bodies. + void GenRegisterBodies(); + // True if the given function (plus body and profile) is one // that should be compiled. If non-nil, sets reason to the // the reason why, if there's a fundamental problem. If however diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index bfc4ef6f8e..f9e97da5b4 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -306,9 +306,65 @@ void CPPCompile::GenEpilog() GenInitExpr(ii.second); NL(); - Emit("ValPtr CPPDynStmt::Exec(Frame* f, StmtFlowType& flow)"); + GenCPPDynStmt(); + + NL(); + for ( auto gi : all_global_info ) + gi->GenerateInitializers(this); + + NL(); + InitializeEnumMappings(); + + NL(); + InitializeFieldMappings(); + + NL(); + InitializeBiFs(); + + NL(); + indices_mgr.Generate(this); + + NL(); + InitializeStrings(); + + NL(); + InitializeHashes(); + + NL(); + InitializeConsts(); + + NL(); + GenLoadBiFs(); + + NL(); + GenFinishInit(); + + NL(); + GenRegisterBodies(); + + NL(); + Emit("void init__CPP()"); StartBlock(); + Emit("register_bodies__CPP();"); + EndBlock(); + + if ( standalone ) + GenStandaloneActivation(); + + GenInitHook(); + + Emit("} // %s\n\n", scope_prefix(addl_tag)); + Emit("} // zeek::detail"); + } + +void CPPCompile::GenCPPDynStmt() + { + Emit("ValPtr CPPDynStmt::Exec(Frame* f, StmtFlowType& flow)"); + + StartBlock(); + Emit("flow = FLOW_RETURN;"); + Emit("switch ( type_signature )"); StartBlock(); for ( auto i = 0U; i < func_casting_glue.size(); ++i ) @@ -343,41 +399,29 @@ void CPPCompile::GenEpilog() EndBlock(); EndBlock(); + } - NL(); +void CPPCompile::GenLoadBiFs() + { + Emit("void load_BiFs__CPP()"); + StartBlock(); + Emit("for ( auto& b : CPP__BiF_lookups__ )"); + Emit("\tb.ResolveBiF();"); + EndBlock(); + } - for ( auto gi : all_global_info ) - gi->GenerateInitializers(this); - - if ( standalone ) - GenStandaloneActivation(); - - NL(); - InitializeEnumMappings(); - - NL(); - InitializeFieldMappings(); - - NL(); - InitializeBiFs(); - - NL(); - indices_mgr.Generate(this); - - NL(); - InitializeStrings(); - - NL(); - InitializeHashes(); - - NL(); - InitializeConsts(); - - NL(); - Emit("void init__CPP()"); +void CPPCompile::GenFinishInit() + { + Emit("void finish_init__CPP()"); StartBlock(); + Emit("static bool did_init = false;"); + Emit("if ( did_init )"); + Emit("\treturn;"); + Emit("did_init = true;"); + + NL(); Emit("std::vector> InitIndices;"); Emit("generate_indices_set(CPP__Indices__init, InitIndices);"); @@ -394,13 +438,6 @@ void CPPCompile::GenEpilog() Emit("InitsManager im(CPP__ConstVals, InitConsts, InitIndices, CPP__Strings, CPP__Hashes, " "CPP__Type__, CPP__Attributes__, CPP__Attr__, CPP__CallExpr__);"); - NL(); - Emit("for ( auto& b : CPP__bodies_to_register )"); - StartBlock(); - Emit("auto f = make_intrusive(b.func_name.c_str(), b.func, b.type_signature);"); - Emit("register_body__CPP(f, b.priority, b.h, b.events);"); - EndBlock(); - NL(); int max_cohort = 0; for ( auto gi : all_global_info ) @@ -411,10 +448,6 @@ void CPPCompile::GenEpilog() if ( gi->CohortSize(c) > 0 ) Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c)); - NL(); - Emit("for ( auto& b : CPP__BiF_lookups__ )"); - Emit("\tb.ResolveBiF();"); - // Populate mappings for dynamic offsets. NL(); Emit("for ( auto& em : CPP__enum_mappings__ )"); @@ -423,15 +456,24 @@ void CPPCompile::GenEpilog() Emit("for ( auto& fm : CPP__field_mappings__ )"); Emit("\tfield_mapping.push_back(fm.ComputeOffset(&im));"); - if ( standalone ) - Emit("standalone_init__CPP();"); + NL(); + Emit("load_BiFs__CPP();"); - EndBlock(true); + EndBlock(); + } - GenInitHook(); +void CPPCompile::GenRegisterBodies() + { + Emit("void register_bodies__CPP()"); + StartBlock(); - Emit("} // %s\n\n", scope_prefix(addl_tag)); - Emit("} // zeek::detail"); + Emit("for ( auto& b : CPP__bodies_to_register )"); + StartBlock(); + Emit("auto f = make_intrusive(b.func_name.c_str(), b.func, b.type_signature);"); + Emit("register_body__CPP(f, b.priority, b.h, b.events, finish_init__CPP);"); + EndBlock(); + + EndBlock(); } bool CPPCompile::IsCompilable(const FuncInfo& func, const char** reason) From 3fc58bdd9f17e8bb4765a3a1dac9727ce98dd09b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 12 Dec 2021 12:42:16 -0800 Subject: [PATCH 10/11] minor note regarding improving performance of C++-generated code --- src/script_opt/CPP/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/script_opt/CPP/README.md b/src/script_opt/CPP/README.md index 1ed3cc6c13..0187be4397 100644 --- a/src/script_opt/CPP/README.md +++ b/src/script_opt/CPP/README.md @@ -185,7 +185,7 @@ particularly difficult to fix. * A number of steps could be taken to increase the performance of the optimized code. These include: - 1. Switching the generated code to use the new ZVal-related interfaces. + 1. Switching the generated code to use the new ZVal-related interfaces, including for vector operations. 2. Directly calling BiFs rather than using the `Invoke()` method to do so. This relates to the broader question of switching BiFs to be based on a notion of "inlined C++" code in Zeek functions, rather than using the standalone `bifcl` BiF compiler. 3. Switching the Event Engine over to queuing events with `ZVal` arguments rather than `ValPtr` arguments. 4. Making the compiler aware of certain BiFs that can be directly inlined (e.g., `network_time()`), a technique employed effectively by the ZAM compiler. From a10fafe398af4cd161aabfc375202291019a9b6d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 12 Dec 2021 12:43:30 -0800 Subject: [PATCH 11/11] tweak to keep clang-format happy --- src/script_opt/ProfileFunc.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index b354930516..b29bf068d4 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -66,7 +66,6 @@ static p_hash_type script_specific_hash(const Stmt* body, p_hash_type generic_ha return merge_p_hashes(generic_hash, p_hash(bl_f)); } - ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body, bool _abs_rec_fields) { profiled_func = func;