diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index e8cd896adc..5eb1d4de53 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -1933,6 +1933,7 @@ type gtp_delete_pdp_ctx_response_elements: record { @load base/frameworks/supervisor/api @load base/bif/supervisor.bif @load base/bif/packet_analysis.bif +@load base/bif/CPP-load.bif ## Internal function. function add_interface(iold: string, inew: string): string diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3c101c3305..37036504c4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -124,6 +124,9 @@ set(BIF_SRCS # it's needed before parsing the packet protocol scripts, which happen # very near to the start of parsing. packet_analysis/packet_analysis.bif + # The C++ loading BIF is treated like other top-level BIFs to give + # us flexibility regarding when it's called. + script_opt/CPP/CPP-load.bif ) foreach (bift ${BIF_SRCS}) diff --git a/src/Func.cc b/src/Func.cc index 82744623bb..6a85623ad5 100644 --- a/src/Func.cc +++ b/src/Func.cc @@ -62,6 +62,7 @@ #include "option.bif.func_h" #include "supervisor.bif.func_h" #include "packet_analysis.bif.func_h" +#include "CPP-load.bif.func_h" #include "zeek.bif.func_def" #include "stats.bif.func_def" @@ -70,6 +71,7 @@ #include "option.bif.func_def" #include "supervisor.bif.func_def" #include "packet_analysis.bif.func_def" +#include "CPP-load.bif.func_def" extern RETSIGTYPE sig_handler(int signo); diff --git a/src/Options.cc b/src/Options.cc index 9d6e195af6..6f5cfc7705 100644 --- a/src/Options.cc +++ b/src/Options.cc @@ -158,26 +158,48 @@ static void set_analysis_option(const char* opt, Options& opts) if ( util::streq(opt, "help") ) { fprintf(stderr, "--optimize options:\n"); + fprintf(stderr, " all equivalent to \"inline\" and \"activate\"\n"); + fprintf(stderr, " add-C++ generate private C++ for any missing script bodies\n"); fprintf(stderr, " dump-uds dump use-defs to stdout; implies xform\n"); fprintf(stderr, 
" dump-xform dump transformed scripts to stdout; implies xform\n"); + fprintf(stderr, " force-use-C++ use available C++ script bodies, warning about missing ones\n"); + fprintf(stderr, " gen-C++ generate C++ script bodies\n"); + fprintf(stderr, " gen-standalone-C++ generate \"standalone\" C++ script bodies\n"); fprintf(stderr, " help print this list\n"); fprintf(stderr, " inline inline function calls\n"); fprintf(stderr, " optimize-AST optimize the (transformed) AST; implies xform\n"); fprintf(stderr, " recursive report on recursive functions and exit\n"); + fprintf(stderr, " report-C++ report available C++ script bodies and exit\n"); + fprintf(stderr, " update-C++ generate reusable C++ for any missing script bodies\n"); + fprintf(stderr, " use-C++ use available C++ script bodies\n"); fprintf(stderr, " xform tranform scripts to \"reduced\" form\n"); exit(0); } auto& a_o = opts.analysis_options; - if ( util::streq(opt, "dump-uds") ) + if ( util::streq(opt, "add-C++") ) + a_o.add_CPP = true; + else if ( util::streq(opt, "dump-uds") ) a_o.activate = a_o.dump_uds = true; else if ( util::streq(opt, "dump-xform") ) a_o.activate = a_o.dump_xform = true; + else if ( util::streq(opt, "force-use-C++") ) + a_o.force_use_CPP = true; + else if ( util::streq(opt, "gen-C++") ) + a_o.gen_CPP = true; + else if ( util::streq(opt, "gen-standalone-C++") ) + a_o.gen_standalone_CPP = true; else if ( util::streq(opt, "inline") ) a_o.inliner = true; else if ( util::streq(opt, "recursive") ) a_o.inliner = a_o.report_recursive = true; + else if ( util::streq(opt, "report-C++") ) + a_o.report_CPP = true; + else if ( util::streq(opt, "update-C++") ) + a_o.update_CPP = true; + else if ( util::streq(opt, "use-C++") ) + a_o.use_CPP = true; else if ( util::streq(opt, "xform") ) a_o.activate = true; else if ( util::streq(opt, "optimize-AST") ) diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 1995c0c7b8..3a8ec83df9 100644 --- a/src/script_opt/ScriptOpt.cc +++ 
b/src/script_opt/ScriptOpt.cc @@ -2,14 +2,18 @@ #include "zeek/Options.h" #include "zeek/Reporter.h" -#include "zeek/Desc.h" #include "zeek/module_util.h" +#include "zeek/Desc.h" +#include "zeek/EventHandler.h" +#include "zeek/EventRegistry.h" #include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/Inline.h" #include "zeek/script_opt/Reduce.h" #include "zeek/script_opt/GenRDs.h" #include "zeek/script_opt/UseDefs.h" +#include "zeek/script_opt/CPP/Compile.h" +#include "zeek/script_opt/CPP/Func.h" namespace zeek::detail { @@ -42,6 +46,10 @@ void optimize_func(ScriptFunc* f, std::shared_ptr pf, if ( analysis_options.only_func ) printf("Original: %s\n", obj_desc(body.get()).c_str()); + if ( body->Tag() == STMT_CPP ) + // We're not able to optimize this. + return; + if ( pf->NumWhenStmts() > 0 || pf->NumLambdas() > 0 ) { if ( analysis_options.only_func ) @@ -116,8 +124,10 @@ void optimize_func(ScriptFunc* f, std::shared_ptr pf, pf = std::make_shared(f, body, true); body->Traverse(pf.get()); + pf = std::make_shared(f, body, true); // Compute its reaching definitions. RD_Decorate reduced_rds(pf); + reduced_rds.TraverseFunction(f, scope, body); rc->SetDefSetsMgr(reduced_rds.GetDefSetsMgr()); @@ -189,14 +199,68 @@ static void check_env_opt(const char* opt, bool& opt_flag) void analyze_scripts() { static bool did_init = false; + static std::string hash_dir; + + bool generating_CPP = false; if ( ! 
did_init ) { + auto hd = getenv("ZEEK_HASH_DIR"); + if ( hd ) + hash_dir = std::string(hd) + "/"; + check_env_opt("ZEEK_DUMP_XFORM", analysis_options.dump_xform); check_env_opt("ZEEK_DUMP_UDS", analysis_options.dump_uds); check_env_opt("ZEEK_INLINE", analysis_options.inliner); check_env_opt("ZEEK_OPT", analysis_options.optimize_AST); check_env_opt("ZEEK_XFORM", analysis_options.activate); + check_env_opt("ZEEK_ADD_CPP", analysis_options.add_CPP); + check_env_opt("ZEEK_UPDATE_CPP", analysis_options.update_CPP); + check_env_opt("ZEEK_GEN_CPP", analysis_options.gen_CPP); + check_env_opt("ZEEK_GEN_STANDALONE_CPP", + analysis_options.gen_standalone_CPP); + check_env_opt("ZEEK_REPORT_CPP", analysis_options.report_CPP); + check_env_opt("ZEEK_USE_CPP", analysis_options.use_CPP); + check_env_opt("ZEEK_FORCE_USE_CPP", + analysis_options.force_use_CPP); + + if ( analysis_options.gen_standalone_CPP ) + analysis_options.gen_CPP = true; + + if ( analysis_options.force_use_CPP ) + analysis_options.use_CPP = true; + + if ( analysis_options.gen_CPP ) + { + if ( analysis_options.add_CPP ) + { + reporter->Warning("gen-C++ incompatible with add-C++"); + analysis_options.add_CPP = false; + } + + if ( analysis_options.update_CPP ) + { + reporter->Warning("gen-C++ incompatible with update-C++"); + analysis_options.update_CPP = false; + } + + generating_CPP = true; + } + + if ( analysis_options.update_CPP || analysis_options.add_CPP ) + generating_CPP = true; + + if ( analysis_options.use_CPP && generating_CPP ) + { + reporter->Error("generating C++ incompatible with using C++"); + exit(1); + } + + if ( analysis_options.use_CPP && ! CPP_init_hook ) + { + reporter->Error("no C++ functions available to use"); + exit(1); + } auto usage = getenv("ZEEK_USAGE_ISSUES"); @@ -218,18 +282,218 @@ void analyze_scripts() did_init = true; } - if ( ! analysis_options.activate && ! analysis_options.inliner ) + if ( ! analysis_options.activate && ! analysis_options.inliner && + ! generating_CPP && ! 
analysis_options.report_CPP && + ! analysis_options.use_CPP ) + // Avoid profiling overhead. return; + const auto hash_name = hash_dir + "CPP-hashes"; + const auto gen_name = hash_dir + "CPP-gen-addl.h"; + + // Now that everything's parsed and BiF's have been initialized, + // profile the functions. + auto pfs = std::make_unique(funcs, is_CPP_compilable, false); + + if ( CPP_init_hook ) + (*CPP_init_hook)(); + + if ( analysis_options.report_CPP ) + { + if ( ! CPP_init_hook ) + { + printf("no C++ script bodies available\n"); + exit(0); + } + + printf("C++ script bodies available that match loaded scripts:\n"); + + std::unordered_set already_reported; + + for ( auto& f : funcs ) + { + auto name = f.Func()->Name(); + auto hash = f.Profile()->HashVal(); + bool have = compiled_scripts.count(hash) > 0; + auto specific = ""; + + if ( ! have ) + { + hash = script_specific_hash(f.Body(), hash); + have = compiled_scripts.count(hash) > 0; + if ( have ) + specific = " - specific"; + } + + printf("script function %s (hash %llu%s): %s\n", + name, hash, specific, have ? "yes" : "no"); + + if ( have ) + already_reported.insert(hash); + } + + printf("\nAdditional C++ script bodies available:\n"); + int addl = 0; + for ( auto s : compiled_scripts ) + if ( already_reported.count(s.first) == 0 ) + { + printf("%s body (hash %llu)\n", + s.second.body->Name().c_str(), s.first); + ++addl; + } + + if ( addl == 0 ) + printf("(none)\n"); + + exit(0); + } + + if ( analysis_options.use_CPP ) + { + for ( auto& f : funcs ) + { + auto hash = f.Profile()->HashVal(); + auto s = compiled_scripts.find(hash); + + if ( s == compiled_scripts.end() ) + { // Look for script-specific body. 
+ hash = script_specific_hash(f.Body(), hash); + s = compiled_scripts.find(hash); + } + + if ( s != compiled_scripts.end() ) + { + auto b = s->second.body; + b->SetHash(hash); + f.Func()->ReplaceBody(f.Body(), b); + f.SetBody(b); + + for ( auto& e : s->second.events ) + { + auto h = event_registry->Register(e); + h->SetUsed(); + } + } + + else if ( analysis_options.force_use_CPP ) + reporter->Warning("no C++ available for %s", f.Func()->Name()); + } + + // Now that we've loaded all of the compiled scripts + // relevant for the AST, activate standalone ones. + for ( auto cb : standalone_activations ) + (*cb)(); + } + + if ( generating_CPP ) + { + auto hm = std::make_unique(hash_name.c_str(), + analysis_options.add_CPP); + + if ( ! analysis_options.gen_CPP ) + { + for ( auto& func : funcs ) + { + auto hash = func.Profile()->HashVal(); + if ( compiled_scripts.count(hash) > 0 || + hm->HasHash(hash) ) + func.SetSkip(true); + } + + // Now that we've presumably marked a lot of functions + // as skippable, recompute the global profile. + pfs = std::make_unique(funcs, is_CPP_compilable, false); + } + + CPPCompile cpp(funcs, *pfs, gen_name.c_str(), *hm, + analysis_options.gen_CPP || + analysis_options.update_CPP, + analysis_options.gen_standalone_CPP); + + exit(0); + } + + if ( analysis_options.use_CPP ) + { + for ( auto& f : funcs ) + { + auto hash = f.Profile()->HashVal(); + auto s = compiled_scripts.find(hash); + + if ( s == compiled_scripts.end() ) + { // Look for script-specific body. 
+ hash = script_specific_hash(f.Body(), hash); + s = compiled_scripts.find(hash); + } + + if ( s != compiled_scripts.end() ) + { + auto b = s->second.body; + b->SetHash(hash); + f.Func()->ReplaceBody(f.Body(), b); + f.SetBody(b); + + for ( auto& e : s->second.events ) + { + auto h = event_registry->Register(e); + h->SetUsed(); + } + } + + else if ( analysis_options.force_use_CPP ) + reporter->Warning("no C++ available for %s", f.Func()->Name()); + } + + // Now that we've loaded all of the compiled scripts + // relevant for the AST, activate standalone ones. + for ( auto cb : standalone_activations ) + (*cb)(); + } + + if ( generating_CPP ) + { + auto hm = std::make_unique(hash_name.c_str(), + analysis_options.add_CPP); + + if ( ! analysis_options.gen_CPP ) + { + for ( auto& func : funcs ) + { + auto hash = func.Profile()->HashVal(); + if ( compiled_scripts.count(hash) > 0 || + hm->HasHash(hash) ) + func.SetSkip(true); + } + + // Now that we've presumably marked a lot of functions + // as skippable, recompute the global profile. + pfs = std::make_unique(funcs, is_CPP_compilable, false); + } + + CPPCompile cpp(funcs, *pfs, gen_name.c_str(), *hm, + analysis_options.gen_CPP || + analysis_options.update_CPP, + analysis_options.gen_standalone_CPP); + + exit(0); + } + if ( analysis_options.usage_issues > 0 && analysis_options.optimize_AST ) { fprintf(stderr, "warning: \"-O optimize-AST\" option is incompatible with -u option, deactivating optimization\n"); analysis_options.optimize_AST = false; } - // Now that everything's parsed and BiF's have been initialized, - // profile the functions. - auto pfs = std::make_unique(funcs, nullptr, true); + // Re-profile the functions, this time without worrying about + // compatibility with compilation to C++. Note that the first + // profiling pass above may have marked some of the functions + // as to-skip, so first clear those markings. 
Once we have + // full compile-to-C++ and ZAM support for all Zeek language + // features, we can remove the re-profiling here. + for ( auto& f : funcs ) + f.SetSkip(false); + + pfs = std::make_unique(funcs, nullptr, true); // Figure out which functions either directly or indirectly // appear in "when" clauses. @@ -247,7 +511,10 @@ void analyze_scripts() when_funcs.insert(f.Func()); for ( auto& bf : f.Profile()->WhenCalls() ) + { + ASSERT(pfs->FuncProf(bf)); when_funcs_to_do.insert(bf); + } #ifdef NOT_YET if ( analysis_options.report_uncompilable ) diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h index 0e5cfc98a8..d650d091c9 100644 --- a/src/script_opt/ScriptOpt.h +++ b/src/script_opt/ScriptOpt.h @@ -39,6 +39,34 @@ struct AnalyOpt { // If true, do global inlining. bool inliner = false; + // If true, generate C++. + bool gen_CPP = false; + + // If true, the C++ should be standalone (not require the presence + // of the corresponding script, and not activated by default). + bool gen_standalone_CPP = false; + + // If true, generate C++ for those script bodies that don't already + // have generated code, in a form that enables later compiles to + // take advantage of the newly-added elements. Only use for generating + // a zeek that will always include the associated scripts. + bool update_CPP = false; + + // If true, generate C++ for those script bodies that don't already + // have generated code. The added C++ is not made available for + // later generated code, and will work for a generated zeek that + // runs without including the associated scripts. + bool add_CPP = false; + + // If true, use C++ bodies if available. + bool use_CPP = false; + + // Same, but complain about missing bodies. + bool force_use_CPP = false; + + // If true, report on available C++ bodies. + bool report_CPP = false; + // If true, report which functions are directly and indirectly // recursive, and exit. Only germane if running the inliner.
bool report_recursive = false; diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index 9e79290d34..af52d127d0 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -18,6 +18,7 @@ scripts/base/init-bare.zeek scripts/base/frameworks/supervisor/api.zeek build/scripts/base/bif/supervisor.bif.zeek build/scripts/base/bif/packet_analysis.bif.zeek + build/scripts/base/bif/CPP-load.bif.zeek build/scripts/base/bif/plugins/Zeek_SNMP.types.bif.zeek build/scripts/base/bif/plugins/Zeek_KRB.types.bif.zeek build/scripts/base/bif/event.bif.zeek diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index b065d74ea5..84aa4662ee 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -18,6 +18,7 @@ scripts/base/init-bare.zeek scripts/base/frameworks/supervisor/api.zeek build/scripts/base/bif/supervisor.bif.zeek build/scripts/base/bif/packet_analysis.bif.zeek + build/scripts/base/bif/CPP-load.bif.zeek build/scripts/base/bif/plugins/Zeek_SNMP.types.bif.zeek build/scripts/base/bif/plugins/Zeek_KRB.types.bif.zeek build/scripts/base/bif/event.bif.zeek diff --git a/testing/btest/Baseline/plugins.hooks/output b/testing/btest/Baseline/plugins.hooks/output index 777c2b2b60..35ef76d80b 100644 --- a/testing/btest/Baseline/plugins.hooks/output +++ b/testing/btest/Baseline/plugins.hooks/output @@ -648,6 +648,7 @@ 0.000000 MetaHookPost DrainEvents() -> 0.000000 MetaHookPost LoadFile(0, ../main, <...>/main.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, ../plugin, <...>/plugin.zeek) -> -1 
+0.000000 MetaHookPost LoadFile(0, ./CPP-load.bif.zeek, <...>/CPP-load.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, ./Zeek_ARP.events.bif.zeek, <...>/Zeek_ARP.events.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, ./Zeek_AsciiReader.ascii.bif.zeek, <...>/Zeek_AsciiReader.ascii.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, ./Zeek_AsciiWriter.ascii.bif.zeek, <...>/Zeek_AsciiWriter.ascii.bif.zeek) -> -1 @@ -872,6 +873,7 @@ 0.000000 MetaHookPost LoadFile(0, base/init-default, <...>/init-default.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, base/init-frameworks-and-bifs.zeek, <...>/init-frameworks-and-bifs.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, base/packet-protocols, <...>/packet-protocols) -> -1 +0.000000 MetaHookPost LoadFile(0, base<...>/CPP-load.bif, <...>/CPP-load.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/Zeek_KRB.types.bif, <...>/Zeek_KRB.types.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/Zeek_SNMP.types.bif, <...>/Zeek_SNMP.types.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/active-http, <...>/active-http.zeek) -> -1 @@ -1656,6 +1658,7 @@ 0.000000 MetaHookPre DrainEvents() 0.000000 MetaHookPre LoadFile(0, ../main, <...>/main.zeek) 0.000000 MetaHookPre LoadFile(0, ../plugin, <...>/plugin.zeek) +0.000000 MetaHookPre LoadFile(0, ./CPP-load.bif.zeek, <...>/CPP-load.bif.zeek) 0.000000 MetaHookPre LoadFile(0, ./Zeek_ARP.events.bif.zeek, <...>/Zeek_ARP.events.bif.zeek) 0.000000 MetaHookPre LoadFile(0, ./Zeek_AsciiReader.ascii.bif.zeek, <...>/Zeek_AsciiReader.ascii.bif.zeek) 0.000000 MetaHookPre LoadFile(0, ./Zeek_AsciiWriter.ascii.bif.zeek, <...>/Zeek_AsciiWriter.ascii.bif.zeek) @@ -1880,6 +1883,7 @@ 0.000000 MetaHookPre LoadFile(0, base/init-default, <...>/init-default.zeek) 0.000000 MetaHookPre LoadFile(0, base/init-frameworks-and-bifs.zeek, <...>/init-frameworks-and-bifs.zeek) 0.000000 MetaHookPre LoadFile(0, base/packet-protocols, <...>/packet-protocols) +0.000000 MetaHookPre LoadFile(0, 
base<...>/CPP-load.bif, <...>/CPP-load.bif.zeek) 0.000000 MetaHookPre LoadFile(0, base<...>/Zeek_KRB.types.bif, <...>/Zeek_KRB.types.bif.zeek) 0.000000 MetaHookPre LoadFile(0, base<...>/Zeek_SNMP.types.bif, <...>/Zeek_SNMP.types.bif.zeek) 0.000000 MetaHookPre LoadFile(0, base<...>/active-http, <...>/active-http.zeek) @@ -2663,6 +2667,7 @@ 0.000000 | HookDrainEvents 0.000000 | HookLoadFile ../main <...>/main.zeek 0.000000 | HookLoadFile ../plugin <...>/plugin.zeek +0.000000 | HookLoadFile ./CPP-load.bif.zeek <...>/CPP-load.bif.zeek 0.000000 | HookLoadFile ./Zeek_ARP.events.bif.zeek <...>/Zeek_ARP.events.bif.zeek 0.000000 | HookLoadFile ./Zeek_AsciiReader.ascii.bif.zeek <...>/Zeek_AsciiReader.ascii.bif.zeek 0.000000 | HookLoadFile ./Zeek_AsciiWriter.ascii.bif.zeek <...>/Zeek_AsciiWriter.ascii.bif.zeek @@ -2899,6 +2904,7 @@ 0.000000 | HookLoadFile base/init-default <...>/init-default.zeek 0.000000 | HookLoadFile base/init-frameworks-and-bifs.zeek <...>/init-frameworks-and-bifs.zeek 0.000000 | HookLoadFile base/packet-protocols <...>/packet-protocols +0.000000 | HookLoadFile base<...>/CPP-load.bif <...>/CPP-load.bif.zeek 0.000000 | HookLoadFile base<...>/Zeek_KRB.types.bif <...>/Zeek_KRB.types.bif.zeek 0.000000 | HookLoadFile base<...>/Zeek_SNMP.types.bif <...>/Zeek_SNMP.types.bif.zeek 0.000000 | HookLoadFile base<...>/active-http <...>/active-http.zeek