hooks for activating the compiler & associated BiF

This commit is contained in:
Vern Paxson 2021-04-19 16:36:02 -07:00
parent 863be9436b
commit 24e92fa54a
9 changed files with 337 additions and 6 deletions

View file

@ -1933,6 +1933,7 @@ type gtp_delete_pdp_ctx_response_elements: record {
@load base/frameworks/supervisor/api
@load base/bif/supervisor.bif
@load base/bif/packet_analysis.bif
@load base/bif/CPP-load.bif
## Internal function.
function add_interface(iold: string, inew: string): string

View file

@ -124,6 +124,9 @@ set(BIF_SRCS
# it's needed before parsing the packet protocol scripts, which happen
# very near to the start of parsing.
packet_analysis/packet_analysis.bif
# The C++ loading BIF is treated like other top-level BIFs to give
# us flexibility regarding when it's called.
script_opt/CPP/CPP-load.bif
)
foreach (bift ${BIF_SRCS})

View file

@ -62,6 +62,7 @@
#include "option.bif.func_h"
#include "supervisor.bif.func_h"
#include "packet_analysis.bif.func_h"
#include "CPP-load.bif.func_h"
#include "zeek.bif.func_def"
#include "stats.bif.func_def"
@ -70,6 +71,7 @@
#include "option.bif.func_def"
#include "supervisor.bif.func_def"
#include "packet_analysis.bif.func_def"
#include "CPP-load.bif.func_def"
extern RETSIGTYPE sig_handler(int signo);

View file

@ -158,26 +158,48 @@ static void set_analysis_option(const char* opt, Options& opts)
if ( util::streq(opt, "help") )
{
fprintf(stderr, "--optimize options:\n");
fprintf(stderr, " all equivalent to \"inline\" and \"activate\"\n");
fprintf(stderr, " add-C++ generate private C++ for any missing script bodies\n");
fprintf(stderr, " dump-uds dump use-defs to stdout; implies xform\n");
fprintf(stderr, " dump-xform dump transformed scripts to stdout; implies xform\n");
fprintf(stderr, " force-use-C++ use available C++ script bodies, warning about missing ones\n");
fprintf(stderr, " gen-C++ generate C++ script bodies\n");
fprintf(stderr, " gen-standalone-C++ generate \"standalone\" C++ script bodies\n");
fprintf(stderr, " help print this list\n");
fprintf(stderr, " inline inline function calls\n");
fprintf(stderr, " optimize-AST optimize the (transformed) AST; implies xform\n");
fprintf(stderr, " recursive report on recursive functions and exit\n");
fprintf(stderr, " report-C++ report available C++ script bodies and exit\n");
fprintf(stderr, " update-C++ generate reusable C++ for any missing script bodies\n");
fprintf(stderr, " use-C++ use available C++ script bodies\n");
fprintf(stderr, " xform tranform scripts to \"reduced\" form\n");
exit(0);
}
auto& a_o = opts.analysis_options;
if ( util::streq(opt, "dump-uds") )
if ( util::streq(opt, "add-C++") )
a_o.add_CPP = true;
else if ( util::streq(opt, "dump-uds") )
a_o.activate = a_o.dump_uds = true;
else if ( util::streq(opt, "dump-xform") )
a_o.activate = a_o.dump_xform = true;
else if ( util::streq(opt, "force-use-C++") )
a_o.force_use_CPP = true;
else if ( util::streq(opt, "gen-C++") )
a_o.gen_CPP = true;
else if ( util::streq(opt, "gen-standalone-C++") )
a_o.gen_standalone_CPP = true;
else if ( util::streq(opt, "inline") )
a_o.inliner = true;
else if ( util::streq(opt, "recursive") )
a_o.inliner = a_o.report_recursive = true;
else if ( util::streq(opt, "report-C++") )
a_o.report_CPP = true;
else if ( util::streq(opt, "update-C++") )
a_o.update_CPP = true;
else if ( util::streq(opt, "use-C++") )
a_o.use_CPP = true;
else if ( util::streq(opt, "xform") )
a_o.activate = true;
else if ( util::streq(opt, "optimize-AST") )

View file

@ -2,14 +2,18 @@
#include "zeek/Options.h"
#include "zeek/Reporter.h"
#include "zeek/Desc.h"
#include "zeek/module_util.h"
#include "zeek/Desc.h"
#include "zeek/EventHandler.h"
#include "zeek/EventRegistry.h"
#include "zeek/script_opt/ScriptOpt.h"
#include "zeek/script_opt/ProfileFunc.h"
#include "zeek/script_opt/Inline.h"
#include "zeek/script_opt/Reduce.h"
#include "zeek/script_opt/GenRDs.h"
#include "zeek/script_opt/UseDefs.h"
#include "zeek/script_opt/CPP/Compile.h"
#include "zeek/script_opt/CPP/Func.h"
namespace zeek::detail {
@ -42,6 +46,10 @@ void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf,
if ( analysis_options.only_func )
printf("Original: %s\n", obj_desc(body.get()).c_str());
if ( body->Tag() == STMT_CPP )
// We're not able to optimize this.
return;
if ( pf->NumWhenStmts() > 0 || pf->NumLambdas() > 0 )
{
if ( analysis_options.only_func )
@ -116,8 +124,10 @@ void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf,
pf = std::make_shared<ProfileFunc>(f, body, true);
body->Traverse(pf.get());
pf = std::make_shared<ProfileFunc>(f, body, true);
// Compute its reaching definitions.
RD_Decorate reduced_rds(pf);
reduced_rds.TraverseFunction(f, scope, body);
rc->SetDefSetsMgr(reduced_rds.GetDefSetsMgr());
@ -189,14 +199,68 @@ static void check_env_opt(const char* opt, bool& opt_flag)
void analyze_scripts()
{
static bool did_init = false;
static std::string hash_dir;
bool generating_CPP = false;
if ( ! did_init )
{
auto hd = getenv("ZEEK_HASH_DIR");
if ( hd )
hash_dir = std::string(hd) + "/";
check_env_opt("ZEEK_DUMP_XFORM", analysis_options.dump_xform);
check_env_opt("ZEEK_DUMP_UDS", analysis_options.dump_uds);
check_env_opt("ZEEK_INLINE", analysis_options.inliner);
check_env_opt("ZEEK_OPT", analysis_options.optimize_AST);
check_env_opt("ZEEK_XFORM", analysis_options.activate);
check_env_opt("ZEEK_ADD_CPP", analysis_options.add_CPP);
check_env_opt("ZEEK_UPDATE_CPP", analysis_options.update_CPP);
check_env_opt("ZEEK_GEN_CPP", analysis_options.gen_CPP);
check_env_opt("ZEEK_GEN_STANDALONE_CPP",
analysis_options.gen_standalone_CPP);
check_env_opt("ZEEK_REPORT_CPP", analysis_options.report_CPP);
check_env_opt("ZEEK_USE_CPP", analysis_options.use_CPP);
check_env_opt("ZEEK_FORCE_USE_CPP",
analysis_options.force_use_CPP);
if ( analysis_options.gen_standalone_CPP )
analysis_options.gen_CPP = true;
if ( analysis_options.force_use_CPP )
analysis_options.use_CPP = true;
if ( analysis_options.gen_CPP )
{
if ( analysis_options.add_CPP )
{
reporter->Warning("gen-C++ incompatible with add-C++");
analysis_options.add_CPP = false;
}
if ( analysis_options.update_CPP )
{
reporter->Warning("gen-C++ incompatible with update-C++");
analysis_options.update_CPP = false;
}
generating_CPP = true;
}
if ( analysis_options.update_CPP || analysis_options.add_CPP )
generating_CPP = true;
if ( analysis_options.use_CPP && generating_CPP )
{
reporter->Error("generating C++ incompatible with using C++");
exit(1);
}
if ( analysis_options.use_CPP && ! CPP_init_hook )
{
reporter->Error("no C++ functions available to use");
exit(1);
}
auto usage = getenv("ZEEK_USAGE_ISSUES");
@ -218,18 +282,218 @@ void analyze_scripts()
did_init = true;
}
if ( ! analysis_options.activate && ! analysis_options.inliner )
if ( ! analysis_options.activate && ! analysis_options.inliner &&
! generating_CPP && ! analysis_options.report_CPP &&
! analysis_options.use_CPP )
// Avoid profiling overhead.
return;
const auto hash_name = hash_dir + "CPP-hashes";
const auto gen_name = hash_dir + "CPP-gen-addl.h";
// Now that everything's parsed and BiFs have been initialized,
// profile the functions.
auto pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false);
if ( CPP_init_hook )
(*CPP_init_hook)();
if ( analysis_options.report_CPP )
{
if ( ! CPP_init_hook )
{
printf("no C++ script bodies available\n");
exit(0);
}
printf("C++ script bodies available that match loaded scripts:\n");
std::unordered_set<unsigned long long> already_reported;
for ( auto& f : funcs )
{
auto name = f.Func()->Name();
auto hash = f.Profile()->HashVal();
bool have = compiled_scripts.count(hash) > 0;
auto specific = "";
if ( ! have )
{
hash = script_specific_hash(f.Body(), hash);
have = compiled_scripts.count(hash) > 0;
if ( have )
specific = " - specific";
}
printf("script function %s (hash %llu%s): %s\n",
name, hash, specific, have ? "yes" : "no");
if ( have )
already_reported.insert(hash);
}
printf("\nAdditional C++ script bodies available:\n");
int addl = 0;
for ( auto s : compiled_scripts )
if ( already_reported.count(s.first) == 0 )
{
printf("%s body (hash %llu)\n",
s.second.body->Name().c_str(), s.first);
++addl;
}
if ( addl == 0 )
printf("(none)\n");
exit(0);
}
if ( analysis_options.use_CPP )
{
for ( auto& f : funcs )
{
auto hash = f.Profile()->HashVal();
auto s = compiled_scripts.find(hash);
if ( s == compiled_scripts.end() )
{ // Look for script-specific body.
hash = script_specific_hash(f.Body(), hash);
s = compiled_scripts.find(hash);
}
if ( s != compiled_scripts.end() )
{
auto b = s->second.body;
b->SetHash(hash);
f.Func()->ReplaceBody(f.Body(), b);
f.SetBody(b);
for ( auto& e : s->second.events )
{
auto h = event_registry->Register(e);
h->SetUsed();
}
}
else if ( analysis_options.force_use_CPP )
reporter->Warning("no C++ available for %s", f.Func()->Name());
}
// Now that we've loaded all of the compiled scripts
// relevant for the AST, activate standalone ones.
for ( auto cb : standalone_activations )
(*cb)();
}
if ( generating_CPP )
{
auto hm = std::make_unique<CPPHashManager>(hash_name.c_str(),
analysis_options.add_CPP);
if ( ! analysis_options.gen_CPP )
{
for ( auto& func : funcs )
{
auto hash = func.Profile()->HashVal();
if ( compiled_scripts.count(hash) > 0 ||
hm->HasHash(hash) )
func.SetSkip(true);
}
// Now that we've presumably marked a lot of functions
// as skippable, recompute the global profile.
pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false);
}
CPPCompile cpp(funcs, *pfs, gen_name.c_str(), *hm,
analysis_options.gen_CPP ||
analysis_options.update_CPP,
analysis_options.gen_standalone_CPP);
exit(0);
}
if ( analysis_options.use_CPP )
{
for ( auto& f : funcs )
{
auto hash = f.Profile()->HashVal();
auto s = compiled_scripts.find(hash);
if ( s == compiled_scripts.end() )
{ // Look for script-specific body.
hash = script_specific_hash(f.Body(), hash);
s = compiled_scripts.find(hash);
}
if ( s != compiled_scripts.end() )
{
auto b = s->second.body;
b->SetHash(hash);
f.Func()->ReplaceBody(f.Body(), b);
f.SetBody(b);
for ( auto& e : s->second.events )
{
auto h = event_registry->Register(e);
h->SetUsed();
}
}
else if ( analysis_options.force_use_CPP )
reporter->Warning("no C++ available for %s", f.Func()->Name());
}
// Now that we've loaded all of the compiled scripts
// relevant for the AST, activate standalone ones.
for ( auto cb : standalone_activations )
(*cb)();
}
if ( generating_CPP )
{
auto hm = std::make_unique<CPPHashManager>(hash_name.c_str(),
analysis_options.add_CPP);
if ( ! analysis_options.gen_CPP )
{
for ( auto& func : funcs )
{
auto hash = func.Profile()->HashVal();
if ( compiled_scripts.count(hash) > 0 ||
hm->HasHash(hash) )
func.SetSkip(true);
}
// Now that we've presumably marked a lot of functions
// as skippable, recompute the global profile.
pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false);
}
CPPCompile cpp(funcs, *pfs, gen_name.c_str(), *hm,
analysis_options.gen_CPP ||
analysis_options.update_CPP,
analysis_options.gen_standalone_CPP);
exit(0);
}
if ( analysis_options.usage_issues > 0 && analysis_options.optimize_AST )
{
fprintf(stderr, "warning: \"-O optimize-AST\" option is incompatible with -u option, deactivating optimization\n");
analysis_options.optimize_AST = false;
}
// Now that everything's parsed and BiFs have been initialized,
// profile the functions.
auto pfs = std::make_unique<ProfileFuncs>(funcs, nullptr, true);
// Re-profile the functions, this time without worrying about
// compatibility with compilation to C++. Note that the first
// profiling pass above may have marked some of the functions
// as to-skip, so first clear those markings. Once we have
// full compile-to-C++ and ZAM support for all Zeek language
// features, we can remove the re-profiling here.
for ( auto& f : funcs )
f.SetSkip(false);
pfs = std::make_unique<ProfileFuncs>(funcs, nullptr, true);
// Figure out which functions either directly or indirectly
// appear in "when" clauses.
@ -247,7 +511,10 @@ void analyze_scripts()
when_funcs.insert(f.Func());
for ( auto& bf : f.Profile()->WhenCalls() )
{
ASSERT(pfs->FuncProf(bf));
when_funcs_to_do.insert(bf);
}
#ifdef NOT_YET
if ( analysis_options.report_uncompilable )

View file

@ -39,6 +39,34 @@ struct AnalyOpt {
// If true, do global inlining.
bool inliner = false;
// If true, generate C++.
bool gen_CPP = false;
// If true, the C++ should be standalone (not require the presence
// of the corresponding script, and not activated by default).
bool gen_standalone_CPP = false;
// If true, generate C++ for those script bodies that don't already
// have generated code, in a form that enables later compiles to
// take advantage of the newly-added elements. Only use for generating
// a zeek that will always include the associated scripts.
bool update_CPP = false;
// If true, generate C++ for those script bodies that don't already
// have generated code. The added C++ is not made available for
// later generated code, and will work for a generated zeek that
// runs without including the associated scripts.
bool add_CPP = false;
// If true, use C++ bodies if available.
bool use_CPP = false;
// Same, but complain about missing bodies.
bool force_use_CPP = false;
// If true, report on available C++ bodies.
bool report_CPP = false;
// If true, report which functions are directly and indirectly
// recursive, and exit. Only germane if running the inliner.
bool report_recursive = false;

View file

@ -18,6 +18,7 @@ scripts/base/init-bare.zeek
scripts/base/frameworks/supervisor/api.zeek
build/scripts/base/bif/supervisor.bif.zeek
build/scripts/base/bif/packet_analysis.bif.zeek
build/scripts/base/bif/CPP-load.bif.zeek
build/scripts/base/bif/plugins/Zeek_SNMP.types.bif.zeek
build/scripts/base/bif/plugins/Zeek_KRB.types.bif.zeek
build/scripts/base/bif/event.bif.zeek

View file

@ -18,6 +18,7 @@ scripts/base/init-bare.zeek
scripts/base/frameworks/supervisor/api.zeek
build/scripts/base/bif/supervisor.bif.zeek
build/scripts/base/bif/packet_analysis.bif.zeek
build/scripts/base/bif/CPP-load.bif.zeek
build/scripts/base/bif/plugins/Zeek_SNMP.types.bif.zeek
build/scripts/base/bif/plugins/Zeek_KRB.types.bif.zeek
build/scripts/base/bif/event.bif.zeek

View file

@ -648,6 +648,7 @@
0.000000 MetaHookPost DrainEvents() -> <void>
0.000000 MetaHookPost LoadFile(0, ../main, <...>/main.zeek) -> -1
0.000000 MetaHookPost LoadFile(0, ../plugin, <...>/plugin.zeek) -> -1
0.000000 MetaHookPost LoadFile(0, ./CPP-load.bif.zeek, <...>/CPP-load.bif.zeek) -> -1
0.000000 MetaHookPost LoadFile(0, ./Zeek_ARP.events.bif.zeek, <...>/Zeek_ARP.events.bif.zeek) -> -1
0.000000 MetaHookPost LoadFile(0, ./Zeek_AsciiReader.ascii.bif.zeek, <...>/Zeek_AsciiReader.ascii.bif.zeek) -> -1
0.000000 MetaHookPost LoadFile(0, ./Zeek_AsciiWriter.ascii.bif.zeek, <...>/Zeek_AsciiWriter.ascii.bif.zeek) -> -1
@ -872,6 +873,7 @@
0.000000 MetaHookPost LoadFile(0, base/init-default, <...>/init-default.zeek) -> -1
0.000000 MetaHookPost LoadFile(0, base/init-frameworks-and-bifs.zeek, <...>/init-frameworks-and-bifs.zeek) -> -1
0.000000 MetaHookPost LoadFile(0, base/packet-protocols, <...>/packet-protocols) -> -1
0.000000 MetaHookPost LoadFile(0, base<...>/CPP-load.bif, <...>/CPP-load.bif.zeek) -> -1
0.000000 MetaHookPost LoadFile(0, base<...>/Zeek_KRB.types.bif, <...>/Zeek_KRB.types.bif.zeek) -> -1
0.000000 MetaHookPost LoadFile(0, base<...>/Zeek_SNMP.types.bif, <...>/Zeek_SNMP.types.bif.zeek) -> -1
0.000000 MetaHookPost LoadFile(0, base<...>/active-http, <...>/active-http.zeek) -> -1
@ -1656,6 +1658,7 @@
0.000000 MetaHookPre DrainEvents()
0.000000 MetaHookPre LoadFile(0, ../main, <...>/main.zeek)
0.000000 MetaHookPre LoadFile(0, ../plugin, <...>/plugin.zeek)
0.000000 MetaHookPre LoadFile(0, ./CPP-load.bif.zeek, <...>/CPP-load.bif.zeek)
0.000000 MetaHookPre LoadFile(0, ./Zeek_ARP.events.bif.zeek, <...>/Zeek_ARP.events.bif.zeek)
0.000000 MetaHookPre LoadFile(0, ./Zeek_AsciiReader.ascii.bif.zeek, <...>/Zeek_AsciiReader.ascii.bif.zeek)
0.000000 MetaHookPre LoadFile(0, ./Zeek_AsciiWriter.ascii.bif.zeek, <...>/Zeek_AsciiWriter.ascii.bif.zeek)
@ -1880,6 +1883,7 @@
0.000000 MetaHookPre LoadFile(0, base/init-default, <...>/init-default.zeek)
0.000000 MetaHookPre LoadFile(0, base/init-frameworks-and-bifs.zeek, <...>/init-frameworks-and-bifs.zeek)
0.000000 MetaHookPre LoadFile(0, base/packet-protocols, <...>/packet-protocols)
0.000000 MetaHookPre LoadFile(0, base<...>/CPP-load.bif, <...>/CPP-load.bif.zeek)
0.000000 MetaHookPre LoadFile(0, base<...>/Zeek_KRB.types.bif, <...>/Zeek_KRB.types.bif.zeek)
0.000000 MetaHookPre LoadFile(0, base<...>/Zeek_SNMP.types.bif, <...>/Zeek_SNMP.types.bif.zeek)
0.000000 MetaHookPre LoadFile(0, base<...>/active-http, <...>/active-http.zeek)
@ -2663,6 +2667,7 @@
0.000000 | HookDrainEvents
0.000000 | HookLoadFile ../main <...>/main.zeek
0.000000 | HookLoadFile ../plugin <...>/plugin.zeek
0.000000 | HookLoadFile ./CPP-load.bif.zeek <...>/CPP-load.bif.zeek
0.000000 | HookLoadFile ./Zeek_ARP.events.bif.zeek <...>/Zeek_ARP.events.bif.zeek
0.000000 | HookLoadFile ./Zeek_AsciiReader.ascii.bif.zeek <...>/Zeek_AsciiReader.ascii.bif.zeek
0.000000 | HookLoadFile ./Zeek_AsciiWriter.ascii.bif.zeek <...>/Zeek_AsciiWriter.ascii.bif.zeek
@ -2899,6 +2904,7 @@
0.000000 | HookLoadFile base/init-default <...>/init-default.zeek
0.000000 | HookLoadFile base/init-frameworks-and-bifs.zeek <...>/init-frameworks-and-bifs.zeek
0.000000 | HookLoadFile base/packet-protocols <...>/packet-protocols
0.000000 | HookLoadFile base<...>/CPP-load.bif <...>/CPP-load.bif.zeek
0.000000 | HookLoadFile base<...>/Zeek_KRB.types.bif <...>/Zeek_KRB.types.bif.zeek
0.000000 | HookLoadFile base<...>/Zeek_SNMP.types.bif <...>/Zeek_SNMP.types.bif.zeek
0.000000 | HookLoadFile base<...>/active-http <...>/active-http.zeek