tracking of event groups for compilation to standalone-C++

This commit is contained in:
Vern Paxson 2025-09-15 14:07:29 -07:00
parent b25a844210
commit ca49e69aac
17 changed files with 153 additions and 67 deletions

View file

@ -216,6 +216,16 @@ public:
*/
void AddFunc(detail::ScriptFuncPtr f);
/**
* Accessor for the group's name.
*
* @return A const reference to the name associated with the group.
*/
const auto& GetName() const { return name; }
/**
* Accessor for the group's kind.
*
* @return A const reference to the kind of this group; callers compare it
* against EventGroupKind values such as EventGroupKind::Module.
*/
const auto& GetEventGroupKind() const { return kind; }
private:
void UpdateFuncBodies();

View file

@ -295,10 +295,8 @@ ScriptFunc::ScriptFunc(std::string _name, FuncTypePtr ft, std::vector<StmtPtr> b
std::ranges::stable_sort(bodies, std::ranges::greater(), &Body::priority);
if ( ! bodies.empty() ) {
current_body = bodies[0].stmts;
current_priority = bodies[0].priority;
}
if ( ! bodies.empty() )
current_body = bodies[0];
}
ScriptFunc::~ScriptFunc() {
@ -562,13 +560,9 @@ void ScriptFunc::AddBody(StmtPtr new_body, const std::vector<IDPtr>& new_inits,
bodies.clear();
}
Body b;
b.stmts = new_body;
b.groups = {groups.begin(), groups.end()};
current_body = new_body;
current_priority = b.priority = priority;
current_body = Body{.stmts = new_body, .groups = {groups.begin(), groups.end()}, .priority = priority};
bodies.push_back(std::move(b));
bodies.push_back(current_body);
std::ranges::stable_sort(bodies, std::ranges::greater(), &Body::priority);
}
@ -577,18 +571,15 @@ void ScriptFunc::ReplaceBody(const StmtPtr& old_body, StmtPtr new_body) {
for ( auto body = bodies.begin(); body != bodies.end(); ++body )
if ( body->stmts.get() == old_body.get() ) {
if ( new_body ) {
if ( new_body )
body->stmts = new_body;
current_priority = body->priority;
}
else
bodies.erase(body);
found_it = true;
current_body = *body;
break;
}
current_body = new_body;
}
bool ScriptFunc::DeserializeCaptures(BrokerListView data) {

View file

@ -265,8 +265,7 @@ public:
*/
void ReplaceBody(const detail::StmtPtr& old_body, detail::StmtPtr new_body);
StmtPtr CurrentBody() const { return current_body; }
int CurrentPriority() const { return current_priority; }
auto CurrentBody() const { return current_body; }
/**
* Returns the function's frame size.
@ -322,11 +321,8 @@ private:
OffsetMap* captures_offset_mapping = nullptr;
// The most recently added/updated body ...
StmtPtr current_body;
// ... and its priority.
int current_priority = 0;
// A copy of the most recently added/updated Body.
Body current_body;
};
using built_in_func = ValPtr (*)(Frame* frame, const Args* args);

View file

@ -1,5 +1,6 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/EventRegistry.h"
#include "zeek/script_opt/CPP/Compile.h"
namespace zeek::detail {
@ -15,8 +16,9 @@ void CPPCompile::DeclareFunc(const FuncInfo& func) {
auto f = func.Func();
const auto& body = func.Body();
auto priority = func.Priority();
const auto& e_g = func.EventGroups();
CreateFunction(f->GetType(), pf, fname, body, priority, nullptr, f->Flavor());
CreateFunction(f->GetType(), pf, fname, body, priority, nullptr, f->Flavor(), &e_g);
if ( f->GetBodies().size() == 1 )
compiled_simple_funcs[f->GetName()] = fname;
@ -42,7 +44,8 @@ void CPPCompile::DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf) {
}
void CPPCompile::CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname, const StmtPtr& body,
int priority, const LambdaExpr* l, FunctionFlavor flavor) {
int priority, const LambdaExpr* l, FunctionFlavor flavor,
const std::forward_list<EventGroupPtr>* e_g) {
const auto& yt = ft->Yield();
in_hook = flavor == FUNC_FLAVOR_HOOK;
@ -103,9 +106,29 @@ void CPPCompile::CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, co
compiled_funcs.emplace(fname);
}
body_hashes[fname] = pf->HashVal();
body_priorities[fname] = priority;
body_locs[fname] = body->GetLocationInfo();
string module_group;
vector<string> attr_groups;
if ( e_g )
for ( auto g : *e_g ) {
const auto& name = g->GetName();
if ( g->GetEventGroupKind() == EventGroupKind::Module ) {
if ( module_group.empty() )
module_group = g->GetName();
else {
ASSERT(module_group == name);
}
}
else
attr_groups.push_back(name);
}
body_info[fname] = {.hash = pf->HashVal(),
.priority = priority,
.loc = body->GetLocationInfo(),
.module = module_group,
.groups = std::move(attr_groups)};
body_names.emplace(body.get(), fname);
}

View file

@ -32,7 +32,8 @@ void DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf);
// dynamic casting approach, which only requires one additional class.
void CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, const StmtPtr& body,
int priority, const LambdaExpr* l, FunctionFlavor flavor);
int priority, const LambdaExpr* l, FunctionFlavor flavor,
const std::forward_list<EventGroupPtr>* e_g = nullptr);
// Used for the case of creating a custom subclass of CPPStmt.
void DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, const std::string& args,

View file

@ -365,13 +365,23 @@ void CPPCompile::RegisterCompiledBody(const string& f) {
ASSERT(fi != func_index.end());
auto type_signature = casting_index[fi->second];
auto h = body_hashes[f];
auto p = body_priorities[f];
auto loc = body_locs[f];
const auto& bi = body_info[f];
auto h = bi.hash;
auto p = bi.priority;
auto loc = bi.loc;
auto body_info = Fmt(p) + ", " + Fmt(h) + ", \"" + loc->FileName() + " (C++)\", " + Fmt(loc->FirstLine());
Emit("\tCPP_RegisterBody(\"%s\", (void*) %s, %s, %s, std::vector<std::string>(%s)),", f, f, Fmt(type_signature),
body_info, events);
string module_group = "\"" + bi.module + "\"";
string attr_groups = "{";
for ( const auto& g : bi.groups )
attr_groups += " \"" + g + "\",";
attr_groups += " }";
Emit("\tCPP_RegisterBody(\"%s\", (void*) %s, %s, %s, std::vector<std::string>(%s), %s, %s),", f, f,
Fmt(type_signature), body_info, events, module_group, attr_groups);
}
void CPPCompile::GenEpilog() {
@ -555,8 +565,12 @@ void CPPCompile::GenRegisterBodies() {
"auto f = make_intrusive<CPPDynStmt>(b.func_name.c_str(), b.func, b.type_signature, "
"b.filename, b.line_num);");
auto reg = standalone ? "register_standalone_body" : "register_body";
Emit("%s__CPP(f, b.priority, b.h, b.events, finish_init__CPP);", reg);
if ( standalone )
Emit(
"register_standalone_body__CPP(f, b.priority, b.h, b.events, b.module_group, b.attr_groups, "
"finish_init__CPP);");
else
Emit("register_body__CPP(f, b.priority, b.h, b.events, finish_init__CPP);");
EndBlock();
EndBlock();

View file

@ -68,6 +68,14 @@ void Emit(const std::string& fmt, const std::string& arg1, const std::string& ar
NL();
}
// Seven-argument form of Emit(): writes one indented line to the output
// file, produced by expanding the printf-style format "fmt" with the given
// string arguments, and then terminates the line.
void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2, const std::string& arg3,
          const std::string& arg4, const std::string& arg5, const std::string& arg6, const std::string& arg7) const {
    // Start at the current indentation level.
    Indent();

    const char* const format = fmt.c_str();
    fprintf(write_file, format, arg1.c_str(), arg2.c_str(), arg3.c_str(), arg4.c_str(), arg5.c_str(), arg6.c_str(),
            arg7.c_str());

    // Finish the line.
    NL();
}
void NL() const { fputc('\n', write_file); }
// Indents to the current indentation level.

View file

@ -90,6 +90,8 @@ struct CompiledScript {
CPPStmtPtr body;
int priority;
std::vector<std::string> events;
std::string module_group;
std::vector<std::string> attr_groups;
void (*finish_init_func)();
};

View file

@ -224,10 +224,10 @@ p_hash_type CPPCompile::BodyHash(const Stmt* body) {
ASSERT(bn != body_names.end());
auto& body_name = bn->second;
auto bh = body_hashes.find(body_name);
ASSERT(bh != body_hashes.end());
auto bi = body_info.find(body_name);
ASSERT(bi != body_info.end());
return bh->second;
return bi->second.hash;
}
string CPPCompile::GenArgs(const RecordTypePtr& params, const Expr* e) {

View file

@ -51,15 +51,16 @@ std::unordered_map<std::string, std::string> compiled_simple_funcs;
// Maps function bodies to the names we use for them.
std::unordered_map<const Stmt*, std::string> body_names;
// Maps function names to hashes of bodies.
std::unordered_map<std::string, p_hash_type> body_hashes;
// Per-body information tracked for each compiled function body. Note that
// the field order matters to code that initializes this struct using
// designated initializers.
struct BodyInfo {
p_hash_type hash; // hash of the body
int priority; // priority, for hooks & event handlers
const Location* loc; // for better-than-nothing error reporting
std::string module; // if non-empty, used for "module" event groups
std::vector<std::string> groups; // attribute-based event groups
};
// Maps function names to priorities, for hooks & event handlers.
std::unordered_map<std::string, int> body_priorities;
// Maps function names to script locations, for better-than-nothing error
// reporting.
std::unordered_map<std::string, const Location*> body_locs;
// Maps function names to their body info.
std::unordered_map<std::string, BodyInfo> body_info;
// Maps function names to events relevant to them.
std::unordered_map<std::string, std::vector<std::string>> body_events;

View file

@ -285,9 +285,9 @@ void CPPCompile::GenStandaloneActivation() {
// We didn't wind up compiling it.
continue;
auto bh = body_hashes.find(bname);
ASSERT(bh != body_hashes.end());
func_bodies[f].push_back(bh->second);
auto bi = body_info.find(bname);
ASSERT(bi != body_info.end());
func_bodies[f].push_back(bi->second.hash);
}
for ( auto& fb : func_bodies ) {

View file

@ -46,16 +46,18 @@ void register_type__CPP(TypePtr t, const string& name) {
}
void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, vector<string> events, void (*finish_init)()) {
compiled_scripts[hash] = {std::move(body), priority, std::move(events), finish_init};
compiled_scripts[hash] = {std::move(body), priority, std::move(events), {}, {}, finish_init};
}
static unordered_map<p_hash_type, CompiledScript> compiled_standalone_scripts;
void register_standalone_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, vector<string> events,
std::string module_group, std::vector<std::string> attr_groups,
void (*finish_init)()) {
// For standalone scripts we don't actually need finish_init, but
// we keep it for symmetry with compiled_scripts.
compiled_standalone_scripts[hash] = {std::move(body), priority, std::move(events), finish_init};
compiled_standalone_scripts[hash] = {std::move(body), priority, std::move(events), std::move(module_group),
std::move(attr_groups), finish_init};
}
void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash, const char* name, TypePtr t, bool has_captures) {
@ -86,6 +88,19 @@ void register_scripts__CPP(p_hash_type h, void (*callback)()) {
standalone_callbacks[h] = callback;
}
// Updates "groups" with the event groups present in "cs": its module
// group, if it has one (non-empty name), and each of its attribute-based
// groups. Each group is obtained by registering its kind and name with the
// event registry.
//
// "groups" is an in/out parameter and so must be taken by reference;
// taking it by value would insert into a temporary copy and leave the
// caller's set unchanged.
static void update_event_groups(const CompiledScript& cs, unordered_set<EventGroupPtr>& groups) {
    if ( ! cs.module_group.empty() ) {
        auto er = event_registry->RegisterGroup(EventGroupKind::Module, cs.module_group);
        groups.insert(std::move(er));
    }

    for ( const auto& g : cs.attr_groups ) {
        auto er = event_registry->RegisterGroup(EventGroupKind::Attribute, g);
        groups.insert(std::move(er));
    }
}
void activate_bodies__CPP(const char* fn, const char* module, bool exported, TypePtr t, vector<p_hash_type> hashes) {
auto ft = cast_intrusive<FuncType>(t);
auto fg = lookup_ID(fn, module, false, false, false);
@ -108,7 +123,7 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ
fg->SetVal(v);
}
auto f = v->AsFunc();
auto f = cast_intrusive<ScriptFunc>(v->AsFuncVal()->AsFuncPtr());
// Events we need to register.
unordered_set<string> events;
@ -116,6 +131,9 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ
if ( ft->Flavor() == FUNC_FLAVOR_EVENT )
events.insert(fn);
// Groups we need to add f to.
unordered_set<EventGroupPtr> groups;
vector<detail::IDPtr> no_inits; // empty initialization vector
int num_params = ft->Params()->NumFields();
@ -129,10 +147,15 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ
added_bodies[fn].insert(h);
events.insert(cs.events.begin(), cs.events.end());
update_event_groups(cs, groups);
}
for ( const auto& e : events )
event_registry->Register(e);
for ( auto& g : groups )
g->AddFunc(f);
}
IDPtr lookup_global__CPP(const char* g, const TypePtr& t, bool exported) {
@ -174,6 +197,7 @@ FuncValPtr lookup_func__CPP(string name, int num_bodies, vector<p_hash_type> has
vector<StmtPtr> bodies;
vector<int> priorities;
unordered_set<EventGroupPtr> groups;
for ( auto h : hashes ) {
auto cs = compiled_scripts.find(h);
@ -192,10 +216,15 @@ FuncValPtr lookup_func__CPP(string name, int num_bodies, vector<p_hash_type> has
// the semantics for Register explicitly allow it.
for ( auto& e : f.events )
event_registry->Register(e);
update_event_groups(f, groups);
}
auto sf = make_intrusive<ScriptFunc>(std::move(name), std::move(ft), std::move(bodies), std::move(priorities));
for ( auto& g : groups )
g->AddFunc(sf);
return make_intrusive<FuncVal>(std::move(sf));
}

View file

@ -47,7 +47,8 @@ extern void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash,
// Same but for standalone function bodies.
extern void register_standalone_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash,
std::vector<std::string> events, void (*finish_init)());
std::vector<std::string> events, std::string module_group,
std::vector<std::string> attr_groups, void (*finish_init)());
// Registers a lambda body as associated with the given hash. Includes
// the name of the lambda (so it can be made available as a quasi-global

View file

@ -544,7 +544,8 @@ protected:
// code that loops over a vector of these to perform the registrations.
struct CPP_RegisterBody {
CPP_RegisterBody(std::string _func_name, void* _func, int _type_signature, int _priority, p_hash_type _h,
const char* _filename, int _line_num, std::vector<std::string> _events)
const char* _filename, int _line_num, std::vector<std::string> _events, std::string _module_group,
std::vector<std::string> _attr_groups)
: func_name(std::move(_func_name)),
func(_func),
type_signature(_type_signature),
@ -552,7 +553,9 @@ struct CPP_RegisterBody {
h(_h),
filename(_filename),
line_num(_line_num),
events(std::move(_events)) {}
events(std::move(_events)),
module_group(std::move(_module_group)),
attr_groups(std::move(_attr_groups)) {}
std::string func_name; // name of the function
void* func; // pointer to C++
@ -562,6 +565,8 @@ struct CPP_RegisterBody {
const char* filename;
int line_num;
std::vector<std::string> events;
std::string module_group;
std::vector<std::string> attr_groups;
};
} // namespace zeek::detail

View file

@ -299,7 +299,11 @@ void Inliner::CoalesceEventHandlers(ScriptFuncPtr func, const std::vector<Func::
PostInline(oi, inlined_func);
funcs.emplace_back(inlined_func, new_scope, merged_body, 0);
// We don't need to worry about event groups because the CoalescedScriptFunc
// wrapper checks at run-time whether any handlers have been disabled,
// and if so skips coalesced execution.
Func::Body body{.stmts = merged_body, .priority = 0};
funcs.emplace_back(inlined_func, new_scope, std::move(body));
auto pf = std::make_shared<ProfileFunc>(inlined_func.get(), merged_body, true);
funcs.back().SetProfile(std::move(pf));

View file

@ -41,7 +41,7 @@ void analyze_func(ScriptFuncPtr f) {
// Even if we're analyzing only a subset of the scripts, we still
// track all functions here because the inliner will need the full list.
ASSERT(f->GetScope());
funcs.emplace_back(f, f->GetScope(), f->CurrentBody(), f->CurrentPriority());
funcs.emplace_back(f, f->GetScope(), f->CurrentBody());
}
void analyze_lambda(LambdaExpr* l) {
@ -78,7 +78,8 @@ void analyze_global_stmts(Stmt* stmts) {
global_stmts->SetScope(sc);
global_stmts_ind = funcs.size();
funcs.emplace_back(global_stmts, sc, stmts->ThisPtr(), 0);
Func::Body body{.stmts = stmts->ThisPtr(), .priority = 0};
funcs.emplace_back(global_stmts, sc, std::move(body));
}
std::pair<StmtPtr, ScopePtr> get_global_stmts() {
@ -90,7 +91,7 @@ std::pair<StmtPtr, ScopePtr> get_global_stmts() {
void add_func_analysis_pattern(AnalyOpt& opts, const char* pat) {
try {
std::string full_pat = std::string("^(") + pat + ")$";
opts.only_funcs.emplace_back(full_pat);
opts.only_funcs.emplace_back(std::move(full_pat));
} catch ( const std::regex_error& e ) {
reporter->FatalError("bad file analysis pattern: %s", pat);
}
@ -99,7 +100,7 @@ void add_func_analysis_pattern(AnalyOpt& opts, const char* pat) {
void add_file_analysis_pattern(AnalyOpt& opts, const char* pat) {
try {
std::string full_pat = std::string("^.*(") + pat + ").*$";
opts.only_files.emplace_back(full_pat);
opts.only_files.emplace_back(std::move(full_pat));
} catch ( const std::regex_error& e ) {
reporter->FatalError("bad file analysis pattern: %s", pat);
}

View file

@ -149,19 +149,20 @@ using ScriptFuncPtr = IntrusivePtr<ScriptFunc>;
// Info we need for tracking an instance of a function.
class FuncInfo {
public:
FuncInfo(ScriptFuncPtr _func, ScopePtr _scope, StmtPtr _body, int _priority)
: func(std::move(_func)), scope(std::move(_scope)), body(std::move(_body)), priority(_priority) {}
FuncInfo(ScriptFuncPtr _func, ScopePtr _scope, Func::Body _body)
: func(std::move(_func)), scope(std::move(_scope)), body(std::move(_body)) {}
ScriptFunc* Func() const { return func.get(); }
const ScriptFuncPtr& FuncPtr() const { return func; }
const ScopePtr& Scope() const { return scope; }
const StmtPtr& Body() const { return body; }
int Priority() const { return priority; }
const StmtPtr& Body() const { return body.stmts; }
int Priority() const { return body.priority; }
auto EventGroups() const { return body.groups; }
const ProfileFunc* Profile() const { return pf.get(); }
std::shared_ptr<ProfileFunc> ProfilePtr() const { return pf; }
void SetScope(ScopePtr new_scope) { scope = std::move(new_scope); }
void SetBody(StmtPtr new_body) { body = std::move(new_body); }
void SetBody(StmtPtr new_body) { body.stmts = std::move(new_body); }
void SetProfile(std::shared_ptr<ProfileFunc> _pf) { pf = std::move(_pf); }
bool ShouldAnalyze() const { return should_analyze; }
@ -178,9 +179,8 @@ public:
protected:
ScriptFuncPtr func;
ScopePtr scope;
StmtPtr body;
Func::Body body;
std::shared_ptr<ProfileFunc> pf;
int priority;
// Whether to analyze this function at all, per optimization selection
// via --optimize-file/--optimize-func. If those flags aren't used,