From 64de2dbf311f998b5eb823a420ad338ef31b5cfe Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 5 Oct 2024 16:50:26 -0700 Subject: [PATCH 1/3] factored ZAM source's main header into collection of per-source-file headers --- src/script_opt/ZAM/AM-Opt.h | 101 ++++++++ src/script_opt/ZAM/Branches.h | 56 +++++ src/script_opt/ZAM/Compile.h | 421 ++------------------------------- src/script_opt/ZAM/Driver.h | 31 +++ src/script_opt/ZAM/Expr.h | 79 +++++++ src/script_opt/ZAM/Inst-Gen.h | 3 +- src/script_opt/ZAM/Low-Level.h | 42 ++++ src/script_opt/ZAM/Stmt.h | 48 ++++ src/script_opt/ZAM/Vars.h | 44 ++++ 9 files changed, 428 insertions(+), 397 deletions(-) create mode 100644 src/script_opt/ZAM/AM-Opt.h create mode 100644 src/script_opt/ZAM/Branches.h create mode 100644 src/script_opt/ZAM/Driver.h create mode 100644 src/script_opt/ZAM/Expr.h create mode 100644 src/script_opt/ZAM/Low-Level.h create mode 100644 src/script_opt/ZAM/Stmt.h create mode 100644 src/script_opt/ZAM/Vars.h diff --git a/src/script_opt/ZAM/AM-Opt.h b/src/script_opt/ZAM/AM-Opt.h new file mode 100644 index 0000000000..60e325d7c8 --- /dev/null +++ b/src/script_opt/ZAM/AM-Opt.h @@ -0,0 +1,101 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for low-level optimization of the ZAM abstract machine. +// +// This file is included by Compile.h to insert into the ZAMCompiler class. + +// Optimizing the low-level compiled instructions. +void OptimizeInsts(); + +// Tracks which instructions can be branched to via the given +// set of switches. +template +void TallySwitchTargets(const CaseMapsI& switches); + +// Remove code that can't be reached. True if some removal happened. +bool RemoveDeadCode(); + +// Collapse chains of gotos. True if something changed. +bool CollapseGoTos(); + +// Prune statements that are unnecessary. True if something got +// pruned. 
+bool PruneUnused(); + +// For the current state of insts1, compute lifetimes of frame +// denizens (variable(s) using a given frame slot) in terms of +// first-instruction-to-last-instruction during which they're +// relevant, including consideration for loops. +void ComputeFrameLifetimes(); + +// Given final frame lifetime information, remaps frame members +// with non-overlapping lifetimes to share slots. +void ReMapFrame(); + +// Given final frame lifetime information, remaps slots in the +// interpreter frame. (No longer strictly necessary.) +void ReMapInterpreterFrame(); + +// Computes the remapping for a variable currently in the given slot, +// whose scope begins at the given instruction. +void ReMapVar(const ID* id, int slot, zeek_uint_t inst); + +// Look to initialize the beginning of local lifetime based on slot +// assignment at instruction inst. +void CheckSlotAssignment(int slot, const ZInstI* inst); + +// Track that a local's lifetime begins at the given statement. +void SetLifetimeStart(int slot, const ZInstI* inst); + +// Look for extension of local lifetime based on slot usage +// at instruction inst. +void CheckSlotUse(int slot, const ZInstI* inst); + +// Extend (or create) the end of a local's lifetime. +void ExtendLifetime(int slot, const ZInstI* inst); + +// Returns the (live) instruction at the beginning/end of the loop(s) +// within which the given instruction lies; or that instruction +// itself if it's not inside a loop. The second argument specifies +// the loop depth. For example, a value of '2' means "extend to +// the beginning/end of any loop(s) of depth >= 2". +const ZInstI* BeginningOfLoop(const ZInstI* inst, int depth) const; +const ZInstI* EndOfLoop(const ZInstI* inst, int depth) const; + +// True if any statement other than a frame sync uses the given slot. +bool VarIsUsed(int slot) const; + +// Find the first non-dead instruction after i (inclusive). 
+// If follow_gotos is true, then if that instruction is +// an unconditional branch, continues the process until +// a different instruction is found (and report if there +// are infinite loops). +// +// First form returns nil if there's nothing live after i. +// Second form returns insts1.size() in that case. +ZInstI* FirstLiveInst(ZInstI* i, bool follow_gotos = false); +zeek_uint_t FirstLiveInst(zeek_uint_t i, bool follow_gotos = false); + +// Same, but not including i. +ZInstI* NextLiveInst(ZInstI* i, bool follow_gotos = false) { + if ( i->inst_num == static_cast(insts1.size()) - 1 ) + return nullptr; + return FirstLiveInst(insts1[i->inst_num + 1], follow_gotos); +} +int NextLiveInst(int i, bool follow_gotos = false) { return FirstLiveInst(i + 1, follow_gotos); } + +// Mark an instruction as unnecessary and remove its influence on +// other statements. The instruction is indicated as an offset +// into insts1; any labels associated with it are transferred +// to its next live successor, if any. +void KillInst(ZInstI* i) { KillInst(i->inst_num); } +void KillInst(zeek_uint_t i); + +// Helper function for propagating control flow (of a given type) +// backwards, when the instruction at the given offset has been killed. +void BackPropagateCFT(int inst_num, ControlFlowType cf_type); + +// The same, but kills any successor instructions until finding +// one that's labeled. +void KillInsts(ZInstI* i) { KillInsts(i->inst_num); } +void KillInsts(zeek_uint_t i); diff --git a/src/script_opt/ZAM/Branches.h b/src/script_opt/ZAM/Branches.h new file mode 100644 index 0000000000..e6461751d3 --- /dev/null +++ b/src/script_opt/ZAM/Branches.h @@ -0,0 +1,56 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for managing low-level ZAM control flow, which is implemented +// using go-to branches. +// +// This file is included by Compile.h to insert into the ZAMCompiler class. 
+ +void PushNexts() { PushGoTos(nexts); } +void PushBreaks() { PushGoTos(breaks); } +void PushFallThroughs() { PushGoTos(fallthroughs); } +void PushCatchReturns() { PushGoTos(catches); } + +void ResolveNexts(const InstLabel l) { ResolveGoTos(nexts, l, CFT_NEXT); } +void ResolveBreaks(const InstLabel l) { ResolveGoTos(breaks, l, CFT_BREAK); } +void ResolveFallThroughs(const InstLabel l) { ResolveGoTos(fallthroughs, l); } +void ResolveCatchReturns(const InstLabel l) { ResolveGoTos(catches, l, CFT_INLINED_RETURN); } + +using GoToSet = std::vector; +using GoToSets = std::vector; + +void PushGoTos(GoToSets& gotos); +void ResolveGoTos(GoToSets& gotos, const InstLabel l, ControlFlowType cft = CFT_NONE); + +ZAMStmt GenGoTo(GoToSet& v); +ZAMStmt GoToStub(); +ZAMStmt GoTo(const InstLabel l); +InstLabel GoToTarget(const ZAMStmt s); +InstLabel GoToTargetBeyond(const ZAMStmt s); + +void SetTarget(ZInstI* inst, const InstLabel l, int slot); + +// Given a GoTo target, find its live equivalent (first instruction +// at that location or beyond that's live). +ZInstI* FindLiveTarget(ZInstI* goto_target); + +// Given an instruction that has a slot associated with the +// given target, updates the slot to correspond with the current +// instruction number of the target. 
+void ConcretizeBranch(ZInstI* inst, ZInstI* target, int target_slot); + +void SetV(ZAMStmt s, const InstLabel l, int v) { + if ( v == 1 ) + SetV1(s, l); + else if ( v == 2 ) + SetV2(s, l); + else if ( v == 3 ) + SetV3(s, l); + else + SetV4(s, l); +} + +void SetV1(ZAMStmt s, const InstLabel l); +void SetV2(ZAMStmt s, const InstLabel l); +void SetV3(ZAMStmt s, const InstLabel l); +void SetV4(ZAMStmt s, const InstLabel l); +void SetGoTo(ZAMStmt s, const InstLabel targ) { SetV1(s, targ); } diff --git a/src/script_opt/ZAM/Compile.h b/src/script_opt/ZAM/Compile.h index b4d56cab75..f802de717b 100644 --- a/src/script_opt/ZAM/Compile.h +++ b/src/script_opt/ZAM/Compile.h @@ -51,14 +51,22 @@ public: ZInstAux* aux; }; +// Most of the methods for the compiler are either in separate header source +// files, or in headers generated by auxil/gen-zam. We include these within +// the private part of the compiler class definitions, so a few methods that +// need to be public are specified here directly, rather than via such +// headers. +// +// We declare member variables here, rather than in included headers, since +// many of them are used across different source files, and don't necessarily +// have a natural "home". 
+ class ZAMCompiler { public: ZAMCompiler(ScriptFuncPtr f, std::shared_ptr pfs, std::shared_ptr pf, ScopePtr scope, StmtPtr body, std::shared_ptr ud, std::shared_ptr rd); ~ZAMCompiler(); - StmtPtr CompileBody(); - const FrameReMap& FrameDenizens() const { return shared_frame_denizens_final; } const std::vector& ManagedSlots() const { return managed_slotsI; } @@ -82,6 +90,8 @@ public: return str_cases; } + StmtPtr CompileBody(); + void Dump(); private: @@ -92,406 +102,27 @@ private: friend class CatZBI; friend class MultiZBI; - void Init(); - void InitGlobals(); - void InitArgs(); - void InitCaptures(); - void InitLocals(); - void TrackMemoryManagement(); - - void ResolveHookBreaks(); - void ComputeLoopLevels(); - void AdjustBranches(); - void RetargetBranches(); - void RemapFrameDenizens(const std::vector& inst1_to_inst2); - void CreateSharedFrameDenizens(); - void ConcretizeSwitches(); - - // The following are used for switch statements, mapping the - // switch value (which can be any atomic type) to a branch target. - // We have vectors of them because functions can contain multiple - // switches. - // See ZBody.h for their concrete counterparts, which we've - // already #include'd. + // The following are used for switch statements, mapping the switch value + // (which can be any atomic type) to a branch target. We have vectors of + // them because functions can contain multiple switches. + // + // See ZBody.h for their concrete counterparts, which we've already #include'd. 
template using CaseMapI = std::map; template using CaseMapsI = std::vector>; - template - void AdjustSwitchTables(CaseMapsI& abstract_cases); - - template - void ConcretizeSwitchTables(const CaseMapsI& abstract_cases, CaseMaps& concrete_cases); - - template - void DumpCases(const CaseMaps& cases, const char* type_name) const; - void DumpInsts1(const FrameReMap* remappings); - -#include "zeek/ZAM-MethodDecls.h" - - const ZAMStmt CompileStmt(const StmtPtr& body) { return CompileStmt(body.get()); } - const ZAMStmt CompileStmt(const Stmt* body); - - const ZAMStmt CompilePrint(const PrintStmt* ps); - const ZAMStmt CompileExpr(const ExprStmt* es); - const ZAMStmt CompileIf(const IfStmt* is); - const ZAMStmt CompileSwitch(const SwitchStmt* sw); - const ZAMStmt CompileWhile(const WhileStmt* ws); - const ZAMStmt CompileFor(const ForStmt* f); - const ZAMStmt CompileReturn(const ReturnStmt* r); - const ZAMStmt CompileCatchReturn(const CatchReturnStmt* cr); - const ZAMStmt CompileStmts(const StmtList* sl); - const ZAMStmt CompileInit(const InitStmt* is); - const ZAMStmt CompileWhen(const WhenStmt* ws); - - const ZAMStmt CompileNext() { return GenGoTo(nexts.back()); } - const ZAMStmt CompileBreak() { return GenGoTo(breaks.back()); } - const ZAMStmt CompileFallThrough() { return GenGoTo(fallthroughs.back()); } - const ZAMStmt CompileCatchReturn() { return GenGoTo(catches.back()); } - - const ZAMStmt IfElse(const Expr* e, const Stmt* s1, const Stmt* s2); - const ZAMStmt While(const Stmt* cond_stmt, const Expr* cond, const Stmt* body); - - const ZAMStmt InitRecord(IDPtr id, RecordType* rt); - const ZAMStmt InitVector(IDPtr id, VectorType* vt); - const ZAMStmt InitTable(IDPtr id, TableType* tt, Attributes* attrs); - - const ZAMStmt ValueSwitch(const SwitchStmt* sw, const NameExpr* v, const ConstExpr* c); - const ZAMStmt TypeSwitch(const SwitchStmt* sw, const NameExpr* v, const ConstExpr* c); - const ZAMStmt GenSwitch(const SwitchStmt* sw, int slot, InternalTypeTag it); - - void 
PushNexts() { PushGoTos(nexts); } - void PushBreaks() { PushGoTos(breaks); } - void PushFallThroughs() { PushGoTos(fallthroughs); } - void PushCatchReturns() { PushGoTos(catches); } - - void ResolveNexts(const InstLabel l) { ResolveGoTos(nexts, l, CFT_NEXT); } - void ResolveBreaks(const InstLabel l) { ResolveGoTos(breaks, l, CFT_BREAK); } - void ResolveFallThroughs(const InstLabel l) { ResolveGoTos(fallthroughs, l); } - void ResolveCatchReturns(const InstLabel l) { ResolveGoTos(catches, l, CFT_INLINED_RETURN); } - - const ZAMStmt LoopOverTable(const ForStmt* f, const NameExpr* val); - const ZAMStmt LoopOverVector(const ForStmt* f, const NameExpr* val); - const ZAMStmt LoopOverString(const ForStmt* f, const Expr* e); - - const ZAMStmt FinishLoop(const ZAMStmt iter_head, ZInstI& iter_stmt, const Stmt* body, int iter_slot, - bool is_table); - - const ZAMStmt Loop(const Stmt* body); - - const ZAMStmt CompileExpr(const ExprPtr& e) { return CompileExpr(e.get()); } - const ZAMStmt CompileExpr(const Expr* body); - - const ZAMStmt CompileIncrExpr(const IncrExpr* e); - const ZAMStmt CompileAppendToExpr(const AppendToExpr* e); - const ZAMStmt CompileAdd(const AggrAddExpr* e); - const ZAMStmt CompileDel(const AggrDelExpr* e); - const ZAMStmt CompileAddToExpr(const AddToExpr* e); - const ZAMStmt CompileRemoveFromExpr(const RemoveFromExpr* e); - const ZAMStmt CompileAssignExpr(const AssignExpr* e); - const ZAMStmt CompileRecFieldUpdates(const RecordFieldUpdatesExpr* e); - const ZAMStmt CompileZAMBuiltin(const NameExpr* lhs, const ScriptOptBuiltinExpr* zbi); - const ZAMStmt CompileAssignToIndex(const NameExpr* lhs, const IndexExpr* rhs); - const ZAMStmt CompileFieldLHSAssignExpr(const FieldLHSAssignExpr* e); - const ZAMStmt CompileScheduleExpr(const ScheduleExpr* e); - const ZAMStmt CompileSchedule(const NameExpr* n, const ConstExpr* c, int is_interval, EventHandler* h, - const ListExpr* l); - const ZAMStmt CompileEvent(EventHandler* h, const ListExpr* l); - - const ZAMStmt 
CompileInExpr(const NameExpr* n1, const NameExpr* n2, const NameExpr* n3) { - return CompileInExpr(n1, n2, nullptr, n3, nullptr); - } - - const ZAMStmt CompileInExpr(const NameExpr* n1, const NameExpr* n2, const ConstExpr* c) { - return CompileInExpr(n1, n2, nullptr, nullptr, c); - } - - const ZAMStmt CompileInExpr(const NameExpr* n1, const ConstExpr* c, const NameExpr* n3) { - return CompileInExpr(n1, nullptr, c, n3, nullptr); - } - - // In the following, one of n2 or c2 (likewise, n3/c3) will be nil. - const ZAMStmt CompileInExpr(const NameExpr* n1, const NameExpr* n2, const ConstExpr* c2, const NameExpr* n3, - const ConstExpr* c3); - - const ZAMStmt CompileInExpr(const NameExpr* n1, const ListExpr* l, const NameExpr* n2) { - return CompileInExpr(n1, l, n2, nullptr); - } - - const ZAMStmt CompileInExpr(const NameExpr* n, const ListExpr* l, const ConstExpr* c) { - return CompileInExpr(n, l, nullptr, c); - } - - const ZAMStmt CompileInExpr(const NameExpr* n1, const ListExpr* l, const NameExpr* n2, const ConstExpr* c); - - const ZAMStmt CompileIndex(const NameExpr* n1, const NameExpr* n2, const ListExpr* l, bool in_when); - const ZAMStmt CompileIndex(const NameExpr* n1, const ConstExpr* c, const ListExpr* l, bool in_when); - const ZAMStmt CompileIndex(const NameExpr* n1, int n2_slot, const TypePtr& n2_type, const ListExpr* l, - bool in_when); - - const ZAMStmt BuildLambda(const NameExpr* n, ExprPtr le); - const ZAMStmt BuildLambda(int n_slot, ExprPtr le); - - // Second argument is which instruction slot holds the branch target. 
- const ZAMStmt GenCond(const Expr* e, int& branch_v); - - const ZAMStmt Call(const ExprStmt* e); - const ZAMStmt AssignToCall(const ExprStmt* e); - const ZAMStmt DoCall(const CallExpr* c, const NameExpr* n); - bool CheckForBuiltIn(const ExprPtr& e, CallExprPtr c); - - const ZAMStmt AssignVecElems(const Expr* e); - const ZAMStmt AssignTableElem(const Expr* e); - - const ZAMStmt ConstructTable(const NameExpr* n, const Expr* e); - const ZAMStmt ConstructSet(const NameExpr* n, const Expr* e); - const ZAMStmt ConstructRecord(const NameExpr* n, const Expr* e) { return ConstructRecord(n, e, false); } - const ZAMStmt ConstructRecordFromRecord(const NameExpr* n, const Expr* e) { return ConstructRecord(n, e, true); } - const ZAMStmt ConstructRecord(const NameExpr* n, const Expr* e, bool is_from_rec); - const ZAMStmt ConstructVector(const NameExpr* n, const Expr* e); - - const ZAMStmt ArithCoerce(const NameExpr* n, const Expr* e); - const ZAMStmt RecordCoerce(const NameExpr* n, const Expr* e); - const ZAMStmt TableCoerce(const NameExpr* n, const Expr* e); - const ZAMStmt VectorCoerce(const NameExpr* n, const Expr* e); - - const ZAMStmt Is(const NameExpr* n, const Expr* e); - +#include "zeek/script_opt/ZAM/AM-Opt.h" +#include "zeek/script_opt/ZAM/Branches.h" +#include "zeek/script_opt/ZAM/Driver.h" +#include "zeek/script_opt/ZAM/Expr.h" #include "zeek/script_opt/ZAM/Inst-Gen.h" +#include "zeek/script_opt/ZAM/Low-Level.h" +#include "zeek/script_opt/ZAM/Stmt.h" +#include "zeek/script_opt/ZAM/Vars.h" - int ConvertToInt(const Expr* e) { - if ( e->Tag() == EXPR_NAME ) - return FrameSlot(e->AsNameExpr()->Id()); - else - return e->AsConstExpr()->Value()->AsInt(); - } - - int ConvertToCount(const Expr* e) { - if ( e->Tag() == EXPR_NAME ) - return FrameSlot(e->AsNameExpr()->Id()); - else - return e->AsConstExpr()->Value()->AsCount(); - } - - using GoToSet = std::vector; - using GoToSets = std::vector; - - void PushGoTos(GoToSets& gotos); - void ResolveGoTos(GoToSets& gotos, const 
InstLabel l, ControlFlowType cft = CFT_NONE); - - ZAMStmt GenGoTo(GoToSet& v); - ZAMStmt GoToStub(); - ZAMStmt GoTo(const InstLabel l); - InstLabel GoToTarget(const ZAMStmt s); - InstLabel GoToTargetBeyond(const ZAMStmt s); - - void SetTarget(ZInstI* inst, const InstLabel l, int slot); - - // Given a GoTo target, find its live equivalent (first instruction - // at that location or beyond that's live). - ZInstI* FindLiveTarget(ZInstI* goto_target); - - // Given an instruction that has a slot associated with the - // given target, updates the slot to correspond with the current - // instruction number of the target. - void ConcretizeBranch(ZInstI* inst, ZInstI* target, int target_slot); - - void SetV(ZAMStmt s, const InstLabel l, int v) { - if ( v == 1 ) - SetV1(s, l); - else if ( v == 2 ) - SetV2(s, l); - else if ( v == 3 ) - SetV3(s, l); - else - SetV4(s, l); - } - - void SetV1(ZAMStmt s, const InstLabel l); - void SetV2(ZAMStmt s, const InstLabel l); - void SetV3(ZAMStmt s, const InstLabel l); - void SetV4(ZAMStmt s, const InstLabel l); - void SetGoTo(ZAMStmt s, const InstLabel targ) { SetV1(s, targ); } - - const ZAMStmt StartingBlock(); - const ZAMStmt FinishBlock(const ZAMStmt start); - - bool NullStmtOK() const; - - const ZAMStmt EmptyStmt(); - const ZAMStmt ErrorStmt(); - const ZAMStmt LastInst(); - - // Adds control flow information to an instruction. - void AddCFT(ZInstI* inst, ControlFlowType cft); - - // Returns a handle to state associated with building - // up a list of values. - std::unique_ptr BuildVals(const ListExprPtr&); - - // "stride" is how many slots each element of l will consume. - ZInstAux* InternalBuildVals(const ListExpr* l, int stride = 1); - - // Returns how many values were added. - int InternalAddVal(ZInstAux* zi, int i, Expr* e); - - // Adds the given instruction to the ZAM program. The second - // argument, if true, suppresses generation of any pending - // global/capture store for this instruction. 
- const ZAMStmt AddInst(const ZInstI& inst, bool suppress_non_local = false); - - // Returns the statement just before the given one. - ZAMStmt PrevStmt(const ZAMStmt s); - - // Returns the last (interpreter) statement in the body. - const Stmt* LastStmt(const Stmt* s) const; - - // Returns the most recent added instruction *other* than those - // added for bookkeeping. - ZInstI* TopMainInst() { return insts1[top_main_inst]; } - - bool IsUnused(const IDPtr& id, const Stmt* where) const; - - bool IsCapture(const IDPtr& id) const { return IsCapture(id.get()); } - bool IsCapture(const ID* id) const; - - int CaptureOffset(const IDPtr& id) const { return IsCapture(id.get()); } - int CaptureOffset(const ID* id) const; - - void LoadParam(const ID* id); - const ZAMStmt LoadGlobal(const ID* id); - const ZAMStmt LoadCapture(const ID* id); - - int AddToFrame(const ID*); - - int FrameSlot(const IDPtr& id) { return FrameSlot(id.get()); } - int FrameSlot(const ID* id); - int FrameSlotIfName(const Expr* e) { - auto n = e->Tag() == EXPR_NAME ? e->AsNameExpr() : nullptr; - return n ? FrameSlot(n->Id()) : -1; - } - - int FrameSlot(const NameExpr* id) { return FrameSlot(id->AsNameExpr()->Id()); } - int Frame1Slot(const NameExpr* id, ZOp op) { return Frame1Slot(id->AsNameExpr()->Id(), op); } - - int Frame1Slot(const ID* id, ZOp op) { return Frame1Slot(id, op1_flavor[op]); } - int Frame1Slot(const NameExpr* n, ZAMOp1Flavor fl) { return Frame1Slot(n->Id(), fl); } - int Frame1Slot(const ID* id, ZAMOp1Flavor fl); - - // The slot without doing any global-related checking. 
- int RawSlot(const NameExpr* n) { return RawSlot(n->Id()); } - int RawSlot(const ID* id); - - bool HasFrameSlot(const ID* id) const; - - int NewSlot(const TypePtr& t) { return NewSlot(ZVal::IsManagedType(t)); } - int NewSlot(bool is_managed); - - int TempForConst(const ConstExpr* c); - - //////////////////////////////////////////////////////////// - // The following methods relate to optimizing the low-level - // ZAM function body after it is initially generated. They're - // factored out into ZOpt.cc since they're structurally quite - // different from the methods above that relate to the initial - // compilation. - - // Optimizing the low-level compiled instructions. - void OptimizeInsts(); - - // Tracks which instructions can be branched to via the given - // set of switches. - template - void TallySwitchTargets(const CaseMapsI& switches); - - // Remove code that can't be reached. True if some removal happened. - bool RemoveDeadCode(); - - // Collapse chains of gotos. True if some something changed. - bool CollapseGoTos(); - - // Prune statements that are unnecessary. True if something got - // pruned. - bool PruneUnused(); - - // For the current state of insts1, compute lifetimes of frame - // denizens (variable(s) using a given frame slot) in terms of - // first-instruction-to-last-instruction during which they're - // relevant, including consideration for loops. - void ComputeFrameLifetimes(); - - // Given final frame lifetime information, remaps frame members - // with non-overlapping lifetimes to share slots. - void ReMapFrame(); - - // Given final frame lifetime information, remaps slots in the - // interpreter frame. (No longer strictly necessary.) - void ReMapInterpreterFrame(); - - // Computes the remapping for a variable currently in the given slot, - // whose scope begins at the given instruction. 
- void ReMapVar(const ID* id, int slot, zeek_uint_t inst); - - // Look to initialize the beginning of local lifetime based on slot - // assignment at instruction inst. - void CheckSlotAssignment(int slot, const ZInstI* inst); - - // Track that a local's lifetime begins at the given statement. - void SetLifetimeStart(int slot, const ZInstI* inst); - - // Look for extension of local lifetime based on slot usage - // at instruction inst. - void CheckSlotUse(int slot, const ZInstI* inst); - - // Extend (or create) the end of a local's lifetime. - void ExtendLifetime(int slot, const ZInstI* inst); - - // Returns the (live) instruction at the beginning/end of the loop(s) - // within which the given instruction lies; or that instruction - // itself if it's not inside a loop. The second argument specifies - // the loop depth. For example, a value of '2' means "extend to - // the beginning/end of any loop(s) of depth >= 2". - const ZInstI* BeginningOfLoop(const ZInstI* inst, int depth) const; - const ZInstI* EndOfLoop(const ZInstI* inst, int depth) const; - - // True if any statement other than a frame sync uses the given slot. - bool VarIsUsed(int slot) const; - - // Find the first non-dead instruction after i (inclusive). - // If follow_gotos is true, then if that instruction is - // an unconditional branch, continues the process until - // a different instruction is found (and report if there - // are infinite loops). - // - // First form returns nil if there's nothing live after i. - // Second form returns insts1.size() in that case. - ZInstI* FirstLiveInst(ZInstI* i, bool follow_gotos = false); - zeek_uint_t FirstLiveInst(zeek_uint_t i, bool follow_gotos = false); - - // Same, but not including i. 
- ZInstI* NextLiveInst(ZInstI* i, bool follow_gotos = false) { - if ( i->inst_num == static_cast(insts1.size()) - 1 ) - return nullptr; - return FirstLiveInst(insts1[i->inst_num + 1], follow_gotos); - } - int NextLiveInst(int i, bool follow_gotos = false) { return FirstLiveInst(i + 1, follow_gotos); } - - // Mark an instruction as unnecessary and remove its influence on - // other statements. The instruction is indicated as an offset - // into insts1; any labels associated with it are transferred - // to its next live successor, if any. - void KillInst(ZInstI* i) { KillInst(i->inst_num); } - void KillInst(zeek_uint_t i); - - // Helper function for propagating control flow (of a given type) - // backwards, when the instruction at the given offset has been killed. - void BackPropagateCFT(int inst_num, ControlFlowType cf_type); - - // The same, but kills any successor instructions until finding - // one that's labeled. - void KillInsts(ZInstI* i) { KillInsts(i->inst_num); } - void KillInsts(zeek_uint_t i); +// Headers auto-generated by gen-zam. +#include "zeek/ZAM-MethodDecls.h" // The first of these is used as we compile down to ZInstI's. // The second is the final intermediary code. They're separate diff --git a/src/script_opt/ZAM/Driver.h b/src/script_opt/ZAM/Driver.h new file mode 100644 index 0000000000..af0ebb06a5 --- /dev/null +++ b/src/script_opt/ZAM/Driver.h @@ -0,0 +1,31 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for driving the overall ZAM compilation process. +// +// This file is included by Compile.h to insert into the ZAMCompiler class. 
+ +void Init(); +void InitGlobals(); +void InitArgs(); +void InitCaptures(); +void InitLocals(); +void TrackMemoryManagement(); + +template +void AdjustSwitchTables(CaseMapsI& abstract_cases); + +template +void ConcretizeSwitchTables(const CaseMapsI& abstract_cases, CaseMaps& concrete_cases); +void ConcretizeSwitches(); + +void RetargetBranches(); +void RemapFrameDenizens(const std::vector& inst1_to_inst2); +void CreateSharedFrameDenizens(); + +void ResolveHookBreaks(); +void ComputeLoopLevels(); +void AdjustBranches(); + +template +void DumpCases(const CaseMaps& cases, const char* type_name) const; +void DumpInsts1(const FrameReMap* remappings); diff --git a/src/script_opt/ZAM/Expr.h b/src/script_opt/ZAM/Expr.h new file mode 100644 index 0000000000..e03df24f3d --- /dev/null +++ b/src/script_opt/ZAM/Expr.h @@ -0,0 +1,79 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for ZAM compilation of expression AST nodes (Expr's). +// +// This file is included by Compile.h to insert into the ZAMCompiler class. 
+ +const ZAMStmt CompileExpr(const ExprPtr& e) { return CompileExpr(e.get()); } +const ZAMStmt CompileExpr(const Expr* body); + +const ZAMStmt CompileIncrExpr(const IncrExpr* e); +const ZAMStmt CompileAppendToExpr(const AppendToExpr* e); +const ZAMStmt CompileAdd(const AggrAddExpr* e); +const ZAMStmt CompileDel(const AggrDelExpr* e); +const ZAMStmt CompileAddToExpr(const AddToExpr* e); +const ZAMStmt CompileRemoveFromExpr(const RemoveFromExpr* e); +const ZAMStmt CompileAssignExpr(const AssignExpr* e); +const ZAMStmt CompileRecFieldUpdates(const RecordFieldUpdatesExpr* e); +const ZAMStmt CompileZAMBuiltin(const NameExpr* lhs, const ScriptOptBuiltinExpr* zbi); +const ZAMStmt CompileAssignToIndex(const NameExpr* lhs, const IndexExpr* rhs); +const ZAMStmt CompileFieldLHSAssignExpr(const FieldLHSAssignExpr* e); +const ZAMStmt CompileScheduleExpr(const ScheduleExpr* e); +const ZAMStmt CompileSchedule(const NameExpr* n, const ConstExpr* c, int is_interval, EventHandler* h, + const ListExpr* l); +const ZAMStmt CompileEvent(EventHandler* h, const ListExpr* l); + +const ZAMStmt CompileInExpr(const NameExpr* n1, const NameExpr* n2, const NameExpr* n3) { + return CompileInExpr(n1, n2, nullptr, n3, nullptr); +} + +const ZAMStmt CompileInExpr(const NameExpr* n1, const NameExpr* n2, const ConstExpr* c) { + return CompileInExpr(n1, n2, nullptr, nullptr, c); +} + +const ZAMStmt CompileInExpr(const NameExpr* n1, const ConstExpr* c, const NameExpr* n3) { + return CompileInExpr(n1, nullptr, c, n3, nullptr); +} + +// In the following, one of n2 or c2 (likewise, n3/c3) will be nil. 
+const ZAMStmt CompileInExpr(const NameExpr* n1, const NameExpr* n2, const ConstExpr* c2, const NameExpr* n3, + const ConstExpr* c3); + +const ZAMStmt CompileInExpr(const NameExpr* n1, const ListExpr* l, const NameExpr* n2) { + return CompileInExpr(n1, l, n2, nullptr); +} + +const ZAMStmt CompileInExpr(const NameExpr* n, const ListExpr* l, const ConstExpr* c) { + return CompileInExpr(n, l, nullptr, c); +} + +const ZAMStmt CompileInExpr(const NameExpr* n1, const ListExpr* l, const NameExpr* n2, const ConstExpr* c); + +const ZAMStmt CompileIndex(const NameExpr* n1, const NameExpr* n2, const ListExpr* l, bool in_when); +const ZAMStmt CompileIndex(const NameExpr* n1, const ConstExpr* c, const ListExpr* l, bool in_when); +const ZAMStmt CompileIndex(const NameExpr* n1, int n2_slot, const TypePtr& n2_type, const ListExpr* l, bool in_when); + +const ZAMStmt BuildLambda(const NameExpr* n, ExprPtr le); +const ZAMStmt BuildLambda(int n_slot, ExprPtr le); + +const ZAMStmt AssignVecElems(const Expr* e); +const ZAMStmt AssignTableElem(const Expr* e); + +const ZAMStmt Call(const ExprStmt* e); +const ZAMStmt AssignToCall(const ExprStmt* e); +bool CheckForBuiltIn(const ExprPtr& e, CallExprPtr c); +const ZAMStmt DoCall(const CallExpr* c, const NameExpr* n); + +const ZAMStmt ConstructTable(const NameExpr* n, const Expr* e); +const ZAMStmt ConstructSet(const NameExpr* n, const Expr* e); +const ZAMStmt ConstructRecord(const NameExpr* n, const Expr* e) { return ConstructRecord(n, e, false); } +const ZAMStmt ConstructRecordFromRecord(const NameExpr* n, const Expr* e) { return ConstructRecord(n, e, true); } +const ZAMStmt ConstructRecord(const NameExpr* n, const Expr* e, bool is_from_rec); +const ZAMStmt ConstructVector(const NameExpr* n, const Expr* e); + +const ZAMStmt ArithCoerce(const NameExpr* n, const Expr* e); +const ZAMStmt RecordCoerce(const NameExpr* n, const Expr* e); +const ZAMStmt TableCoerce(const NameExpr* n, const Expr* e); +const ZAMStmt VectorCoerce(const NameExpr* n, 
const Expr* e); + +const ZAMStmt Is(const NameExpr* n, const Expr* e); diff --git a/src/script_opt/ZAM/Inst-Gen.h b/src/script_opt/ZAM/Inst-Gen.h index 9cc7993615..253b4dcd59 100644 --- a/src/script_opt/ZAM/Inst-Gen.h +++ b/src/script_opt/ZAM/Inst-Gen.h @@ -4,8 +4,7 @@ // NameExpr*'s to slots. Some aren't needed, but we provide a complete // set mirroring the ZInstI constructors for consistency. // -// Maintained separately from Compile.h to make it conceptually simple to -// add new helpers. +// This file is included by Compile.h to insert into the ZAMCompiler class. ZInstI GenInst(ZOp op); ZInstI GenInst(ZOp op, const NameExpr* v1); diff --git a/src/script_opt/ZAM/Low-Level.h b/src/script_opt/ZAM/Low-Level.h new file mode 100644 index 0000000000..d40802f981 --- /dev/null +++ b/src/script_opt/ZAM/Low-Level.h @@ -0,0 +1,42 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for low-level manipulation of ZAM instructions/statements. +// +// This file is included by Compile.h to insert into the ZAMCompiler class. + +const ZAMStmt StartingBlock(); +const ZAMStmt FinishBlock(const ZAMStmt start); + +bool NullStmtOK() const; + +const ZAMStmt EmptyStmt(); +const ZAMStmt ErrorStmt(); +const ZAMStmt LastInst(); + +// Adds control flow information to an instruction. +void AddCFT(ZInstI* inst, ControlFlowType cft); + +// Returns a handle to state associated with building +// up a list of values. +std::unique_ptr BuildVals(const ListExprPtr&); + +// "stride" is how many slots each element of l will consume. +ZInstAux* InternalBuildVals(const ListExpr* l, int stride = 1); + +// Returns how many values were added. +int InternalAddVal(ZInstAux* zi, int i, Expr* e); + +// Adds the given instruction to the ZAM program. The second +// argument, if true, suppresses generation of any pending +// global/capture store for this instruction. 
+const ZAMStmt AddInst(const ZInstI& inst, bool suppress_non_local = false); + +// Returns the statement just before the given one. +ZAMStmt PrevStmt(const ZAMStmt s); + +// Returns the last (interpreter) statement in the body. +const Stmt* LastStmt(const Stmt* s) const; + +// Returns the most recently added instruction *other* than those +// added for bookkeeping. +ZInstI* TopMainInst() { return insts1[top_main_inst]; } diff --git a/src/script_opt/ZAM/Stmt.h b/src/script_opt/ZAM/Stmt.h new file mode 100644 index 0000000000..8a85ed4539 --- /dev/null +++ b/src/script_opt/ZAM/Stmt.h @@ -0,0 +1,48 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for ZAM compilation of statement AST nodes (Stmt's). +// +// This file is included by Compile.h to insert into the ZAMCompiler class. + +// Note, we first list the AST nodes and then the helper functions, though +// in the definitions source these are intermingled. +const ZAMStmt CompileStmt(const StmtPtr& body) { return CompileStmt(body.get()); } +const ZAMStmt CompileStmt(const Stmt* body); + +const ZAMStmt CompilePrint(const PrintStmt* ps); +const ZAMStmt CompileExpr(const ExprStmt* es); +const ZAMStmt CompileIf(const IfStmt* is); +const ZAMStmt CompileSwitch(const SwitchStmt* sw); +const ZAMStmt CompileWhile(const WhileStmt* ws); +const ZAMStmt CompileFor(const ForStmt* f); +const ZAMStmt CompileReturn(const ReturnStmt* r); +const ZAMStmt CompileCatchReturn(const CatchReturnStmt* cr); +const ZAMStmt CompileStmts(const StmtList* sl); +const ZAMStmt CompileInit(const InitStmt* is); +const ZAMStmt CompileWhen(const WhenStmt* ws); + +const ZAMStmt CompileNext() { return GenGoTo(nexts.back()); } +const ZAMStmt CompileBreak() { return GenGoTo(breaks.back()); } +const ZAMStmt CompileFallThrough() { return GenGoTo(fallthroughs.back()); } +const ZAMStmt CompileCatchReturn() { return GenGoTo(catches.back()); } + +const ZAMStmt IfElse(const Expr* e, const Stmt* s1, const Stmt* s2); +// 
Second argument is which instruction slot holds the branch target. +const ZAMStmt GenCond(const Expr* e, int& branch_v); + +const ZAMStmt While(const Stmt* cond_stmt, const Expr* cond, const Stmt* body); + +const ZAMStmt ValueSwitch(const SwitchStmt* sw, const NameExpr* v, const ConstExpr* c); +const ZAMStmt TypeSwitch(const SwitchStmt* sw, const NameExpr* v, const ConstExpr* c); +const ZAMStmt GenSwitch(const SwitchStmt* sw, int slot, InternalTypeTag it); + +const ZAMStmt LoopOverTable(const ForStmt* f, const NameExpr* val); +const ZAMStmt LoopOverVector(const ForStmt* f, const NameExpr* val); +const ZAMStmt LoopOverString(const ForStmt* f, const Expr* e); + +const ZAMStmt Loop(const Stmt* body); +const ZAMStmt FinishLoop(const ZAMStmt iter_head, ZInstI& iter_stmt, const Stmt* body, int iter_slot, bool is_table); + +const ZAMStmt InitRecord(IDPtr id, RecordType* rt); +const ZAMStmt InitVector(IDPtr id, VectorType* vt); +const ZAMStmt InitTable(IDPtr id, TableType* tt, Attributes* attrs); diff --git a/src/script_opt/ZAM/Vars.h b/src/script_opt/ZAM/Vars.h new file mode 100644 index 0000000000..5fe46e9fe4 --- /dev/null +++ b/src/script_opt/ZAM/Vars.h @@ -0,0 +1,44 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for managing Zeek function variables. +// +// This file is included by Compile.h to insert into the ZAMCompiler class. + +bool IsUnused(const IDPtr& id, const Stmt* where) const; + +bool IsCapture(const IDPtr& id) const { return IsCapture(id.get()); } +bool IsCapture(const ID* id) const; + +int CaptureOffset(const IDPtr& id) const { return CaptureOffset(id.get()); } +int CaptureOffset(const ID* id) const; + +void LoadParam(const ID* id); +const ZAMStmt LoadGlobal(const ID* id); +const ZAMStmt LoadCapture(const ID* id); + +int AddToFrame(const ID*); + +int FrameSlot(const IDPtr& id) { return FrameSlot(id.get()); } +int FrameSlot(const ID* id); +int FrameSlotIfName(const Expr* e) { + auto n = e->Tag() == EXPR_NAME ? 
e->AsNameExpr() : nullptr; + return n ? FrameSlot(n->Id()) : -1; +} + +int FrameSlot(const NameExpr* id) { return FrameSlot(id->AsNameExpr()->Id()); } +int Frame1Slot(const NameExpr* id, ZOp op) { return Frame1Slot(id->AsNameExpr()->Id(), op); } + +int Frame1Slot(const ID* id, ZOp op) { return Frame1Slot(id, op1_flavor[op]); } +int Frame1Slot(const NameExpr* n, ZAMOp1Flavor fl) { return Frame1Slot(n->Id(), fl); } +int Frame1Slot(const ID* id, ZAMOp1Flavor fl); + +// The slot without doing any global-related checking. +int RawSlot(const NameExpr* n) { return RawSlot(n->Id()); } +int RawSlot(const ID* id); + +bool HasFrameSlot(const ID* id) const; + +int NewSlot(const TypePtr& t) { return NewSlot(ZVal::IsManagedType(t)); } +int NewSlot(bool is_managed); + +int TempForConst(const ConstExpr* c); From a2495d028e2f172ee4fb920c9550a7a29ff1b2a3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 7 Oct 2024 14:16:13 -0700 Subject: [PATCH 2/3] renamed script optimization Attrs.h header to prepare for factoring large Compile.h --- src/script_opt/CPP/{Attrs.h => AttrExprType.h} | 0 src/script_opt/CPP/InitsInfo.cc | 2 +- src/script_opt/CPP/RuntimeInitSupport.h | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/script_opt/CPP/{Attrs.h => AttrExprType.h} (100%) diff --git a/src/script_opt/CPP/Attrs.h b/src/script_opt/CPP/AttrExprType.h similarity index 100% rename from src/script_opt/CPP/Attrs.h rename to src/script_opt/CPP/AttrExprType.h diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index b28a7f9b66..6006b7bda0 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -5,7 +5,7 @@ #include "zeek/Desc.h" #include "zeek/RE.h" #include "zeek/ZeekString.h" -#include "zeek/script_opt/CPP/Attrs.h" +#include "zeek/script_opt/CPP/AttrExprType.h" #include "zeek/script_opt/CPP/Compile.h" #include "zeek/script_opt/CPP/RuntimeInits.h" diff --git a/src/script_opt/CPP/RuntimeInitSupport.h 
b/src/script_opt/CPP/RuntimeInitSupport.h index 33625ea613..3f50e9f7e9 100644 --- a/src/script_opt/CPP/RuntimeInitSupport.h +++ b/src/script_opt/CPP/RuntimeInitSupport.h @@ -5,7 +5,7 @@ #pragma once #include "zeek/Val.h" -#include "zeek/script_opt/CPP/Attrs.h" +#include "zeek/script_opt/CPP/AttrExprType.h" #include "zeek/script_opt/CPP/Func.h" namespace zeek { From 744628f115e6340fb120d020b054422707148780 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 7 Oct 2024 16:58:10 -0700 Subject: [PATCH 3/3] factored CPP source's main header into collection of per-source-file headers --- src/script_opt/CPP/Attrs.h | 50 ++ src/script_opt/CPP/Compile.h | 987 ++-------------------------------- src/script_opt/CPP/Consts.h | 51 ++ src/script_opt/CPP/DeclFunc.h | 101 ++++ src/script_opt/CPP/Driver.h | 98 ++++ src/script_opt/CPP/Emit.h | 80 +++ src/script_opt/CPP/Exprs.h | 147 +++++ src/script_opt/CPP/GenFunc.h | 74 +++ src/script_opt/CPP/Inits.h | 127 +++++ src/script_opt/CPP/Stmts.h | 34 ++ src/script_opt/CPP/Types.h | 59 ++ src/script_opt/CPP/Vars.h | 69 +++ 12 files changed, 920 insertions(+), 957 deletions(-) create mode 100644 src/script_opt/CPP/Attrs.h create mode 100644 src/script_opt/CPP/Consts.h create mode 100644 src/script_opt/CPP/DeclFunc.h create mode 100644 src/script_opt/CPP/Driver.h create mode 100644 src/script_opt/CPP/Emit.h create mode 100644 src/script_opt/CPP/Exprs.h create mode 100644 src/script_opt/CPP/GenFunc.h create mode 100644 src/script_opt/CPP/Inits.h create mode 100644 src/script_opt/CPP/Stmts.h create mode 100644 src/script_opt/CPP/Types.h create mode 100644 src/script_opt/CPP/Vars.h diff --git a/src/script_opt/CPP/Attrs.h b/src/script_opt/CPP/Attrs.h new file mode 100644 index 0000000000..80663f0792 --- /dev/null +++ b/src/script_opt/CPP/Attrs.h @@ -0,0 +1,50 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for tracking attributes associated with Zeek variables/types. 
+// Attributes arise mainly in the context of constructing types. +// +// This file is included by Compile.h to insert into the CPPCompile class. + +public: +// Tracks a use of the given set of attributes, including +// initialization dependencies and the generation of any +// associated expressions. +// +// Returns the initialization info associated with the set of +// attributes. +std::shared_ptr RegisterAttributes(const AttributesPtr& attrs); + +// Convenient access to the global offset associated with +// a set of Attributes. +int AttributesOffset(const AttributesPtr& attrs) { return GI_Offset(RegisterAttributes(attrs)); } + +// The same, for a single attribute. +std::shared_ptr RegisterAttr(const AttrPtr& attr); + +// Returns a mapping from Attr objects to their associated +// initialization information. The Attr must have previously +// been registered. +auto& ProcessedAttr() const { return processed_attr; } + +private: +// Start of methods related to managing script type attributes. +// Attributes arise mainly in the context of constructing types. +// See Attrs.cc for definitions. +// + +// Populates the 2nd and 3rd arguments with C++ representations +// of the tags and (optional) values/expressions associated with +// the set of attributes. +void BuildAttrs(const AttributesPtr& attrs, std::string& attr_tags, std::string& attr_vals); + +// Returns a string representation of the name associated with +// different attribute tags (e.g., "ATTR_DEFAULT"). +static const char* AttrName(AttrTag t); + +// Similar for attributes, so we can reconstruct record types. +CPPTracker attributes = {"attrs", false}; + +// Maps Attributes and Attr's to their global initialization +// information. 
+std::unordered_map> processed_attrs; +std::unordered_map> processed_attr; diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index 060dae6fa7..c15a54a598 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -124,533 +124,47 @@ namespace zeek::detail { class CPPCompile { public: + // Constructing a CPPCompile object does all of the compilation. CPPCompile(std::vector& _funcs, std::shared_ptr pfs, const std::string& gen_name, bool _standalone, bool report_uncompilable); ~CPPCompile(); - // Constructing a CPPCompile object does all of the compilation. - // The public methods here are for use by helper classes. - - // Tracks the given type (with support methods for ones that - // are complicated), recursively including its sub-types, and - // creating initializations for constructing C++ variables - // representing the types. - // - // Returns the initialization info associated with the type. - std::shared_ptr RegisterType(const TypePtr& t); - - // Easy access to the global offset and the initialization - // cohort associated with a given type. - int TypeOffset(const TypePtr& t) { return GI_Offset(RegisterType(t)); } - int TypeCohort(const TypePtr& t) { return GI_Cohort(RegisterType(t)); } - int TypeFinalCohort(const TypePtr& t) { return GI_FinalCohort(RegisterType(t)); } - - // Tracks a Zeek ValPtr used as a constant value. These occur - // in two contexts: directly as constant expressions, and indirectly - // as elements within aggregate constants (such as in vector - // initializers). - // - // Returns the associated initialization info. In addition, - // consts_offset returns an offset into an initialization-time - // global that tracks all constructed globals, providing - // general access to them for aggregate constants. - std::shared_ptr RegisterConstant(const ValPtr& vp, int& consts_offset); - - // Tracks a global to generate the necessary initialization. - // Returns the associated initialization info. 
- std::shared_ptr RegisterGlobal(const ID* g); - - // Tracks a use of the given set of attributes, including - // initialization dependencies and the generation of any - // associated expressions. - // - // Returns the initialization info associated with the set of - // attributes. - std::shared_ptr RegisterAttributes(const AttributesPtr& attrs); - - // Convenient access to the global offset associated with - // a set of Attributes. - int AttributesOffset(const AttributesPtr& attrs) { return GI_Offset(RegisterAttributes(attrs)); } - - // The same, for a single attribute. - std::shared_ptr RegisterAttr(const AttrPtr& attr); - - // Returns a mapping of from Attr objects to their associated - // initialization information. The Attr must have previously - // been registered. - auto& ProcessedAttr() const { return processed_attr; } - - // True if the given expression is simple enough that we can - // generate code to evaluate it directly, and don't need to - // create a separate function per RegisterInitExpr() to track it. - static bool IsSimpleInitExpr(const ExprPtr& e); - - // Tracks expressions used in attributes (such as &default=). - // - // We need to generate code to evaluate these, via CallExpr's - // that invoke functions that return the value of the expression. - // However, we can't generate that code when first encountering - // the attribute, because doing so will need to refer to the names - // of types, and initially those are unavailable (because the type's - // representatives, per pfs->RepTypes(), might not have yet been - // tracked). So instead we track the associated CallExprInitInfo - // objects, and after all types have been tracked, then spin - // through them to generate the code. - // - // Returns the associated initialization information. - std::shared_ptr RegisterInitExpr(const ExprPtr& e); - - // Tracks a C++ string value needed for initialization. Returns - // an offset into the global vector that will hold these. 
- int TrackString(std::string s) { - auto ts = tracked_strings.find(s); - if ( ts != tracked_strings.end() ) - return ts->second; - - int offset = ordered_tracked_strings.size(); - tracked_strings[s] = offset; - ordered_tracked_strings.emplace_back(s); - - return offset; - } - - // Tracks a profile hash value needed for initialization. Returns - // an offset into the global vector that will hold these. - int TrackHash(p_hash_type h) { - auto th = tracked_hashes.find(h); - if ( th != tracked_hashes.end() ) - return th->second; - - int offset = ordered_tracked_hashes.size(); - tracked_hashes[h] = offset; - ordered_tracked_hashes.emplace_back(h); - - return offset; - } - - // Returns the hash associated with a given function body. - // It's a fatal error to call this for a body that hasn't - // been compiled. + // Returns the hash associated with a given function body. It's a fatal + // error to call this for a body that hasn't been compiled. p_hash_type BodyHash(const Stmt* body); - // Returns true if at least one of the function bodies associated - // with the function/hook/event handler of the given fname is - // not compilable. + // Returns true if at least one of the function bodies associated with + // the function/hook/event handler of the given fname is not compilable. bool NotFullyCompilable(const std::string& fname) const { return not_fully_compilable.count(fname) > 0; } private: - // Start of methods related to driving the overall compilation - // process. - // See Driver.cc for definitions. - // - - // Main driver, invoked by constructor. - void Compile(bool report_uncompilable); - - // The following methods all create objects that track the - // initializations of a given type of value. In each, "tag" - // is the name used to identify the initializer global - // associated with the given type of value, and "type" is - // its C++ representation. Often "tag" is concatenated with - // "type" to designate a specific C++ type. 
For example, - // "tag" might be "Double" and "type" might be "ValPtr"; - // the resulting global's type is "DoubleValPtr". - - // Creates an object for tracking values associated with Zeek - // constants. "c_type" is the C++ type used in the initializer - // for each object; or, if empty, it specifies that we represent - // the value using an index into a separate vector that holds - // the constant. - std::shared_ptr CreateConstInitInfo(const char* tag, const char* type, const char* c_type); - - // Creates an object for tracking compound initializers, which - // are whose initialization uses indexes into other vectors. - std::shared_ptr CreateCompoundInitInfo(const char* tag, const char* type); - - // Creates an object for tracking initializers that have custom - // C++ objects to hold their initialization information. - std::shared_ptr CreateCustomInitInfo(const char* tag, const char* type); - - // Generates the declaration associated with a set of initializations - // and tracks the object to facilitate looping over all so - // initializations. As a convenience, returns the object. - std::shared_ptr RegisterInitInfo(const char* tag, const char* type, - std::shared_ptr gi); - - // Generate the beginning of the compiled code: run-time functions, - // namespace, auxiliary globals. - void GenProlog(); - - // Given the name of a function body that's been compiled, generate - // code to register it at run-time, and track its associated hash - // so subsequent compilations can reuse it. - void RegisterCompiledBody(const std::string& f); - - // After compilation, generate the final code. Most of this is - // in support of run-time initialization of various dynamic values. - void GenEpilog(); - - // Generate the main method of the CPPDynStmt class, doing dynamic - // dispatch for function invocation. - void GenCPPDynStmt(); - - // Generate a function to load BiFs. 
- void GenLoadBiFs(); - - // Generate the main initialization function, which finalizes - // the run-time environment. - void GenFinishInit(); - - // Generate the function that registers compiled script bodies. - void GenRegisterBodies(); - - // True if the given function (plus body and profile) is one - // that should be compiled. If non-nil, sets reason to the - // the reason why, if there's a fundamental problem. If however - // the function should be skipped for other reasons, then sets - // it to nil. - bool IsCompilable(const FuncInfo& func, const char** reason = nullptr); - - // The set of functions/bodies we're compiling. - std::vector& funcs; - - // The global profile of all of the functions. - std::shared_ptr pfs; - - // Script functions that we are able to compile. We compute - // these ahead of time so that when compiling script function A - // which makes a call to script function B, we know whether - // B will indeed be compiled, or if it'll be interpreted due to - // it including some functionality we don't currently support - // for compilation. - // - // Indexed by the C++ name of the function. - std::unordered_set compilable_funcs; - - // Tracks which functions/hooks/events have at least one non-compilable - // body. Indexed by the Zeek name of function. - std::unordered_set not_fully_compilable; - - // Maps functions (not hooks or events) to upstream compiled names. - std::unordered_map hashed_funcs; - - // If true, the generated code should run "standalone". - bool standalone = false; - - // Hash over the functions in this compilation. This is only - // needed for "seatbelts", to ensure that we can produce a - // unique hash relating to this compilation (*and* its - // compilation time, which is why these are "seatbelts" and - // likely not important to make distinct). - p_hash_type total_hash = 0; - - // - // End of methods related to script/C++ variables. - - // Start of methods related to script variables and their C++ - // counterparts. 
- // See Vars.cc for definitions. - // - - // Generate declarations associated with the given global, and, if - // it's used as a variable (not just as a function being called), - // track it as such. - void CreateGlobal(const ID* g); - - // Register the given identifier as a BiF. If is_var is true - // then the BiF is also used in a non-call context. - void AddBiF(const ID* b, bool is_var); - - // Register the given global name. "suffix" distinguishes particular - // types of globals, such as the names of bifs, global (non-function) - // variables, or compiled Zeek functions. - bool AddGlobal(const std::string& g, const char* suffix); - - // Tracks that the body we're currently compiling refers to the - // given event. - void RegisterEvent(std::string ev_name); - - // The following match various forms of identifiers to the - // name used for their C++ equivalent. - const char* IDName(const IDPtr& id) { return IDName(id.get()); } - const char* IDName(const ID* id) { return IDNameStr(id).c_str(); } - const std::string& IDNameStr(const ID* id); - - // Returns a canonicalized version of a variant of a global made - // distinct by the given suffix. - std::string GlobalName(const std::string& g, const char* suffix) { return Canonicalize(g.c_str()) + "_" + suffix; } - - // Returns a canonicalized form of a local identifier's name, - // expanding its module prefix if needed. - std::string LocalName(const ID* l) const; - std::string LocalName(const IDPtr& l) const { return LocalName(l.get()); } - - // The same, but for a capture. - std::string CaptureName(const ID* l) const; - std::string CaptureName(const IDPtr& l) const { return CaptureName(l.get()); } - - // Returns a canonicalized name, with various non-alphanumeric - // characters stripped or transformed, and guaranteed not to - // conflict with C++ keywords. - std::string Canonicalize(const char* name) const; - - // Returns the name of the global corresponding to an expression - // (which must be a EXPR_NAME). 
- std::string GlobalName(const ExprPtr& e) { return globals[e->AsNameExpr()->Id()->Name()]; } - - // Maps global names (not identifiers) to the names we use for them. - std::unordered_map globals; - - // Similar for locals, for the function currently being compiled. - std::unordered_map locals; - - // Retrieves the initialization information associated with the - // given global. - std::unordered_map> global_gis; - - // Maps event names to the names we use for them. - std::unordered_map events; - - // Globals that correspond to variables, not functions. - IDSet global_vars; - - // - // End of methods related to script/C++ variables. - - // Start of methods related to declaring compiled script functions, - // including related classes. - // See DeclFunc.cc for definitions. - // - - // Generates declarations (class, forward reference to C++ function) - // for the given script function. - void DeclareFunc(const FuncInfo& func); - - // Similar, but for lambdas. - void DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf); - - // Generates code to declare the compiled version of a script - // function. "ft" gives the functions type, "pf" its profile, - // "fname" its C++ name, "body" its AST, "l" if non-nil its - // corresponding lambda expression, and "flavor" whether it's - // a hook/event/function. - // - // We use two basic approaches. Most functions are represented - // by a "CPPDynStmt" object that's parameterized by a void* pointer - // to the underlying C++ function and an index used to dynamically - // cast the pointer to having the correct type for then calling it. - // Lambdas, however (including "implicit" lambdas used to associate - // complex expressions with &attributes), each have a unique - // subclass derived from CPPStmt that calls the underlying C++ - // function without requiring a cast, and that holds the values - // of the lambda's captures. 
- // - // It would be cleanest to use the latter approach for all functions, - // but the hundreds/thousands of additional classes required for - // doing so significantly slows down C++ compilation, so we instead - // opt for the uglier dynamic casting approach, which only requires - // one additional class. - void CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, const StmtPtr& body, - int priority, const LambdaExpr* l, FunctionFlavor flavor); - - // Used for the case of creating a custom subclass of CPPStmt. - void DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, - const std::string& args, const IDPList* lambda_ids); - - // Used for the case of employing an instance of a CPPDynStmt object. - void DeclareDynCPPStmt(); - - // Generates the declarations (and in-line definitions) associated - // with compiling a lambda. - void BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, const StmtPtr& body, - const LambdaExpr* l, const IDPList* lambda_ids); - - // For a call to the C++ version of a function of type "ft" and - // with lambda captures lambda_ids (nil if not applicable), generates - // code that binds the Interpreter arguments (i.e., Frame offsets) - // to C++ function arguments, as well as passing in the captures. - std::string BindArgs(const FuncTypePtr& ft, const IDPList* lambda_ids); - - // Generates the declaration for the parameters for a function with - // the given type, lambda captures (if non-nil), and profile. - std::string ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, const ProfileFunc* pf); - - // Returns in p_types the types associated with the parameters for a function - // of the given type, set of lambda captures (if any), and profile. - void GatherParamTypes(std::vector& p_types, const FuncTypePtr& ft, const IDPList* lambda_ids, - const ProfileFunc* pf); - - // Same, but instead returns the parameter's names. 
- void GatherParamNames(std::vector& p_names, const FuncTypePtr& ft, const IDPList* lambda_ids, - const ProfileFunc* pf); - - // Inspects the given profile to find the i'th parameter (starting - // at 0). Returns nil if the profile indicates that the parameter - // is not used by the function. - const ID* FindParam(int i, const ProfileFunc* pf); - - // Information associated with a CPPDynStmt dynamic dispatch. - struct DispatchInfo { - std::string cast; // C++ cast to use for function pointer - std::string args; // arguments to pass to the function - bool is_hook; // whether the function is a hook - TypePtr yield; // what type the function returns, if any - }; - - // An array of cast/invocation pairs used to generate the CPPDynStmt - // Exec method. - std::vector func_casting_glue; - - // Maps casting strings to indices into func_casting_glue. The index - // is what's used to dynamically switch to the right dispatch. - std::unordered_map casting_index; - - // Maps functions (using their C++ name) to their casting strings. - std::unordered_map func_index; - - // Names for lambda capture ID's. These require a separate space - // that incorporates the lambda's name, to deal with nested lambda's - // that refer to the identifiers with the same name. - std::unordered_map lambda_names; - - // The function's parameters. Tracked so we don't re-declare them. - IDSet params; - - // Whether we're compiling a hook. - bool in_hook = false; - - // - // End of methods related to declaring compiled script functions. - - // Start of methods related to generating the bodies of compiled - // script functions. Note that some of this sort of functionality is - // instead in CPPDeclFunc.cc, due to the presence of inlined methods. - // See GenFunc.cc for definitions. - // - - // Driver functions for compiling the body of the given function - // or lambda. 
- void CompileFunc(const FuncInfo& func); - void CompileLambda(const LambdaExpr* l, const ProfileFunc* pf); - - // Generates the body of the Invoke() method (which supplies the - // "glue" for calling the C++-generated code, for CPPStmt subclasses). - void GenInvokeBody(const std::string& fname, const TypePtr& t, const std::string& args) { - GenInvokeBody(fname + "(" + args + ")", t); - } - void GenInvokeBody(const std::string& call, const TypePtr& t); - - // Generates the code for the body of a script function with - // the given type, profile, C++ name, AST, lambda captures - // (if non-nil), and hook/event/function "flavor". - void DefineBody(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, const StmtPtr& body, - const IDPList* lambda_ids, FunctionFlavor flavor); - - // Declare parameters that originate from a type signature of - // "any" but were concretized in this declaration. - void TranslateAnyParams(const FuncTypePtr& ft, const ProfileFunc* pf); - - // Generates code to dynamically initialize any events referred to - // in the function. - void InitializeEvents(const ProfileFunc* pf); - - // Declare local variables (which are non-globals that aren't - // parameters or lambda captures). - void DeclareLocals(const ProfileFunc* func, const IDPList* lambda_ids); - - // Returns the C++ name to use for a given function body. - std::string BodyName(const FuncInfo& func); - - // Generate the arguments to be used when calling a C++-generated - // function. - std::string GenArgs(const RecordTypePtr& params, const Expr* e); - - // Functions that we've declared/compiled. Indexed by full C++ name. - std::unordered_set compiled_funcs; - - // "Simple" functions that we've compiled, i.e., those that have - // a single body and thus can be called directly. Indexed by - // function name, and maps to the C++ name. - std::unordered_map compiled_simple_funcs; - - // Maps function bodies to the names we use for them. 
- std::unordered_map body_names; - - // Maps function names to hashes of bodies. - std::unordered_map body_hashes; - - // Maps function names to priorities, for hooks & event handlers. - std::unordered_map body_priorities; - - // Maps function names to script locations, for better-than-nothing - // error reporting. - std::unordered_map body_locs; - - // Maps function names to events relevant to them. - std::unordered_map> body_events; - - // Full type of the function we're currently compiling. - FuncTypePtr func_type; - - // Return type of the function we're currently compiling. - TypePtr ret_type; - - // Internal name of the function we're currently compiling. - std::string body_name; - - // - // End of methods related to generating compiled script bodies. - - // Methods related to generating code for representing script constants - // as run-time values. There's only one nontrivial one of these, - // RegisterConstant() (declared above, as it's public). All the other - // work is done by secondary objects - see InitsInfo.{h,cc} for those. - - // Returns the object used to track indices (vectors of integers - // that are used to index various other vectors, including other - // indices). Only used by CPP_InitsInfo objects, but stored - // in the CPPCompile object to make it available across different - // CPP_InitsInfo objects. +#include "zeek/script_opt/CPP/Attrs.h" +#include "zeek/script_opt/CPP/Consts.h" +#include "zeek/script_opt/CPP/DeclFunc.h" +#include "zeek/script_opt/CPP/Driver.h" +#include "zeek/script_opt/CPP/Emit.h" +#include "zeek/script_opt/CPP/Exprs.h" +#include "zeek/script_opt/CPP/GenFunc.h" +#include "zeek/script_opt/CPP/Inits.h" +#include "zeek/script_opt/CPP/Stmts.h" +#include "zeek/script_opt/CPP/Types.h" +#include "zeek/script_opt/CPP/Vars.h" + + // Returns the object used to track indices (vectors of integers that + // are used to index various other vectors, including other indices). 
+ // Only used by CPP_InitsInfo objects, but stored in the CPPCompile object + // to make it available across different CPP_InitsInfo objects. friend class CPP_InitsInfo; IndicesManager& IndMgr() { return indices_mgr; } - // Maps (non-native) constants to associated C++ globals. - std::unordered_map const_exprs; + IndicesManager indices_mgr; - // Maps the values of (non-native) constants to associated initializer - // information. - std::unordered_map> const_vals; + // The following objects track initialization information for different + // types of initializers: Zeek types, individual attributes, sets of + // attributes, expressions that call script functions (for attribute + // expressions), registering lambda bodies, and registering Zeek globals. - // Same, but for the offset into the vector that tracks all constants - // collectively (to support initialization of compound constants). - std::unordered_map const_offsets; - - // The same as the above pair, but indexed by the string representation - // rather than the Val*. The reason for having both is to enable - // reusing common constants even though their Val*'s differ. - std::unordered_map> constants; - std::unordered_map constants_offsets; - - // Used for memory management associated with const_vals's index. - std::vector cv_indices; - - // For different types of constants (as indicated by TypeTag), - // provides the associated object that manages the initializers - // for those constants. - std::unordered_map> const_info; - - // Tracks entries for constructing the vector of all constants - // (regardless of type). Each entry provides a TypeTag, used - // to identify the type-specific vector for a given constant, - // and the offset into that vector. 
- std::vector> consts; - - // The following objects track initialization information for - // different types of initializers: Zeek types, individual - // attributes, sets of attributes, expressions that call script - // functions (for attribute expressions), registering lambda - // bodies, and registering Zeek globals. std::shared_ptr type_info; std::shared_ptr attr_info; std::shared_ptr attrs_info; @@ -658,454 +172,13 @@ private: std::shared_ptr lambda_reg_info; std::shared_ptr global_id_info; - // Tracks all of the above objects (as well as each entry in - // const_info), to facilitate easy iterating over them. + // Tracks all of the above objects (as well as each entry in const_info), + // to facilitate easy iterating over them. std::set> all_global_info; - // Tracks the attribute expressions for which we need to generate - // function calls to evaluate them. + // Tracks the attribute expressions for which we need to generate function + // calls to evaluate them. std::unordered_map> init_infos; - - // See IndMgr() above for the role of this variable. - IndicesManager indices_mgr; - - // Maps strings to associated offsets. - std::unordered_map tracked_strings; - - // Tracks strings we've registered in order (corresponding to - // their offsets). - std::vector ordered_tracked_strings; - - // The same as the previous two, but for profile hashes. - std::vector ordered_tracked_hashes; - std::unordered_map tracked_hashes; - - // - // End of methods related to generating code for script constants. - - // Start of methods related to generating code for AST Stmt's. - // For the most part, code generation is straightforward as - // it matches the Exec/DoExec methods of the corresponding - // Stmt subclasses. - // See Stmts.cc for definitions. 
- // - - void GenStmt(const StmtPtr& s) { GenStmt(s.get()); } - void GenStmt(const Stmt* s); - void GenInitStmt(const InitStmt* init); - void GenIfStmt(const IfStmt* i); - void GenWhileStmt(const WhileStmt* w); - void GenReturnStmt(const ReturnStmt* r); - void GenEventStmt(const EventStmt* ev); - - void GenSwitchStmt(const SwitchStmt* sw); - void GenTypeSwitchStmt(const Expr* e, const case_list* cases); - void GenTypeSwitchCase(const ID* id, int case_offset, bool is_multi); - void GenValueSwitchStmt(const Expr* e, const case_list* cases); - - void GenWhenStmt(const WhenStmt* w); - void GenWhenStmt(const WhenInfo* wi, const std::string& when_lambda, const Location* loc, - std::vector local_aggrs); - void GenForStmt(const ForStmt* f); - void GenForOverTable(const ExprPtr& tbl, const IDPtr& value_var, const IDPList* loop_vars); - void GenForOverVector(const ExprPtr& tbl, const IDPtr& value_var, const IDPList* loop_vars); - void GenForOverString(const ExprPtr& str, const IDPList* loop_vars); - - void GenAssertStmt(const AssertStmt* a); - - // Nested level of loops/switches for which "break"'s should be - // C++ breaks rather than a "hook" break. - int break_level = 0; - - // - // End of methods related to generating code for AST Stmt's. - - // Start of methods related to generating code for AST Expr's. - // See Exprs.cc for definitions. - // - - // These methods are all oriented around returning strings - // of C++ code; they do not directly emit the code, since often - // the caller will be embedding the result in some surrounding - // context. No effort is made to reduce string copying; this - // isn't worth the hassle, as it takes just a few seconds for - // the compiler to generate 100K+ LOC that clang will then need - // 10s of seconds to compile, so speeding up the compiler has - // little practical advantage. 
- - // The following enum's represent whether, for expressions yielding - // native values, the end goal is to have the value in (1) native - // form, (2) instead in ValPtr form, or (3) whichever is more - // convenient to generate (sometimes used when the caller knows - // that the value is non-native). - enum GenType { - GEN_NATIVE, - GEN_VAL_PTR, - GEN_DONT_CARE, - }; - - // Generate an expression for which we want the result embedded - // in {} initializers (generally to be used in calling a function - // where we want those values to be translated to a vector). - std::string GenExprs(const Expr* e); - - // Generate the value(s) associated with a ListExpr. If true, - // the "nested" parameter indicates that this list is embedded - // within an outer list, in which case it's expanded to include - // {}'s. It's false if the ListExpr is at the top level, such - // as when expanding the arguments in a CallExpr. - std::string GenListExpr(const Expr* e, GenType gt, bool nested); - - // Per-Expr-subclass code generation. The resulting code generally - // reflects the corresponding Eval() or Fold() methods. 
- std::string GenExpr(const ExprPtr& e, GenType gt, bool top_level = false) { - return GenExpr(e.get(), gt, top_level); - } - std::string GenExpr(const Expr* e, GenType gt, bool top_level = false); - - std::string GenNameExpr(const NameExpr* ne, GenType gt); - std::string GenConstExpr(const ConstExpr* c, GenType gt); - std::string GenAggrAdd(const Expr* e); - std::string GenAggrDel(const Expr* e); - std::string GenIncrExpr(const Expr* e, GenType gt, bool is_incr, bool top_level); - std::string GenCondExpr(const Expr* e, GenType gt); - std::string GenCallExpr(const CallExpr* c, GenType gt, bool top_level); - std::string GenInExpr(const Expr* e, GenType gt); - std::string GenFieldExpr(const FieldExpr* fe, GenType gt); - std::string GenHasFieldExpr(const HasFieldExpr* hfe, GenType gt); - std::string GenIndexExpr(const Expr* e, GenType gt); - std::string GenAssignExpr(const Expr* e, GenType gt, bool top_level); - std::string GenAddToExpr(const Expr* e, GenType gt, bool top_level); - std::string GenRemoveFromExpr(const Expr* e, GenType gt, bool top_level); - std::string GenSizeExpr(const Expr* e, GenType gt); - std::string GenScheduleExpr(const Expr* e); - std::string GenLambdaExpr(const Expr* e); - std::string GenLambdaExpr(const Expr* e, std::string capture_args); - std::string GenIsExpr(const Expr* e, GenType gt); - - std::string GenArithCoerceExpr(const Expr* e, GenType gt); - std::string GenRecordCoerceExpr(const Expr* e); - std::string GenTableCoerceExpr(const Expr* e); - std::string GenVectorCoerceExpr(const Expr* e); - - std::string GenRecordConstructorExpr(const Expr* e); - std::string GenSetConstructorExpr(const Expr* e); - std::string GenTableConstructorExpr(const Expr* e); - std::string GenVectorConstructorExpr(const Expr* e); - - // Generate code for constants that can be expressed directly - // as C++ constants. - std::string GenVal(const ValPtr& v); - - // Helper functions for particular Expr subclasses / flavors. 
- std::string GenUnary(const Expr* e, GenType gt, const char* op, const char* vec_op = nullptr); - std::string GenBinary(const Expr* e, GenType gt, const char* op, const char* vec_op = nullptr); - std::string GenBinarySet(const Expr* e, GenType gt, const char* op); - std::string GenBinaryString(const Expr* e, GenType gt, const char* op); - std::string GenBinaryPattern(const Expr* e, GenType gt, const char* op); - std::string GenBinaryAddr(const Expr* e, GenType gt, const char* op); - std::string GenBinarySubNet(const Expr* e, GenType gt, const char* op); - std::string GenEQ(const Expr* e, GenType gt, const char* op, const char* vec_op); - - std::string GenAssign(const ExprPtr& lhs, const ExprPtr& rhs, const std::string& rhs_native, - const std::string& rhs_val_ptr, GenType gt, bool top_level); - std::string GenDirectAssign(const ExprPtr& lhs, const std::string& rhs_native, const std::string& rhs_val_ptr, - GenType gt, bool top_level); - std::string GenIndexAssign(const ExprPtr& lhs, const ExprPtr& rhs, const std::string& rhs_val_ptr, GenType gt, - bool top_level); - std::string GenFieldAssign(const ExprPtr& lhs, const ExprPtr& rhs, const std::string& rhs_native, - const std::string& rhs_val_ptr, GenType gt, bool top_level); - std::string GenListAssign(const ExprPtr& lhs, const ExprPtr& rhs); - - // Support for element-by-element vector operations. - std::string GenVectorOp(const Expr* e, std::string op, const char* vec_op); - std::string GenVectorOp(const Expr* e, std::string op1, std::string op2, const char* vec_op); - - // If "all_deep" is true, it means make all of the captures - // deep copies, not just the ones that were explicitly marked - // as deep copies. That functionality is used to support - // Clone() methods; it's not needed when creating a new lambda - // instance. - std::string GenLambdaClone(const LambdaExpr* l, bool all_deep); - - // Returns an initializer list for a vector of integers. 
- std::string GenIntVector(const std::vector& vec); - - // The following are used to generate accesses to elements of - // extensible types. They first check whether the type has - // been extended (for records, beyond the field of interest); - // if not, then the access is done directly. If the access - // is however to an extended element, then they indirect the - // access through a map that is generated dynamically when - // the compiled code. Doing so allows the compiled code to - // work in contexts where other extensions occur that would - // otherwise conflict with hardwired offsets/values. - std::string GenField(const ExprPtr& rec, int field); - std::string GenEnum(const TypePtr& et, const ValPtr& ev); - - // For record that are extended via redef's, maps fields - // beyond the original definition to locations in the - // global (in the compiled code) "field_mapping" array. - // - // So for each such record, there's a second map of - // field-in-the-record to offset-in-field_mapping. - std::unordered_map> record_field_mappings; - - // Total number of such mappings (i.e., entries in the inner maps, - // not the outer map). - int num_rf_mappings = 0; - - // For each entry in "field_mapping", the record (as a global - // offset) and TypeDecl associated with the mapping. - std::vector> field_decls; - - // For enums that are extended via redef's, maps each distinct - // value (that the compiled scripts refer to) to locations in the - // global (in the compiled code) "enum_mapping" array. - // - // So for each such enum, there's a second map of - // value-during-compilation to offset-in-enum_mapping. - std::unordered_map> enum_val_mappings; - - // Total number of such mappings (i.e., entries in the inner maps, - // not the outer map). - int num_ev_mappings = 0; - - // For each entry in "enum_mapping", the EnumType (as a global - // offset) and name associated with the mapping. 
- std::vector> enum_names; - - // - // End of methods related to generating code for AST Expr's. - - // Start of methods related to managing script types. - // See Types.cc for definitions. - // - - // "Native" types are those Zeek scripting types that we support - // using low-level C++ types (like "zeek_uint_t" for "count"). - // Types that we instead support using some form of ValPtr - // representation are "non-native". - bool IsNativeType(const TypePtr& t) const; - - // Given an expression corresponding to a native type (and with - // the given script type 't'), converts it to the given GenType. - std::string NativeToGT(const std::string& expr, const TypePtr& t, GenType gt); - - // Given an expression with a C++ type of generic "ValPtr", of the - // given script type 't', converts it as needed to the given GenType. - std::string GenericValPtrToGT(const std::string& expr, const TypePtr& t, GenType gt); - - // Returns the name of a C++ variable that will hold a TypePtr - // of the appropriate flavor. 't' does not need to be a type - // representative. - std::string GenTypeName(const Type* t); - std::string GenTypeName(const TypePtr& t) { return GenTypeName(t.get()); } - - // Returns the "representative" for a given type, used to ensure - // that we re-use the C++ variable corresponding to a type and - // don't instantiate redundant instances. - const Type* TypeRep(const Type* t) { return pfs->TypeRep(t); } - const Type* TypeRep(const TypePtr& t) { return TypeRep(t.get()); } - - // Low-level C++ representations for types, of various flavors. - static const char* TypeTagName(TypeTag tag); - const char* TypeName(const TypePtr& t); - const char* FullTypeName(const TypePtr& t); - const char* TypeType(const TypePtr& t); - - // Access to a type's underlying values. - const char* NativeAccessor(const TypePtr& t); - - // The name for a type that should be used in declaring - // an IntrusivePtr to such a type. 
- const char* IntrusiveVal(const TypePtr& t); - - // Maps types to indices in the global "CPP__Type__" array. - CPPTracker types = {"types", true}; - - // Used to prevent analysis of mutually-referring types from - // leading to infinite recursion. Maps types to their global - // initialization information (or, initially, to nullptr, if - // they're in the process of being registered). - std::unordered_map> processed_types; - - // - // End of methods related to managing script types. - - // Start of methods related to managing script type attributes. - // Attributes arise mainly in the context of constructing types. - // See Attrs.cc for definitions. - // - - // Populates the 2nd and 3rd arguments with C++ representations - // of the tags and (optional) values/expressions associated with - // the set of attributes. - void BuildAttrs(const AttributesPtr& attrs, std::string& attr_tags, std::string& attr_vals); - - // Returns a string representation of the name associated with - // different attribute tags (e.g., "ATTR_DEFAULT"). - static const char* AttrName(AttrTag t); - - // Similar for attributes, so we can reconstruct record types. - CPPTracker attributes = {"attrs", false}; - - // Maps Attributes and Attr's to their global initialization - // information. - std::unordered_map> processed_attrs; - std::unordered_map> processed_attr; - - // - // End of methods related to managing script type attributes. - - // Start of methods related to run-time initialization. - // See Inits.cc for definitions. - // - - // Generates code for dynamically generating an expression - // associated with an attribute, via a function call. - void GenInitExpr(std::shared_ptr ce_init); - - // Returns the name of a function used to evaluate an - // initialization expression. - std::string InitExprName(const ExprPtr& e); - - // Convenience functions for return the offset or initialization cohort - // associated with an initialization. 
- int GI_Offset(const std::shared_ptr& gi) const { return gi ? gi->Offset() : -1; } - int GI_Cohort(const std::shared_ptr& gi) const { return gi ? gi->InitCohort() : 0; } - int GI_FinalCohort(const std::shared_ptr& gi) const { return gi ? gi->FinalInitCohort() : 0; } - - // Generate code to initialize the mappings for record field - // offsets for field accesses into regions of records that - // can be extensible (and thus can vary at run-time to the - // offsets encountered during compilation). - void InitializeFieldMappings(); - - // Same, but for enum types. - void InitializeEnumMappings(); - - // Generate code to initialize BiFs. - void InitializeBiFs(); - - // Generate code to initialize strings that we track. - void InitializeStrings(); - - // Generate code to initialize hashes that we track. - void InitializeHashes(); - - // Generate code to initialize indirect references to constants. - void InitializeConsts(); - - // Generate code to initialize globals (using dynamic statements - // rather than constants). - void InitializeGlobals(); - - // Generate the initialization hook for this set of compiled code. - void GenInitHook(); - - // Generates code to activate standalone code. - void GenStandaloneActivation(); - - // Generates code to register the initialization for standalone - // use, and prints to stdout a Zeek script that can load all of - // what we compiled. - void GenLoad(); - - // A list of BiFs to look up during initialization. First - // string is the name of the C++ global holding the BiF, the - // second is its name as known to Zeek. - std::unordered_map BiFs; - - // Expressions for which we need to generate initialization-time - // code. Currently, these are only expressions appearing in - // attributes. - CPPTracker init_exprs = {"gen_init_expr", false}; - - // - // End of methods related to run-time initialization. - - // Start of methods related to low-level code generation. - // See Emit.cc for definitions. 
- // - - // The following all need to be able to emit code. - friend class CPP_BasicConstInitsInfo; - friend class CPP_CompoundInitsInfo; - friend class IndicesManager; - - // Used to create (indented) C++ {...} code blocks. "needs_semi" - // controls whether to terminate the block with a ';' (such as - // for class definitions. - void StartBlock(); - void EndBlock(bool needs_semi = false); - - void IndentUp() { ++block_level; } - void IndentDown() { --block_level; } - - // Various ways of generating code. The multi-argument methods - // assume that the first argument is a printf-style format - // (but one that can only have %s specifiers). - void Emit(const std::string& str) const { - Indent(); - fprintf(write_file, "%s", str.c_str()); - NL(); - } - - void Emit(const std::string& fmt, const std::string& arg, bool do_NL = true) const { - Indent(); - fprintf(write_file, fmt.c_str(), arg.c_str()); - if ( do_NL ) - NL(); - } - - void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2) const { - Indent(); - fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str()); - NL(); - } - - void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2, const std::string& arg3) const { - Indent(); - fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), arg3.c_str()); - NL(); - } - - void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2, const std::string& arg3, - const std::string& arg4) const { - Indent(); - fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), arg3.c_str(), arg4.c_str()); - NL(); - } - - void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2, const std::string& arg3, - const std::string& arg4, const std::string& arg5) const { - Indent(); - fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), arg3.c_str(), arg4.c_str(), arg5.c_str()); - NL(); - } - - void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2, 
const std::string& arg3, - const std::string& arg4, const std::string& arg5, const std::string& arg6) const { - Indent(); - fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), arg3.c_str(), arg4.c_str(), arg5.c_str(), - arg6.c_str()); - NL(); - } - - void NL() const { fputc('\n', write_file); } - - // Indents to the current indentation level. - void Indent() const; - - // File to which we're generating code. - FILE* write_file; - - // Indentation level. - int block_level = 0; - - // - // End of methods related to run-time initialization. }; } // namespace zeek::detail diff --git a/src/script_opt/CPP/Consts.h b/src/script_opt/CPP/Consts.h new file mode 100644 index 0000000000..4bac98d31e --- /dev/null +++ b/src/script_opt/CPP/Consts.h @@ -0,0 +1,51 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods related to generating code for representing script constants +// as run-time values. There's only one nontrivial one of these, +// RegisterConstant() (declared above, as it's public). All the other +// work is done by secondary objects - see InitsInfo.{h,cc} for those. +// +// This file is included by Compile.h to insert into the CPPCompiler class. + +public: +// Tracks a Zeek ValPtr used as a constant value. These occur in two +// contexts: directly as constant expressions, and indirectly as elements +// within aggregate constants (such as in vector initializers). +// +// Returns the associated initialization info. In addition, consts_offset +// returns an offset into an initialization-time global that tracks all +// constructed globals, providing general access to them for aggregate +// constants. +std::shared_ptr RegisterConstant(const ValPtr& vp, int& consts_offset); + +private: +// Maps (non-native) constants to associated C++ globals. +std::unordered_map const_exprs; + +// Maps the values of (non-native) constants to associated initializer +// information. 
+std::unordered_map> const_vals; + +// Same, but for the offset into the vector that tracks all constants +// collectively (to support initialization of compound constants). +std::unordered_map const_offsets; + +// The same as the above pair, but indexed by the string representation +// rather than the Val*. The reason for having both is to enable +// reusing common constants even though their Val*'s differ. +std::unordered_map> constants; +std::unordered_map constants_offsets; + +// Used for memory management associated with const_vals's index. +std::vector cv_indices; + +// For different types of constants (as indicated by TypeTag), +// provides the associated object that manages the initializers +// for those constants. +std::unordered_map> const_info; + +// Tracks entries for constructing the vector of all constants +// (regardless of type). Each entry provides a TypeTag, used +// to identify the type-specific vector for a given constant, +// and the offset into that vector. +std::vector> consts; diff --git a/src/script_opt/CPP/DeclFunc.h b/src/script_opt/CPP/DeclFunc.h new file mode 100644 index 0000000000..f2b2a8cf85 --- /dev/null +++ b/src/script_opt/CPP/DeclFunc.h @@ -0,0 +1,101 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for generating declarations of functions and lambdas. +// The counterpart to GenFunc.cc. +// +// This file is included by Compile.h to insert into the CPPCompiler class. + +// Generates declarations (class, forward reference to C++ function) for the +// given script function. +void DeclareFunc(const FuncInfo& func); + +// Similar, but for lambdas. +void DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf); + +// Generates code to declare the compiled version of a script function. +// "ft" gives the function's type, "pf" its profile, "fname" its C++ name, +// "body" its AST, "l" if non-nil its corresponding lambda expression, and +// "flavor" whether it's a hook/event/function.
+// +// We use two basic approaches. Most functions are represented by a +// "CPPDynStmt" object that's parameterized by a void* pointer to the +// underlying C++ function and an index used to dynamically cast the pointer +// to having the correct type for then calling it. Lambdas, however +// (including "implicit" lambdas used to associate complex expressions with +// &attributes), each have a unique subclass derived from CPPStmt that calls +// the underlying C++ function without requiring a cast, and that holds the +// values of the lambda's captures. +// +// It would be cleanest to use the latter approach for all functions, but +// the hundreds/thousands of additional classes required for doing so +// significantly slows down C++ compilation, so we instead opt for the uglier +// dynamic casting approach, which only requires one additional class. + +void CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, const StmtPtr& body, + int priority, const LambdaExpr* l, FunctionFlavor flavor); + +// Used for the case of creating a custom subclass of CPPStmt. +void DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, const std::string& args, + const IDPList* lambda_ids); + +// Used for the case of employing an instance of a CPPDynStmt object. +void DeclareDynCPPStmt(); + +// Generates the declarations (and in-line definitions) associated with +// compiling a lambda. +void BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, const StmtPtr& body, + const LambdaExpr* l, const IDPList* lambda_ids); + +// For a call to the C++ version of a function of type "ft" and with lambda +// captures lambda_ids (nil if not applicable), generates code that binds the +// Interpreter arguments (i.e., Frame offsets) to C++ function arguments, as +// well as passing in the captures. 
+std::string BindArgs(const FuncTypePtr& ft, const IDPList* lambda_ids); + +// Generates the declaration for the parameters for a function with the given +// type, lambda captures (if non-nil), and profile. +std::string ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, const ProfileFunc* pf); + +// Returns in p_types the types associated with the parameters for a function +// of the given type, set of lambda captures (if any), and profile. +void GatherParamTypes(std::vector& p_types, const FuncTypePtr& ft, const IDPList* lambda_ids, + const ProfileFunc* pf); + +// Same, but instead returns the parameter's names. +void GatherParamNames(std::vector& p_names, const FuncTypePtr& ft, const IDPList* lambda_ids, + const ProfileFunc* pf); + +// Inspects the given profile to find the i'th parameter (starting at 0). +// Returns nil if the profile indicates that the parameter is not used by the +// function. +const ID* FindParam(int i, const ProfileFunc* pf); + +// Information associated with a CPPDynStmt dynamic dispatch. +struct DispatchInfo { + std::string cast; // C++ cast to use for function pointer + std::string args; // arguments to pass to the function + bool is_hook; // whether the function is a hook + TypePtr yield; // what type the function returns, if any +}; + +// An array of cast/invocation pairs used to generate the CPPDynStmt Exec +// method. +std::vector func_casting_glue; + +// Maps casting strings to indices into func_casting_glue. The index is +// what's used to dynamically switch to the right dispatch. +std::unordered_map casting_index; + +// Maps functions (using their C++ name) to their casting strings. +std::unordered_map func_index; + +// Names for lambda capture ID's. These require a separate space that +// incorporates the lambda's name, to deal with nested lambda's that refer +// to the identifiers with the same name. +std::unordered_map lambda_names; + +// The function's parameters. Tracked so we don't re-declare them. 
+IDSet params; + +// Whether we're compiling a hook. +bool in_hook = false; diff --git a/src/script_opt/CPP/Driver.h b/src/script_opt/CPP/Driver.h new file mode 100644 index 0000000000..e2a097ed2e --- /dev/null +++ b/src/script_opt/CPP/Driver.h @@ -0,0 +1,98 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for driving the overall "-O gen-C++" compilation process. +// +// This file is included by Compile.h to insert into the CPPCompiler class. + +// Main driver, invoked by constructor. +void Compile(bool report_uncompilable); + +// Generate the beginning of the compiled code: run-time functions, +// namespace, auxiliary globals. +void GenProlog(); + +// The following methods all create objects that track the initializations +// of a given type of value. In each, "tag" is the name used to identify the +// initializer global associated with the given type of value, and "type" is +// its C++ representation. Often "tag" is concatenated with "type" to designate +// a specific C++ type. For example, "tag" might be "Double" and "type" might +// be "ValPtr"; the resulting global's type is "DoubleValPtr". + +// Creates an object for tracking values associated with Zeek constants. +// "c_type" is the C++ type used in the initializer for each object; or, if +// empty, it specifies that we represent the value using an index into a +// separate vector that holds the constant. +std::shared_ptr CreateConstInitInfo(const char* tag, const char* type, const char* c_type); + +// Creates an object for tracking compound initializers, which are those whose +// initialization uses indexes into other vectors. +std::shared_ptr CreateCompoundInitInfo(const char* tag, const char* type); + +// Creates an object for tracking initializers that have custom C++ objects +// to hold their initialization information.
+std::shared_ptr CreateCustomInitInfo(const char* tag, const char* type); + +// Generates the declaration associated with a set of initializations and +// tracks the object to facilitate looping over all such initializations. +// As a convenience, returns the object. +std::shared_ptr RegisterInitInfo(const char* tag, const char* type, std::shared_ptr gi); + +// Given the name of a function body that's been compiled, generate code to +// register it at run-time, and track its associated hash so subsequent +// compilations can reuse it. +void RegisterCompiledBody(const std::string& f); + +// After compilation, generate the final code. Most of this is in support +// of run-time initialization of various dynamic values. +void GenEpilog(); + +// Generate the main method of the CPPDynStmt class, doing dynamic dispatch +// for function invocation. +void GenCPPDynStmt(); + +// Generate a function to load BiFs. +void GenLoadBiFs(); + +// Generate the main initialization function, which finalizes the run-time +// environment. +void GenFinishInit(); + +// Generate the function that registers compiled script bodies. +void GenRegisterBodies(); + +// True if the given function (plus body and profile) is one that should be +// compiled. If non-nil, sets reason to the reason why, if there's a +// fundamental problem. If however the function should be skipped for other +// reasons, then sets it to nil. +bool IsCompilable(const FuncInfo& func, const char** reason = nullptr); + +// The set of functions/bodies we're compiling. +std::vector& funcs; + +// The global profile of all of the functions. +std::shared_ptr pfs; + +// Script functions that we are able to compile. We compute these ahead +// of time so that when compiling script function A which makes a call to +// script function B, we know whether B will indeed be compiled, or if it'll +// be interpreted due to it including some functionality we don't currently +// support for compilation.
+// +// Indexed by the C++ name of the function. +std::unordered_set compilable_funcs; + +// Tracks which functions/hooks/events have at least one non-compilable body. +// Indexed by the Zeek name of the function. +std::unordered_set not_fully_compilable; + +// Maps functions (not hooks or events) to upstream compiled names. +std::unordered_map hashed_funcs; + +// If true, the generated code should run "standalone". +bool standalone = false; + +// Hash over the functions in this compilation. This is only needed for +// "seatbelts", to ensure that we can produce a unique hash relating to this +// compilation (*and* its compilation time, which is why these are "seatbelts" +// and likely not important to make distinct). +p_hash_type total_hash = 0; diff --git a/src/script_opt/CPP/Emit.h b/src/script_opt/CPP/Emit.h new file mode 100644 index 0000000000..6871c84df8 --- /dev/null +++ b/src/script_opt/CPP/Emit.h @@ -0,0 +1,80 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Low-level methods for emitting code. +// +// This file is included by Compile.h to insert into the CPPCompiler class. + +// The following all need to be able to emit code. +friend class CPP_BasicConstInitsInfo; +friend class CPP_CompoundInitsInfo; +friend class IndicesManager; + +// Used to create (indented) C++ {...} code blocks. "needs_semi" +// controls whether to terminate the block with a ';' (such as +// for class definitions). +void StartBlock(); +void EndBlock(bool needs_semi = false); + +void IndentUp() { ++block_level; } +void IndentDown() { --block_level; } + +// Various ways of generating code. The multi-argument methods +// assume that the first argument is a printf-style format +// (but one that can only have %s specifiers).
+void Emit(const std::string& str) const { + Indent(); + fprintf(write_file, "%s", str.c_str()); + NL(); +} + +void Emit(const std::string& fmt, const std::string& arg, bool do_NL = true) const { + Indent(); + fprintf(write_file, fmt.c_str(), arg.c_str()); + if ( do_NL ) + NL(); +} + +void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2) const { + Indent(); + fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str()); + NL(); +} + +void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2, const std::string& arg3) const { + Indent(); + fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), arg3.c_str()); + NL(); +} + +void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2, const std::string& arg3, + const std::string& arg4) const { + Indent(); + fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), arg3.c_str(), arg4.c_str()); + NL(); +} + +void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2, const std::string& arg3, + const std::string& arg4, const std::string& arg5) const { + Indent(); + fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), arg3.c_str(), arg4.c_str(), arg5.c_str()); + NL(); +} + +void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2, const std::string& arg3, + const std::string& arg4, const std::string& arg5, const std::string& arg6) const { + Indent(); + fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), arg3.c_str(), arg4.c_str(), arg5.c_str(), + arg6.c_str()); + NL(); +} + +void NL() const { fputc('\n', write_file); } + +// Indents to the current indentation level. +void Indent() const; + +// File to which we're generating code. +FILE* write_file; + +// Indentation level. 
+int block_level = 0; diff --git a/src/script_opt/CPP/Exprs.h b/src/script_opt/CPP/Exprs.h new file mode 100644 index 0000000000..37ce2946ec --- /dev/null +++ b/src/script_opt/CPP/Exprs.h @@ -0,0 +1,147 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for generating code corresponding with Zeek expression AST nodes +// (Expr objects). +// +// This file is included by Compile.h to insert into the CPPCompiler class. + +// These methods are all oriented around returning strings of C++ code; +// they do not directly emit the code, since often the caller will be embedding +// the result in some surrounding context. No effort is made to reduce string +// copying; this isn't worth the hassle, as it takes just a few seconds for +// the compiler to generate 100K+ LOC that clang will then need 10s of seconds +// to compile, so speeding up the compiler has little practical advantage. + +// The following enum values represent whether, for expressions yielding native +// values, the end goal is to have the value in (1) native form, (2) instead +// in ValPtr form, or (3) whichever is more convenient to generate (sometimes +// used when the caller knows that the value is non-native). +enum GenType { + GEN_NATIVE, + GEN_VAL_PTR, + GEN_DONT_CARE, +}; + +// Generate an expression for which we want the result embedded in {} +// initializers (generally to be used in calling a function where we want +// those values to be translated to a vector). +std::string GenExprs(const Expr* e); + +// Generate the value(s) associated with a ListExpr. If true, the "nested" +// parameter indicates that this list is embedded within an outer list, in +// which case it's expanded to include {}'s. It's false if the ListExpr is +// at the top level, such as when expanding the arguments in a CallExpr. +std::string GenListExpr(const Expr* e, GenType gt, bool nested); + +// Per-Expr-subclass code generation. 
The resulting code generally reflects +// the corresponding Eval() or Fold() methods. +std::string GenExpr(const ExprPtr& e, GenType gt, bool top_level = false) { return GenExpr(e.get(), gt, top_level); } +std::string GenExpr(const Expr* e, GenType gt, bool top_level = false); + +std::string GenNameExpr(const NameExpr* ne, GenType gt); +std::string GenConstExpr(const ConstExpr* c, GenType gt); +std::string GenAggrAdd(const Expr* e); +std::string GenAggrDel(const Expr* e); +std::string GenIncrExpr(const Expr* e, GenType gt, bool is_incr, bool top_level); +std::string GenCondExpr(const Expr* e, GenType gt); +std::string GenCallExpr(const CallExpr* c, GenType gt, bool top_level); +std::string GenInExpr(const Expr* e, GenType gt); +std::string GenFieldExpr(const FieldExpr* fe, GenType gt); +std::string GenHasFieldExpr(const HasFieldExpr* hfe, GenType gt); +std::string GenIndexExpr(const Expr* e, GenType gt); +std::string GenAssignExpr(const Expr* e, GenType gt, bool top_level); +std::string GenAddToExpr(const Expr* e, GenType gt, bool top_level); +std::string GenRemoveFromExpr(const Expr* e, GenType gt, bool top_level); +std::string GenSizeExpr(const Expr* e, GenType gt); +std::string GenScheduleExpr(const Expr* e); +std::string GenLambdaExpr(const Expr* e); +std::string GenLambdaExpr(const Expr* e, std::string capture_args); +std::string GenIsExpr(const Expr* e, GenType gt); + +std::string GenArithCoerceExpr(const Expr* e, GenType gt); +std::string GenRecordCoerceExpr(const Expr* e); +std::string GenTableCoerceExpr(const Expr* e); +std::string GenVectorCoerceExpr(const Expr* e); + +std::string GenRecordConstructorExpr(const Expr* e); +std::string GenSetConstructorExpr(const Expr* e); +std::string GenTableConstructorExpr(const Expr* e); +std::string GenVectorConstructorExpr(const Expr* e); + +// Generate code for constants that can be expressed directly as C++ constants. 
+std::string GenVal(const ValPtr& v); + +// Helper functions for particular Expr subclasses / flavors. +std::string GenUnary(const Expr* e, GenType gt, const char* op, const char* vec_op = nullptr); +std::string GenBinary(const Expr* e, GenType gt, const char* op, const char* vec_op = nullptr); +std::string GenBinarySet(const Expr* e, GenType gt, const char* op); +std::string GenBinaryString(const Expr* e, GenType gt, const char* op); +std::string GenBinaryPattern(const Expr* e, GenType gt, const char* op); +std::string GenBinaryAddr(const Expr* e, GenType gt, const char* op); +std::string GenBinarySubNet(const Expr* e, GenType gt, const char* op); +std::string GenEQ(const Expr* e, GenType gt, const char* op, const char* vec_op); + +std::string GenAssign(const ExprPtr& lhs, const ExprPtr& rhs, const std::string& rhs_native, + const std::string& rhs_val_ptr, GenType gt, bool top_level); +std::string GenDirectAssign(const ExprPtr& lhs, const std::string& rhs_native, const std::string& rhs_val_ptr, + GenType gt, bool top_level); +std::string GenIndexAssign(const ExprPtr& lhs, const ExprPtr& rhs, const std::string& rhs_val_ptr, GenType gt, + bool top_level); +std::string GenFieldAssign(const ExprPtr& lhs, const ExprPtr& rhs, const std::string& rhs_native, + const std::string& rhs_val_ptr, GenType gt, bool top_level); +std::string GenListAssign(const ExprPtr& lhs, const ExprPtr& rhs); + +// Support for element-by-element vector operations. +std::string GenVectorOp(const Expr* e, std::string op, const char* vec_op); +std::string GenVectorOp(const Expr* e, std::string op1, std::string op2, const char* vec_op); + +// If "all_deep" is true, it means make all of the captures deep copies, +// not just the ones that were explicitly marked as deep copies. That +// functionality is used to support Clone() methods; it's not needed when +// creating a new lambda instance. 
+std::string GenLambdaClone(const LambdaExpr* l, bool all_deep); + +// Returns an initializer list for a vector of integers. +std::string GenIntVector(const std::vector& vec); + +// The following are used to generate accesses to elements of extensible +// types. They first check whether the type has been extended (for records, +// beyond the field of interest); if not, then the access is done directly. +// If the access is however to an extended element, then they indirect the +// access through a map that is generated dynamically when the compiled code +// is initialized. +// Doing so allows the compiled code to work in contexts where other extensions +// occur that would otherwise conflict with hardwired offsets/values. +std::string GenField(const ExprPtr& rec, int field); +std::string GenEnum(const TypePtr& et, const ValPtr& ev); + +// For records that are extended via redef's, maps fields beyond the original +// definition to locations in the global (in the compiled code) "field_mapping" +// array. +// +// So for each such record, there's a second map of field-in-the-record to +// offset-in-field_mapping. +std::unordered_map> record_field_mappings; + +// Total number of such mappings (i.e., entries in the inner maps, not the +// outer map). +int num_rf_mappings = 0; + +// For each entry in "field_mapping", the record (as a global offset) and +// TypeDecl associated with the mapping. +std::vector> field_decls; + +// For enums that are extended via redef's, maps each distinct value (that +// the compiled scripts refer to) to locations in the global (in the compiled +// code) "enum_mapping" array. +// +// So for each such enum, there's a second map of value-during-compilation to +// offset-in-enum_mapping. +std::unordered_map> enum_val_mappings; + +// Total number of such mappings (i.e., entries in the inner maps, not the +// outer map). +int num_ev_mappings = 0; + +// For each entry in "enum_mapping", the EnumType (as a global offset) and +// name associated with the mapping. 
+std::vector> enum_names; diff --git a/src/script_opt/CPP/GenFunc.h b/src/script_opt/CPP/GenFunc.h new file mode 100644 index 0000000000..13427b9689 --- /dev/null +++ b/src/script_opt/CPP/GenFunc.h @@ -0,0 +1,74 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for generating function/lambda definitions. The counterpart +// to DeclFunc.cc. +// +// This file is included by Compile.h to insert into the CPPCompiler class. + +// Driver functions for compiling the body of the given function or lambda. +void CompileFunc(const FuncInfo& func); +void CompileLambda(const LambdaExpr* l, const ProfileFunc* pf); + +// Generates the body of the Invoke() method (which supplies the "glue" +// for calling the C++-generated code, for CPPStmt subclasses). +void GenInvokeBody(const std::string& fname, const TypePtr& t, const std::string& args) { + GenInvokeBody(fname + "(" + args + ")", t); +} +void GenInvokeBody(const std::string& call, const TypePtr& t); + +// Generates the code for the body of a script function with the given +// type, profile, C++ name, AST, lambda captures (if non-nil), and +// hook/event/function "flavor". +void DefineBody(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, const StmtPtr& body, + const IDPList* lambda_ids, FunctionFlavor flavor); + +// Declare parameters that originate from a type signature of "any" but were +// concretized in this declaration. +void TranslateAnyParams(const FuncTypePtr& ft, const ProfileFunc* pf); + +// Generates code to dynamically initialize any events referred to in the +// function. +void InitializeEvents(const ProfileFunc* pf); + +// Declare local variables (which are non-globals that aren't parameters or +// lambda captures). +void DeclareLocals(const ProfileFunc* func, const IDPList* lambda_ids); + +// Returns the C++ name to use for a given function body. 
+std::string BodyName(const FuncInfo& func); + +// Generate the arguments to be used when calling a C++-generated function. +std::string GenArgs(const RecordTypePtr& params, const Expr* e); + +// Functions that we've declared/compiled. Indexed by full C++ name. +std::unordered_set compiled_funcs; + +// "Simple" functions that we've compiled, i.e., those that have a single +// body and thus can be called directly. Indexed by function name, and +// maps to the C++ name. +std::unordered_map compiled_simple_funcs; + +// Maps function bodies to the names we use for them. +std::unordered_map body_names; + +// Maps function names to hashes of bodies. +std::unordered_map body_hashes; + +// Maps function names to priorities, for hooks & event handlers. +std::unordered_map body_priorities; + +// Maps function names to script locations, for better-than-nothing error +// reporting. +std::unordered_map body_locs; + +// Maps function names to events relevant to them. +std::unordered_map> body_events; + +// Full type of the function we're currently compiling. +FuncTypePtr func_type; + +// Return type of the function we're currently compiling. +TypePtr ret_type; + +// Internal name of the function we're currently compiling. +std::string body_name; diff --git a/src/script_opt/CPP/Inits.h b/src/script_opt/CPP/Inits.h new file mode 100644 index 0000000000..cb5646924b --- /dev/null +++ b/src/script_opt/CPP/Inits.h @@ -0,0 +1,127 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for generating run-time initialization of objects relating to +// Zeek values and types. +// +// This file is included by Compile.h to insert into the CPPCompiler class. + +public: +// True if the given expression is simple enough that we can generate code +// to evaluate it directly, and don't need to create a separate function per +// RegisterInitExpr() to track it. 
+static bool IsSimpleInitExpr(const ExprPtr& e); + +// Easy access to the global offset and the initialization +// cohort associated with a given type. +int TypeOffset(const TypePtr& t) { return GI_Offset(RegisterType(t)); } +int TypeCohort(const TypePtr& t) { return GI_Cohort(RegisterType(t)); } +int TypeFinalCohort(const TypePtr& t) { return GI_FinalCohort(RegisterType(t)); } + +// Tracks expressions used in attributes (such as &default=). +// +// We need to generate code to evaluate these, via CallExpr's that invoke +// functions that return the value of the expression. However, we can't +// generate that code when first encountering the attribute, because doing +// so will need to refer to the names of types, and initially those are +// unavailable (because the type's representatives, per pfs->RepTypes(), might +// not have yet been tracked). So instead we track the associated +// CallExprInitInfo objects, and after all types have been tracked, then spin +// through them to generate the code. +// +// Returns the associated initialization information. +std::shared_ptr RegisterInitExpr(const ExprPtr& e); + +// Tracks a C++ string value needed for initialization. Returns +// an offset into the global vector that will hold these. +int TrackString(std::string s) { + auto ts = tracked_strings.find(s); + if ( ts != tracked_strings.end() ) + return ts->second; + + int offset = ordered_tracked_strings.size(); + tracked_strings[s] = offset; + ordered_tracked_strings.emplace_back(s); + + return offset; +} + +// Tracks a profile hash value needed for initialization. Returns +// an offset into the global vector that will hold these. 
+int TrackHash(p_hash_type h) { + auto th = tracked_hashes.find(h); + if ( th != tracked_hashes.end() ) + return th->second; + + int offset = ordered_tracked_hashes.size(); + tracked_hashes[h] = offset; + ordered_tracked_hashes.emplace_back(h); + + return offset; +} + +private: +// Generates code for dynamically generating an expression associated with an +// attribute, via a function call. +void GenInitExpr(std::shared_ptr ce_init); + +// Returns the name of a function used to evaluate an initialization expression. +std::string InitExprName(const ExprPtr& e); + +// Convenience functions for returning the offset or initialization cohort +// associated with an initialization. +int GI_Offset(const std::shared_ptr& gi) const { return gi ? gi->Offset() : -1; } +int GI_Cohort(const std::shared_ptr& gi) const { return gi ? gi->InitCohort() : 0; } +int GI_FinalCohort(const std::shared_ptr& gi) const { return gi ? gi->FinalInitCohort() : 0; } + +// Generate code to initialize the mappings for record field offsets for field +// accesses into regions of records that can be extensible (and thus can vary +// at run-time to the offsets encountered during compilation). +void InitializeFieldMappings(); + +// Same, but for enum types. +void InitializeEnumMappings(); + +// Generate code to initialize BiFs. +void InitializeBiFs(); + +// Generate code to initialize strings that we track. +void InitializeStrings(); + +// Generate code to initialize hashes that we track. +void InitializeHashes(); + +// Generate code to initialize indirect references to constants. +void InitializeConsts(); + +// Generate code to initialize globals (using dynamic statements rather than +// constants). +void InitializeGlobals(); + +// Generate the initialization hook for this set of compiled code. +void GenInitHook(); + +// Generates code to activate standalone code. 
+void GenStandaloneActivation(); + +// Generates code to register the initialization for standalone use, and +// prints to stdout a Zeek script that can load all of what we compiled. +void GenLoad(); + +// A list of BiFs to look up during initialization. First string is the name +// of the C++ global holding the BiF, the second is its name as known to Zeek. +std::unordered_map BiFs; + +// Expressions for which we need to generate initialization-time code. +// Currently, these are only expressions appearing in attributes. +CPPTracker init_exprs = {"gen_init_expr", false}; + +// Maps strings to associated offsets. +std::unordered_map tracked_strings; + +// Tracks strings we've registered in order (corresponding to +// their offsets). +std::vector ordered_tracked_strings; + +// The same as the previous two, but for profile hashes. +std::vector ordered_tracked_hashes; +std::unordered_map tracked_hashes; diff --git a/src/script_opt/CPP/Stmts.h b/src/script_opt/CPP/Stmts.h new file mode 100644 index 0000000000..e37e8a254b --- /dev/null +++ b/src/script_opt/CPP/Stmts.h @@ -0,0 +1,34 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for generating code corresponding with Zeek statement AST nodes +// (Stmt objects). For the most part, code generation is straightforward as +// it matches the Exec/DoExec methods of the corresponding Stmt subclasses. +// +// This file is included by Compile.h to insert into the CPPCompiler class. 
+ +void GenStmt(const StmtPtr& s) { GenStmt(s.get()); } +void GenStmt(const Stmt* s); +void GenInitStmt(const InitStmt* init); +void GenIfStmt(const IfStmt* i); +void GenWhileStmt(const WhileStmt* w); +void GenReturnStmt(const ReturnStmt* r); +void GenEventStmt(const EventStmt* ev); + +void GenSwitchStmt(const SwitchStmt* sw); +void GenTypeSwitchStmt(const Expr* e, const case_list* cases); +void GenTypeSwitchCase(const ID* id, int case_offset, bool is_multi); +void GenValueSwitchStmt(const Expr* e, const case_list* cases); + +void GenWhenStmt(const WhenStmt* w); +void GenWhenStmt(const WhenInfo* wi, const std::string& when_lambda, const Location* loc, + std::vector local_aggrs); +void GenForStmt(const ForStmt* f); +void GenForOverTable(const ExprPtr& tbl, const IDPtr& value_var, const IDPList* loop_vars); +void GenForOverVector(const ExprPtr& tbl, const IDPtr& value_var, const IDPList* loop_vars); +void GenForOverString(const ExprPtr& str, const IDPList* loop_vars); + +void GenAssertStmt(const AssertStmt* a); + +// Nested level of loops/switches for which "break"'s should be +// C++ breaks rather than a "hook" break. +int break_level = 0; diff --git a/src/script_opt/CPP/Types.h b/src/script_opt/CPP/Types.h new file mode 100644 index 0000000000..4b67800de9 --- /dev/null +++ b/src/script_opt/CPP/Types.h @@ -0,0 +1,59 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for dealing with Zeek script types. +// +// This file is included by Compile.h to insert into the CPPCompiler class. + +public: +// Tracks the given type (with support methods for ones that are complicated), +// recursively including its sub-types, and creating initializations for +// constructing C++ variables representing the types. +// +// Returns the initialization info associated with the type. 
+std::shared_ptr RegisterType(const TypePtr& t); + +private: +// "Native" types are those Zeek scripting types that we support using +// low-level C++ types (like "zeek_uint_t" for "count"). Types that we +// instead support using some form of ValPtr representation are "non-native". +bool IsNativeType(const TypePtr& t) const; + +// Given an expression corresponding to a native type (and with the given +// script type 't'), converts it to the given GenType. +std::string NativeToGT(const std::string& expr, const TypePtr& t, GenType gt); + +// Given an expression with a C++ type of generic "ValPtr", of the given script +// type 't', converts it as needed to the given GenType. +std::string GenericValPtrToGT(const std::string& expr, const TypePtr& t, GenType gt); + +// Returns the name of a C++ variable that will hold a TypePtr of the +// appropriate flavor. 't' does not need to be a type representative. +std::string GenTypeName(const Type* t); +std::string GenTypeName(const TypePtr& t) { return GenTypeName(t.get()); } + +// Returns the "representative" for a given type, used to ensure that we +// re-use the C++ variable corresponding to a type and don't instantiate +// redundant instances. +const Type* TypeRep(const Type* t) { return pfs->TypeRep(t); } +const Type* TypeRep(const TypePtr& t) { return TypeRep(t.get()); } + +// Low-level C++ representations for types, of various flavors. +static const char* TypeTagName(TypeTag tag); +const char* TypeName(const TypePtr& t); +const char* FullTypeName(const TypePtr& t); +const char* TypeType(const TypePtr& t); + +// Access to a type's underlying values. +const char* NativeAccessor(const TypePtr& t); + +// The name for a type that should be used in declaring an IntrusivePtr to +// such a type. +const char* IntrusiveVal(const TypePtr& t); + +// Maps types to indices in the global "CPP__Type__" array. 
+CPPTracker types = {"types", true}; + +// Used to prevent analysis of mutually-referring types from leading to +// infinite recursion. Maps types to their global initialization information +// (or, initially, to nullptr, if they're in the process of being registered). +std::unordered_map> processed_types; diff --git a/src/script_opt/CPP/Vars.h b/src/script_opt/CPP/Vars.h new file mode 100644 index 0000000000..00c94d18dc --- /dev/null +++ b/src/script_opt/CPP/Vars.h @@ -0,0 +1,69 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods related to Zeek script variables and their C++ counterparts. +// +// This file is included by Compile.h to insert into the CPPCompiler class. + +public: +// Tracks a global to generate the necessary initialization. +// Returns the associated initialization info. +std::shared_ptr RegisterGlobal(const ID* g); + +private: +// Generate declarations associated with the given global, and, if it's used +// as a variable (not just as a function being called), track it as such. +void CreateGlobal(const ID* g); + +// Register the given identifier as a BiF. If is_var is true then the BiF +// is also used in a non-call context. +void AddBiF(const ID* b, bool is_var); + +// Register the given global name. "suffix" distinguishes particular types +// of globals, such as the names of bifs, global (non-function) variables, +// or compiled Zeek functions. +bool AddGlobal(const std::string& g, const char* suffix); + +// Tracks that the body we're currently compiling refers to the given event. +void RegisterEvent(std::string ev_name); + +// The following match various forms of identifiers to the name used for +// their C++ equivalent. +const char* IDName(const IDPtr& id) { return IDName(id.get()); } +const char* IDName(const ID* id) { return IDNameStr(id).c_str(); } +const std::string& IDNameStr(const ID* id); + +// Returns a canonicalized version of a variant of a global made distinct by +// the given suffix. 
+std::string GlobalName(const std::string& g, const char* suffix) { return Canonicalize(g.c_str()) + "_" + suffix; } + +// Returns a canonicalized form of a local identifier's name, expanding its +// module prefix if needed. +std::string LocalName(const ID* l) const; +std::string LocalName(const IDPtr& l) const { return LocalName(l.get()); } + +// The same, but for a capture. +std::string CaptureName(const ID* l) const; +std::string CaptureName(const IDPtr& l) const { return CaptureName(l.get()); } + +// Returns a canonicalized name, with various non-alphanumeric characters +// stripped or transformed, and guaranteed not to conflict with C++ keywords. +std::string Canonicalize(const char* name) const; + +// Returns the name of the global corresponding to an expression (which must +// be an EXPR_NAME). +std::string GlobalName(const ExprPtr& e) { return globals[e->AsNameExpr()->Id()->Name()]; } + +// Maps global names (not identifiers) to the names we use for them. +std::unordered_map globals; + +// Similar for locals, for the function currently being compiled. +std::unordered_map locals; + +// Maps globals to their associated initialization information. +std::unordered_map> global_gis; + +// Maps event names to the names we use for them. +std::unordered_map events; + +// Globals that correspond to variables, not functions. +IDSet global_vars;